[llvm] 2a1716d - [LegalizeTypes][VP] Widen load/store of fixed length vectors to VP ops

Luke Lau via llvm-commits llvm-commits at lists.llvm.org
Mon Jun 12 02:21:17 PDT 2023


Author: Luke Lau
Date: 2023-06-12T10:21:04+01:00
New Revision: 2a1716dec57e8b3dd668df17ecbedfc77a4112e5

URL: https://github.com/llvm/llvm-project/commit/2a1716dec57e8b3dd668df17ecbedfc77a4112e5
DIFF: https://github.com/llvm/llvm-project/commit/2a1716dec57e8b3dd668df17ecbedfc77a4112e5.diff

LOG: [LegalizeTypes][VP] Widen load/store of fixed length vectors to VP ops

If we have a load/store with an illegal fixed-length vector result type that
needs to be widened, e.g. `x:v6i32 = load p`, then instead of just widening it
to `x:v8i32 = load p`, we can widen it to the equivalent VP operation and set
the EVL to the exact number of elements needed:
`x:v8i32 = vp_load a, b, mask=true, evl=6`, provided that the target supports
vp_load/vp_store on the widened type.

Scalable vectors are already widened this way where possible, so this
largely reuses the same logic.
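
As a concrete illustration of the effect, here is the abs_v6i16 case from
fixed-vectors-abs.ll (reproduced below as a standalone module; the RUN flags
are my assumption, roughly `llc -mtriple=riscv64 -mattr=+v
-riscv-v-vector-bits-min=128`, and are not copied from the test file):

  ; Standalone version of the abs_v6i16 test body; the declare is added here
  ; so the module is self-contained.
  declare <6 x i16> @llvm.abs.v6i16(<6 x i16>, i1)

  define void @abs_v6i16(ptr %x) {
    %a = load <6 x i16>, ptr %x
    %b = call <6 x i16> @llvm.abs.v6i16(<6 x i16> %a, i1 false)
    store <6 x i16> %b, ptr %x
    ret void
  }

  ; With this patch the <6 x i16> load/store widen to vp_load/vp_store with
  ; evl=6, and the updated CHECK lines below show the resulting codegen:
  ;   vsetivli zero, 6, e16, m1, ta, ma
  ;   vle16.v  v8, (a0)
  ;   vsetivli zero, 8, e16, m1, ta, ma
  ;   vrsub.vi v9, v8, 0
  ;   vmax.vv  v8, v8, v9
  ;   vsetivli zero, 6, e16, m1, ta, ma
  ;   vse16.v  v8, (a0)
  ;   ret

The load and store now use vl=6 directly (the arithmetic still runs at the
widened vl of 8), which is what lets the old vslidedown/partial-store tail
sequences in the tests disappear.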

Reviewed By: craig.topper

Differential Revision: https://reviews.llvm.org/D148713

Added: 
    

Modified: 
    llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
    llvm/test/CodeGen/RISCV/rvv/fixed-vectors-abs.ll
    llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll
    llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll
    llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll
    llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll
    llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll
    llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
    llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-splat.ll
    llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
    llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll
    llvm/test/CodeGen/RISCV/rvv/fixed-vectors-load.ll
    llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store.ll
    llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-combine.ll
    llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index e2e1a837aeb59..a1a150d5234b1 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -5185,30 +5185,6 @@ SDValue DAGTypeLegalizer::WidenVecRes_LOAD(SDNode *N) {
     return SDValue();
   }
 
-  SDValue Result;
-  SmallVector<SDValue, 16> LdChain;  // Chain for the series of load
-  if (ExtType != ISD::NON_EXTLOAD)
-    Result = GenWidenVectorExtLoads(LdChain, LD, ExtType);
-  else
-    Result = GenWidenVectorLoads(LdChain, LD);
-
-  if (Result) {
-    // If we generate a single load, we can use that for the chain.  Otherwise,
-    // build a factor node to remember the multiple loads are independent and
-    // chain to that.
-    SDValue NewChain;
-    if (LdChain.size() == 1)
-      NewChain = LdChain[0];
-    else
-      NewChain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other, LdChain);
-
-    // Modified the chain - switch anything that used the old chain to use
-    // the new one.
-    ReplaceValueWith(SDValue(N, 1), NewChain);
-
-    return Result;
-  }
-
   // Generate a vector-predicated load if it is custom/legal on the target. To
   // avoid possible recursion, only do this if the widened mask type is legal.
   // FIXME: Not all targets may support EVL in VP_LOAD. These will have been
@@ -5218,15 +5194,13 @@ SDValue DAGTypeLegalizer::WidenVecRes_LOAD(SDNode *N) {
   EVT WideVT = TLI.getTypeToTransformTo(*DAG.getContext(), LdVT);
   EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
                                     WideVT.getVectorElementCount());
-  if (ExtType == ISD::NON_EXTLOAD && WideVT.isScalableVector() &&
+  if (ExtType == ISD::NON_EXTLOAD &&
       TLI.isOperationLegalOrCustom(ISD::VP_LOAD, WideVT) &&
       TLI.isTypeLegal(WideMaskVT)) {
     SDLoc DL(N);
     SDValue Mask = DAG.getAllOnesConstant(DL, WideMaskVT);
-    MVT EVLVT = TLI.getVPExplicitVectorLengthTy();
-    unsigned NumVTElts = LdVT.getVectorMinNumElements();
-    SDValue EVL =
-        DAG.getVScale(DL, EVLVT, APInt(EVLVT.getScalarSizeInBits(), NumVTElts));
+    SDValue EVL = DAG.getElementCount(DL, TLI.getVPExplicitVectorLengthTy(),
+                                      LdVT.getVectorElementCount());
     const auto *MMO = LD->getMemOperand();
     SDValue NewLoad =
         DAG.getLoadVP(WideVT, DL, LD->getChain(), LD->getBasePtr(), Mask, EVL,
@@ -5240,6 +5214,30 @@ SDValue DAGTypeLegalizer::WidenVecRes_LOAD(SDNode *N) {
     return NewLoad;
   }
 
+  SDValue Result;
+  SmallVector<SDValue, 16> LdChain; // Chain for the series of load
+  if (ExtType != ISD::NON_EXTLOAD)
+    Result = GenWidenVectorExtLoads(LdChain, LD, ExtType);
+  else
+    Result = GenWidenVectorLoads(LdChain, LD);
+
+  if (Result) {
+    // If we generate a single load, we can use that for the chain.  Otherwise,
+    // build a factor node to remember the multiple loads are independent and
+    // chain to that.
+    SDValue NewChain;
+    if (LdChain.size() == 1)
+      NewChain = LdChain[0];
+    else
+      NewChain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other, LdChain);
+
+    // Modified the chain - switch anything that used the old chain to use
+    // the new one.
+    ReplaceValueWith(SDValue(N, 1), NewChain);
+
+    return Result;
+  }
+
   report_fatal_error("Unable to widen vector load");
 }
 
@@ -6272,14 +6270,6 @@ SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) {
   if (ST->isTruncatingStore())
     return TLI.scalarizeVectorStore(ST, DAG);
 
-  SmallVector<SDValue, 16> StChain;
-  if (GenWidenVectorStores(StChain, ST)) {
-    if (StChain.size() == 1)
-      return StChain[0];
-
-    return DAG.getNode(ISD::TokenFactor, SDLoc(ST), MVT::Other, StChain);
-  }
-
   // Generate a vector-predicated store if it is custom/legal on the target.
   // To avoid possible recursion, only do this if the widened mask type is
   // legal.
@@ -6291,23 +6281,29 @@ SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) {
   EVT WideVT = TLI.getTypeToTransformTo(*DAG.getContext(), StVT);
   EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
                                     WideVT.getVectorElementCount());
-  if (WideVT.isScalableVector() &&
-      TLI.isOperationLegalOrCustom(ISD::VP_STORE, WideVT) &&
+
+  if (TLI.isOperationLegalOrCustom(ISD::VP_STORE, WideVT) &&
       TLI.isTypeLegal(WideMaskVT)) {
     // Widen the value.
     SDLoc DL(N);
     StVal = GetWidenedVector(StVal);
     SDValue Mask = DAG.getAllOnesConstant(DL, WideMaskVT);
-    MVT EVLVT = TLI.getVPExplicitVectorLengthTy();
-    unsigned NumVTElts = StVT.getVectorMinNumElements();
-    SDValue EVL =
-        DAG.getVScale(DL, EVLVT, APInt(EVLVT.getScalarSizeInBits(), NumVTElts));
+    SDValue EVL = DAG.getElementCount(DL, TLI.getVPExplicitVectorLengthTy(),
+                                      StVT.getVectorElementCount());
     return DAG.getStoreVP(ST->getChain(), DL, StVal, ST->getBasePtr(),
                           DAG.getUNDEF(ST->getBasePtr().getValueType()), Mask,
-                          EVL, StVal.getValueType(), ST->getMemOperand(),
+                          EVL, StVT, ST->getMemOperand(),
                           ST->getAddressingMode());
   }
 
+  SmallVector<SDValue, 16> StChain;
+  if (GenWidenVectorStores(StChain, ST)) {
+    if (StChain.size() == 1)
+      return StChain[0];
+
+    return DAG.getNode(ISD::TokenFactor, SDLoc(ST), MVT::Other, StChain);
+  }
+
   report_fatal_error("Unable to widen vector store");
 }
 

diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-abs.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-abs.ll
index f11a7b44edaab..b93b35ac61664 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-abs.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-abs.ll
@@ -37,33 +37,16 @@ define void @abs_v8i16(ptr %x) {
 declare <8 x i16> @llvm.abs.v8i16(<8 x i16>, i1)
 
 define void @abs_v6i16(ptr %x) {
-; LMULMAX1-RV32-LABEL: abs_v6i16:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle16.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    vrsub.vi v9, v8, 0
-; LMULMAX1-RV32-NEXT:    vmax.vv v8, v8, v9
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vslidedown.vi v9, v8, 2
-; LMULMAX1-RV32-NEXT:    addi a1, a0, 8
-; LMULMAX1-RV32-NEXT:    vse32.v v9, (a1)
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; LMULMAX1-RV32-NEXT:    vse16.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: abs_v6i16:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle16.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vrsub.vi v9, v8, 0
-; LMULMAX1-RV64-NEXT:    vmax.vv v8, v8, v9
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vse64.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vslidedown.vi v8, v8, 2
-; LMULMAX1-RV64-NEXT:    addi a0, a0, 8
-; LMULMAX1-RV64-NEXT:    vse32.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    ret
+; CHECK-LABEL: abs_v6i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a0)
+; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT:    vrsub.vi v9, v8, 0
+; CHECK-NEXT:    vmax.vv v8, v8, v9
+; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vse16.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <6 x i16>, ptr %x
   %b = call <6 x i16> @llvm.abs.v6i16(<6 x i16> %a, i1 false)
   store <6 x i16> %b, ptr %x

diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll
index 16c7611a84430..1d34be990dd25 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll
@@ -220,8 +220,8 @@ define double @extractelt_v4f64(ptr %x) nounwind {
 define i64 @extractelt_v3i64(ptr %x) nounwind {
 ; RV32-LABEL: extractelt_v3i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; RV32-NEXT:    vle32.v v8, (a0)
+; RV32-NEXT:    vsetivli zero, 3, e64, m2, ta, ma
+; RV32-NEXT:    vle64.v v8, (a0)
 ; RV32-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
 ; RV32-NEXT:    vslidedown.vi v10, v8, 4
 ; RV32-NEXT:    vmv.x.s a0, v10
@@ -231,7 +231,7 @@ define i64 @extractelt_v3i64(ptr %x) nounwind {
 ;
 ; RV64-LABEL: extractelt_v3i64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV64-NEXT:    vsetivli zero, 3, e64, m2, ta, ma
 ; RV64-NEXT:    vle64.v v8, (a0)
 ; RV64-NEXT:    vsetivli zero, 1, e64, m2, ta, ma
 ; RV64-NEXT:    vslidedown.vi v8, v8, 2
@@ -485,8 +485,9 @@ define double @extractelt_v4f64_idx(ptr %x, i32 zeroext %idx) nounwind {
 define i64 @extractelt_v3i64_idx(ptr %x, i32 zeroext %idx) nounwind {
 ; RV32-LABEL: extractelt_v3i64_idx:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV32-NEXT:    vsetivli zero, 3, e64, m2, ta, ma
 ; RV32-NEXT:    vle64.v v8, (a0)
+; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
 ; RV32-NEXT:    vadd.vv v8, v8, v8
 ; RV32-NEXT:    add a1, a1, a1
 ; RV32-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
@@ -499,8 +500,9 @@ define i64 @extractelt_v3i64_idx(ptr %x, i32 zeroext %idx) nounwind {
 ;
 ; RV64-LABEL: extractelt_v3i64_idx:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV64-NEXT:    vsetivli zero, 3, e64, m2, ta, ma
 ; RV64-NEXT:    vle64.v v8, (a0)
+; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
 ; RV64-NEXT:    vadd.vv v8, v8, v8
 ; RV64-NEXT:    vsetivli zero, 1, e64, m2, ta, ma
 ; RV64-NEXT:    vslidedown.vx v8, v8, a1

diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll
index 8059f5b5c5545..d6c8a14808274 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll
@@ -25,33 +25,16 @@ define void @fadd_v8f16(ptr %x, ptr %y) {
 }
 
 define void @fadd_v6f16(ptr %x, ptr %y) {
-; LMULMAX1-RV32-LABEL: fadd_v6f16:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle16.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    vle16.v v9, (a1)
-; LMULMAX1-RV32-NEXT:    vfadd.vv v8, v8, v9
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vslidedown.vi v9, v8, 2
-; LMULMAX1-RV32-NEXT:    addi a1, a0, 8
-; LMULMAX1-RV32-NEXT:    vse32.v v9, (a1)
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; LMULMAX1-RV32-NEXT:    vse16.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: fadd_v6f16:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle16.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vle16.v v9, (a1)
-; LMULMAX1-RV64-NEXT:    vfadd.vv v8, v8, v9
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vse64.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vslidedown.vi v8, v8, 2
-; LMULMAX1-RV64-NEXT:    addi a0, a0, 8
-; LMULMAX1-RV64-NEXT:    vse32.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    ret
+; CHECK-LABEL: fadd_v6f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a0)
+; CHECK-NEXT:    vle16.v v9, (a1)
+; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT:    vfadd.vv v8, v8, v9
+; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vse16.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <6 x half>, ptr %x
   %b = load <6 x half>, ptr %y
   %c = fadd <6 x half> %a, %b
@@ -108,33 +91,16 @@ define void @fsub_v8f16(ptr %x, ptr %y) {
 }
 
 define void @fsub_v6f16(ptr %x, ptr %y) {
-; LMULMAX1-RV32-LABEL: fsub_v6f16:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle16.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    vle16.v v9, (a1)
-; LMULMAX1-RV32-NEXT:    vfsub.vv v8, v8, v9
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vslidedown.vi v9, v8, 2
-; LMULMAX1-RV32-NEXT:    addi a1, a0, 8
-; LMULMAX1-RV32-NEXT:    vse32.v v9, (a1)
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; LMULMAX1-RV32-NEXT:    vse16.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: fsub_v6f16:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle16.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vle16.v v9, (a1)
-; LMULMAX1-RV64-NEXT:    vfsub.vv v8, v8, v9
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vse64.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vslidedown.vi v8, v8, 2
-; LMULMAX1-RV64-NEXT:    addi a0, a0, 8
-; LMULMAX1-RV64-NEXT:    vse32.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    ret
+; CHECK-LABEL: fsub_v6f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a0)
+; CHECK-NEXT:    vle16.v v9, (a1)
+; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT:    vfsub.vv v8, v8, v9
+; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vse16.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <6 x half>, ptr %x
   %b = load <6 x half>, ptr %y
   %c = fsub <6 x half> %a, %b
@@ -191,33 +157,16 @@ define void @fmul_v8f16(ptr %x, ptr %y) {
 }
 
 define void @fmul_v6f16(ptr %x, ptr %y) {
-; LMULMAX1-RV32-LABEL: fmul_v6f16:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle16.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    vle16.v v9, (a1)
-; LMULMAX1-RV32-NEXT:    vfmul.vv v8, v8, v9
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vslidedown.vi v9, v8, 2
-; LMULMAX1-RV32-NEXT:    addi a1, a0, 8
-; LMULMAX1-RV32-NEXT:    vse32.v v9, (a1)
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; LMULMAX1-RV32-NEXT:    vse16.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: fmul_v6f16:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle16.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vle16.v v9, (a1)
-; LMULMAX1-RV64-NEXT:    vfmul.vv v8, v8, v9
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vse64.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vslidedown.vi v8, v8, 2
-; LMULMAX1-RV64-NEXT:    addi a0, a0, 8
-; LMULMAX1-RV64-NEXT:    vse32.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    ret
+; CHECK-LABEL: fmul_v6f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a0)
+; CHECK-NEXT:    vle16.v v9, (a1)
+; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT:    vfmul.vv v8, v8, v9
+; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vse16.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <6 x half>, ptr %x
   %b = load <6 x half>, ptr %y
   %c = fmul <6 x half> %a, %b
@@ -274,33 +223,16 @@ define void @fdiv_v8f16(ptr %x, ptr %y) {
 }
 
 define void @fdiv_v6f16(ptr %x, ptr %y) {
-; LMULMAX1-RV32-LABEL: fdiv_v6f16:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle16.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    vle16.v v9, (a1)
-; LMULMAX1-RV32-NEXT:    vfdiv.vv v8, v8, v9
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vslidedown.vi v9, v8, 2
-; LMULMAX1-RV32-NEXT:    addi a1, a0, 8
-; LMULMAX1-RV32-NEXT:    vse32.v v9, (a1)
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; LMULMAX1-RV32-NEXT:    vse16.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: fdiv_v6f16:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle16.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vle16.v v9, (a1)
-; LMULMAX1-RV64-NEXT:    vfdiv.vv v8, v8, v9
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vse64.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vslidedown.vi v8, v8, 2
-; LMULMAX1-RV64-NEXT:    addi a0, a0, 8
-; LMULMAX1-RV64-NEXT:    vse32.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    ret
+; CHECK-LABEL: fdiv_v6f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a0)
+; CHECK-NEXT:    vle16.v v9, (a1)
+; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT:    vfdiv.vv v8, v8, v9
+; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vse16.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <6 x half>, ptr %x
   %b = load <6 x half>, ptr %y
   %c = fdiv <6 x half> %a, %b
@@ -355,31 +287,15 @@ define void @fneg_v8f16(ptr %x) {
 }
 
 define void @fneg_v6f16(ptr %x) {
-; LMULMAX1-RV32-LABEL: fneg_v6f16:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle16.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    vfneg.v v8, v8
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vslidedown.vi v9, v8, 2
-; LMULMAX1-RV32-NEXT:    addi a1, a0, 8
-; LMULMAX1-RV32-NEXT:    vse32.v v9, (a1)
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; LMULMAX1-RV32-NEXT:    vse16.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: fneg_v6f16:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle16.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vfneg.v v8, v8
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vse64.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vslidedown.vi v8, v8, 2
-; LMULMAX1-RV64-NEXT:    addi a0, a0, 8
-; LMULMAX1-RV64-NEXT:    vse32.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    ret
+; CHECK-LABEL: fneg_v6f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a0)
+; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT:    vfneg.v v8, v8
+; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vse16.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <6 x half>, ptr %x
   %b = fneg <6 x half> %a
   store <6 x half> %b, ptr %x
@@ -430,31 +346,15 @@ define void @fabs_v8f16(ptr %x) {
 declare <8 x half> @llvm.fabs.v8f16(<8 x half>)
 
 define void @fabs_v6f16(ptr %x) {
-; LMULMAX1-RV32-LABEL: fabs_v6f16:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle16.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    vfabs.v v8, v8
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vslidedown.vi v9, v8, 2
-; LMULMAX1-RV32-NEXT:    addi a1, a0, 8
-; LMULMAX1-RV32-NEXT:    vse32.v v9, (a1)
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; LMULMAX1-RV32-NEXT:    vse16.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: fabs_v6f16:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle16.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vfabs.v v8, v8
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vse64.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vslidedown.vi v8, v8, 2
-; LMULMAX1-RV64-NEXT:    addi a0, a0, 8
-; LMULMAX1-RV64-NEXT:    vse32.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    ret
+; CHECK-LABEL: fabs_v6f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a0)
+; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT:    vfabs.v v8, v8
+; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vse16.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <6 x half>, ptr %x
   %b = call <6 x half> @llvm.fabs.v6f16(<6 x half> %a)
   store <6 x half> %b, ptr %x
@@ -510,33 +410,16 @@ define void @copysign_v8f16(ptr %x, ptr %y) {
 declare <8 x half> @llvm.copysign.v8f16(<8 x half>, <8 x half>)
 
 define void @copysign_v6f16(ptr %x, ptr %y) {
-; LMULMAX1-RV32-LABEL: copysign_v6f16:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle16.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    vle16.v v9, (a1)
-; LMULMAX1-RV32-NEXT:    vfsgnj.vv v8, v8, v9
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vslidedown.vi v9, v8, 2
-; LMULMAX1-RV32-NEXT:    addi a1, a0, 8
-; LMULMAX1-RV32-NEXT:    vse32.v v9, (a1)
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; LMULMAX1-RV32-NEXT:    vse16.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: copysign_v6f16:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle16.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vle16.v v9, (a1)
-; LMULMAX1-RV64-NEXT:    vfsgnj.vv v8, v8, v9
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vse64.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vslidedown.vi v8, v8, 2
-; LMULMAX1-RV64-NEXT:    addi a0, a0, 8
-; LMULMAX1-RV64-NEXT:    vse32.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    ret
+; CHECK-LABEL: copysign_v6f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a0)
+; CHECK-NEXT:    vle16.v v9, (a1)
+; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT:    vfsgnj.vv v8, v8, v9
+; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vse16.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <6 x half>, ptr %x
   %b = load <6 x half>, ptr %y
   %c = call <6 x half> @llvm.copysign.v6f16(<6 x half> %a, <6 x half> %b)
@@ -596,31 +479,15 @@ define void @copysign_vf_v8f16(ptr %x, half %y) {
 }
 
 define void @copysign_vf_v6f16(ptr %x, half %y) {
-; LMULMAX1-RV32-LABEL: copysign_vf_v6f16:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle16.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    vfsgnj.vf v8, v8, fa0
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vslidedown.vi v9, v8, 2
-; LMULMAX1-RV32-NEXT:    addi a1, a0, 8
-; LMULMAX1-RV32-NEXT:    vse32.v v9, (a1)
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; LMULMAX1-RV32-NEXT:    vse16.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: copysign_vf_v6f16:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle16.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vfsgnj.vf v8, v8, fa0
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vse64.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vslidedown.vi v8, v8, 2
-; LMULMAX1-RV64-NEXT:    addi a0, a0, 8
-; LMULMAX1-RV64-NEXT:    vse32.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    ret
+; CHECK-LABEL: copysign_vf_v6f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a0)
+; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT:    vfsgnj.vf v8, v8, fa0
+; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vse16.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <6 x half>, ptr %x
   %b = insertelement <6 x half> poison, half %y, i32 0
   %c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer
@@ -679,33 +546,16 @@ define void @copysign_neg_v8f16(ptr %x, ptr %y) {
 }
 
 define void @copysign_neg_v6f16(ptr %x, ptr %y) {
-; LMULMAX1-RV32-LABEL: copysign_neg_v6f16:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle16.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    vle16.v v9, (a1)
-; LMULMAX1-RV32-NEXT:    vfsgnjn.vv v8, v8, v9
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vslidedown.vi v9, v8, 2
-; LMULMAX1-RV32-NEXT:    addi a1, a0, 8
-; LMULMAX1-RV32-NEXT:    vse32.v v9, (a1)
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; LMULMAX1-RV32-NEXT:    vse16.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: copysign_neg_v6f16:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle16.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vle16.v v9, (a1)
-; LMULMAX1-RV64-NEXT:    vfsgnjn.vv v8, v8, v9
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vse64.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vslidedown.vi v8, v8, 2
-; LMULMAX1-RV64-NEXT:    addi a0, a0, 8
-; LMULMAX1-RV64-NEXT:    vse32.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    ret
+; CHECK-LABEL: copysign_neg_v6f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a0)
+; CHECK-NEXT:    vle16.v v9, (a1)
+; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT:    vfsgnjn.vv v8, v8, v9
+; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vse16.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <6 x half>, ptr %x
   %b = load <6 x half>, ptr %y
   %c = fneg <6 x half> %b
@@ -769,36 +619,17 @@ define void @copysign_neg_trunc_v4f16_v4f32(ptr %x, ptr %y) {
 declare <4 x half> @llvm.copysign.v4f16(<4 x half>, <4 x half>)
 
 define void @copysign_neg_trunc_v3f16_v3f32(ptr %x, ptr %y) {
-; LMULMAX1-RV32-LABEL: copysign_neg_trunc_v3f16_v3f32:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; LMULMAX1-RV32-NEXT:    vle32.v v8, (a1)
-; LMULMAX1-RV32-NEXT:    vle16.v v9, (a0)
-; LMULMAX1-RV32-NEXT:    vfncvt.f.f.w v10, v8
-; LMULMAX1-RV32-NEXT:    vfsgnjn.vv v8, v9, v10
-; LMULMAX1-RV32-NEXT:    vslidedown.vi v9, v8, 2
-; LMULMAX1-RV32-NEXT:    addi a1, a0, 4
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
-; LMULMAX1-RV32-NEXT:    vse16.v v9, (a1)
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; LMULMAX1-RV32-NEXT:    vse32.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: copysign_neg_trunc_v3f16_v3f32:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle64.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; LMULMAX1-RV64-NEXT:    vle32.v v9, (a1)
-; LMULMAX1-RV64-NEXT:    vfncvt.f.f.w v10, v9
-; LMULMAX1-RV64-NEXT:    vfsgnjn.vv v8, v8, v10
-; LMULMAX1-RV64-NEXT:    vslidedown.vi v9, v8, 2
-; LMULMAX1-RV64-NEXT:    addi a1, a0, 4
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
-; LMULMAX1-RV64-NEXT:    vse16.v v9, (a1)
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; LMULMAX1-RV64-NEXT:    vse32.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    ret
+; CHECK-LABEL: copysign_neg_trunc_v3f16_v3f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 3, e16, mf2, ta, ma
+; CHECK-NEXT:    vle32.v v8, (a1)
+; CHECK-NEXT:    vle16.v v9, (a0)
+; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
+; CHECK-NEXT:    vfncvt.f.f.w v10, v8
+; CHECK-NEXT:    vfsgnjn.vv v8, v9, v10
+; CHECK-NEXT:    vsetivli zero, 3, e16, mf2, ta, ma
+; CHECK-NEXT:    vse16.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <3 x half>, ptr %x
   %b = load <3 x float>, ptr %y
   %c = fneg <3 x float> %b
@@ -845,31 +676,15 @@ define void @sqrt_v8f16(ptr %x) {
 declare <8 x half> @llvm.sqrt.v8f16(<8 x half>)
 
 define void @sqrt_v6f16(ptr %x) {
-; LMULMAX1-RV32-LABEL: sqrt_v6f16:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle16.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    vfsqrt.v v8, v8
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vslidedown.vi v9, v8, 2
-; LMULMAX1-RV32-NEXT:    addi a1, a0, 8
-; LMULMAX1-RV32-NEXT:    vse32.v v9, (a1)
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; LMULMAX1-RV32-NEXT:    vse16.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: sqrt_v6f16:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle16.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vfsqrt.v v8, v8
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vse64.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vslidedown.vi v8, v8, 2
-; LMULMAX1-RV64-NEXT:    addi a0, a0, 8
-; LMULMAX1-RV64-NEXT:    vse32.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    ret
+; CHECK-LABEL: sqrt_v6f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a0)
+; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT:    vfsqrt.v v8, v8
+; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vse16.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <6 x half>, ptr %x
   %b = call <6 x half> @llvm.sqrt.v6f16(<6 x half> %a)
   store <6 x half> %b, ptr %x
@@ -927,35 +742,17 @@ define void @fma_v8f16(ptr %x, ptr %y, ptr %z) {
 declare <8 x half> @llvm.fma.v8f16(<8 x half>, <8 x half>, <8 x half>)
 
 define void @fma_v6f16(ptr %x, ptr %y, ptr %z) {
-; LMULMAX1-RV32-LABEL: fma_v6f16:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle16.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    vle16.v v9, (a1)
-; LMULMAX1-RV32-NEXT:    vle16.v v10, (a2)
-; LMULMAX1-RV32-NEXT:    vfmacc.vv v10, v8, v9
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vslidedown.vi v8, v10, 2
-; LMULMAX1-RV32-NEXT:    addi a1, a0, 8
-; LMULMAX1-RV32-NEXT:    vse32.v v8, (a1)
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; LMULMAX1-RV32-NEXT:    vse16.v v10, (a0)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: fma_v6f16:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle16.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vle16.v v9, (a1)
-; LMULMAX1-RV64-NEXT:    vle16.v v10, (a2)
-; LMULMAX1-RV64-NEXT:    vfmacc.vv v10, v8, v9
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vse64.v v10, (a0)
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vslidedown.vi v8, v10, 2
-; LMULMAX1-RV64-NEXT:    addi a0, a0, 8
-; LMULMAX1-RV64-NEXT:    vse32.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    ret
+; CHECK-LABEL: fma_v6f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a0)
+; CHECK-NEXT:    vle16.v v9, (a1)
+; CHECK-NEXT:    vle16.v v10, (a2)
+; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT:    vfmacc.vv v10, v8, v9
+; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vse16.v v10, (a0)
+; CHECK-NEXT:    ret
   %a = load <6 x half>, ptr %x
   %b = load <6 x half>, ptr %y
   %c = load <6 x half>, ptr %z
@@ -1023,35 +820,17 @@ define void @fmsub_v8f16(ptr %x, ptr %y, ptr %z) {
 }
 
 define void @fmsub_v6f16(ptr %x, ptr %y, ptr %z) {
-; LMULMAX1-RV32-LABEL: fmsub_v6f16:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle16.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    vle16.v v9, (a1)
-; LMULMAX1-RV32-NEXT:    vle16.v v10, (a2)
-; LMULMAX1-RV32-NEXT:    vfmsac.vv v10, v8, v9
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vslidedown.vi v8, v10, 2
-; LMULMAX1-RV32-NEXT:    addi a1, a0, 8
-; LMULMAX1-RV32-NEXT:    vse32.v v8, (a1)
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; LMULMAX1-RV32-NEXT:    vse16.v v10, (a0)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: fmsub_v6f16:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle16.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vle16.v v9, (a1)
-; LMULMAX1-RV64-NEXT:    vle16.v v10, (a2)
-; LMULMAX1-RV64-NEXT:    vfmsac.vv v10, v8, v9
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vse64.v v10, (a0)
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vslidedown.vi v8, v10, 2
-; LMULMAX1-RV64-NEXT:    addi a0, a0, 8
-; LMULMAX1-RV64-NEXT:    vse32.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    ret
+; CHECK-LABEL: fmsub_v6f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a0)
+; CHECK-NEXT:    vle16.v v9, (a1)
+; CHECK-NEXT:    vle16.v v10, (a2)
+; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT:    vfmsac.vv v10, v8, v9
+; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vse16.v v10, (a0)
+; CHECK-NEXT:    ret
   %a = load <6 x half>, ptr %x
   %b = load <6 x half>, ptr %y
   %c = load <6 x half>, ptr %z
@@ -1858,35 +1637,19 @@ define void @fadd_vf_v8f16(ptr %x, half %y) {
 }
 
 define void @fadd_vf_v6f16(ptr %x, half %y) {
-; LMULMAX1-RV32-LABEL: fadd_vf_v6f16:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle16.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    vfadd.vf v8, v8, fa0
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vslidedown.vi v9, v8, 2
-; LMULMAX1-RV32-NEXT:    addi a1, a0, 8
-; LMULMAX1-RV32-NEXT:    vse32.v v9, (a1)
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; LMULMAX1-RV32-NEXT:    vse16.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: fadd_vf_v6f16:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle16.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vfadd.vf v8, v8, fa0
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vse64.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vslidedown.vi v8, v8, 2
-; LMULMAX1-RV64-NEXT:    addi a0, a0, 8
-; LMULMAX1-RV64-NEXT:    vse32.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    ret
-  %a = load <6 x half>, ptr %x
-  %b = insertelement <6 x half> poison, half %y, i32 0
-  %c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer
-  %d = fadd <6 x half> %a, %c
+; CHECK-LABEL: fadd_vf_v6f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a0)
+; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT:    vfadd.vf v8, v8, fa0
+; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vse16.v v8, (a0)
+; CHECK-NEXT:    ret
+  %a = load <6 x half>, ptr %x
+  %b = insertelement <6 x half> poison, half %y, i32 0
+  %c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer
+  %d = fadd <6 x half> %a, %c
   store <6 x half> %d, ptr %x
   ret void
 }
@@ -1940,31 +1703,15 @@ define void @fadd_fv_v8f16(ptr %x, half %y) {
 }
 
 define void @fadd_fv_v6f16(ptr %x, half %y) {
-; LMULMAX1-RV32-LABEL: fadd_fv_v6f16:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle16.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    vfadd.vf v8, v8, fa0
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vslidedown.vi v9, v8, 2
-; LMULMAX1-RV32-NEXT:    addi a1, a0, 8
-; LMULMAX1-RV32-NEXT:    vse32.v v9, (a1)
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; LMULMAX1-RV32-NEXT:    vse16.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: fadd_fv_v6f16:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle16.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vfadd.vf v8, v8, fa0
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vse64.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vslidedown.vi v8, v8, 2
-; LMULMAX1-RV64-NEXT:    addi a0, a0, 8
-; LMULMAX1-RV64-NEXT:    vse32.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    ret
+; CHECK-LABEL: fadd_fv_v6f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a0)
+; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT:    vfadd.vf v8, v8, fa0
+; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vse16.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <6 x half>, ptr %x
   %b = insertelement <6 x half> poison, half %y, i32 0
   %c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer
@@ -2022,31 +1769,15 @@ define void @fsub_vf_v8f16(ptr %x, half %y) {
 }
 
 define void @fsub_vf_v6f16(ptr %x, half %y) {
-; LMULMAX1-RV32-LABEL: fsub_vf_v6f16:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle16.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    vfsub.vf v8, v8, fa0
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vslidedown.vi v9, v8, 2
-; LMULMAX1-RV32-NEXT:    addi a1, a0, 8
-; LMULMAX1-RV32-NEXT:    vse32.v v9, (a1)
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; LMULMAX1-RV32-NEXT:    vse16.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: fsub_vf_v6f16:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle16.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vfsub.vf v8, v8, fa0
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vse64.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vslidedown.vi v8, v8, 2
-; LMULMAX1-RV64-NEXT:    addi a0, a0, 8
-; LMULMAX1-RV64-NEXT:    vse32.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    ret
+; CHECK-LABEL: fsub_vf_v6f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a0)
+; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT:    vfsub.vf v8, v8, fa0
+; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vse16.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <6 x half>, ptr %x
   %b = insertelement <6 x half> poison, half %y, i32 0
   %c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer
@@ -2104,31 +1835,15 @@ define void @fsub_fv_v8f16(ptr %x, half %y) {
 }
 
 define void @fsub_fv_v6f16(ptr %x, half %y) {
-; LMULMAX1-RV32-LABEL: fsub_fv_v6f16:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle16.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    vfrsub.vf v8, v8, fa0
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vslidedown.vi v9, v8, 2
-; LMULMAX1-RV32-NEXT:    addi a1, a0, 8
-; LMULMAX1-RV32-NEXT:    vse32.v v9, (a1)
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; LMULMAX1-RV32-NEXT:    vse16.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: fsub_fv_v6f16:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle16.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vfrsub.vf v8, v8, fa0
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vse64.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vslidedown.vi v8, v8, 2
-; LMULMAX1-RV64-NEXT:    addi a0, a0, 8
-; LMULMAX1-RV64-NEXT:    vse32.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    ret
+; CHECK-LABEL: fsub_fv_v6f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a0)
+; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT:    vfrsub.vf v8, v8, fa0
+; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vse16.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <6 x half>, ptr %x
   %b = insertelement <6 x half> poison, half %y, i32 0
   %c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer
@@ -2186,31 +1901,15 @@ define void @fmul_vf_v8f16(ptr %x, half %y) {
 }
 
 define void @fmul_vf_v6f16(ptr %x, half %y) {
-; LMULMAX1-RV32-LABEL: fmul_vf_v6f16:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle16.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    vfmul.vf v8, v8, fa0
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vslidedown.vi v9, v8, 2
-; LMULMAX1-RV32-NEXT:    addi a1, a0, 8
-; LMULMAX1-RV32-NEXT:    vse32.v v9, (a1)
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; LMULMAX1-RV32-NEXT:    vse16.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: fmul_vf_v6f16:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle16.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vfmul.vf v8, v8, fa0
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vse64.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vslidedown.vi v8, v8, 2
-; LMULMAX1-RV64-NEXT:    addi a0, a0, 8
-; LMULMAX1-RV64-NEXT:    vse32.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    ret
+; CHECK-LABEL: fmul_vf_v6f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a0)
+; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT:    vfmul.vf v8, v8, fa0
+; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vse16.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <6 x half>, ptr %x
   %b = insertelement <6 x half> poison, half %y, i32 0
   %c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer
@@ -2268,31 +1967,15 @@ define void @fmul_fv_v8f16(ptr %x, half %y) {
 }
 
 define void @fmul_fv_v6f16(ptr %x, half %y) {
-; LMULMAX1-RV32-LABEL: fmul_fv_v6f16:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle16.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    vfmul.vf v8, v8, fa0
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vslidedown.vi v9, v8, 2
-; LMULMAX1-RV32-NEXT:    addi a1, a0, 8
-; LMULMAX1-RV32-NEXT:    vse32.v v9, (a1)
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; LMULMAX1-RV32-NEXT:    vse16.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: fmul_fv_v6f16:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle16.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vfmul.vf v8, v8, fa0
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vse64.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vslidedown.vi v8, v8, 2
-; LMULMAX1-RV64-NEXT:    addi a0, a0, 8
-; LMULMAX1-RV64-NEXT:    vse32.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    ret
+; CHECK-LABEL: fmul_fv_v6f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a0)
+; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT:    vfmul.vf v8, v8, fa0
+; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vse16.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <6 x half>, ptr %x
   %b = insertelement <6 x half> poison, half %y, i32 0
   %c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer
@@ -2350,31 +2033,15 @@ define void @fdiv_vf_v8f16(ptr %x, half %y) {
 }
 
 define void @fdiv_vf_v6f16(ptr %x, half %y) {
-; LMULMAX1-RV32-LABEL: fdiv_vf_v6f16:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle16.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    vfdiv.vf v8, v8, fa0
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vslidedown.vi v9, v8, 2
-; LMULMAX1-RV32-NEXT:    addi a1, a0, 8
-; LMULMAX1-RV32-NEXT:    vse32.v v9, (a1)
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; LMULMAX1-RV32-NEXT:    vse16.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: fdiv_vf_v6f16:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle16.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vfdiv.vf v8, v8, fa0
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vse64.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vslidedown.vi v8, v8, 2
-; LMULMAX1-RV64-NEXT:    addi a0, a0, 8
-; LMULMAX1-RV64-NEXT:    vse32.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    ret
+; CHECK-LABEL: fdiv_vf_v6f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a0)
+; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT:    vfdiv.vf v8, v8, fa0
+; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vse16.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <6 x half>, ptr %x
   %b = insertelement <6 x half> poison, half %y, i32 0
   %c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer
@@ -2432,31 +2099,15 @@ define void @fdiv_fv_v8f16(ptr %x, half %y) {
 }
 
 define void @fdiv_fv_v6f16(ptr %x, half %y) {
-; LMULMAX1-RV32-LABEL: fdiv_fv_v6f16:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle16.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    vfrdiv.vf v8, v8, fa0
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vslidedown.vi v9, v8, 2
-; LMULMAX1-RV32-NEXT:    addi a1, a0, 8
-; LMULMAX1-RV32-NEXT:    vse32.v v9, (a1)
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; LMULMAX1-RV32-NEXT:    vse16.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: fdiv_fv_v6f16:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle16.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vfrdiv.vf v8, v8, fa0
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vse64.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vslidedown.vi v8, v8, 2
-; LMULMAX1-RV64-NEXT:    addi a0, a0, 8
-; LMULMAX1-RV64-NEXT:    vse32.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    ret
+; CHECK-LABEL: fdiv_fv_v6f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a0)
+; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT:    vfrdiv.vf v8, v8, fa0
+; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vse16.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <6 x half>, ptr %x
   %b = insertelement <6 x half> poison, half %y, i32 0
   %c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer
@@ -2516,33 +2167,16 @@ define void @fma_vf_v8f16(ptr %x, ptr %y, half %z) {
 }
 
 define void @fma_vf_v6f16(ptr %x, ptr %y, half %z) {
-; LMULMAX1-RV32-LABEL: fma_vf_v6f16:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle16.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    vle16.v v9, (a1)
-; LMULMAX1-RV32-NEXT:    vfmacc.vf v9, fa0, v8
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vslidedown.vi v8, v9, 2
-; LMULMAX1-RV32-NEXT:    addi a1, a0, 8
-; LMULMAX1-RV32-NEXT:    vse32.v v8, (a1)
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; LMULMAX1-RV32-NEXT:    vse16.v v9, (a0)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: fma_vf_v6f16:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle16.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vle16.v v9, (a1)
-; LMULMAX1-RV64-NEXT:    vfmacc.vf v9, fa0, v8
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vse64.v v9, (a0)
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vslidedown.vi v8, v9, 2
-; LMULMAX1-RV64-NEXT:    addi a0, a0, 8
-; LMULMAX1-RV64-NEXT:    vse32.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    ret
+; CHECK-LABEL: fma_vf_v6f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a0)
+; CHECK-NEXT:    vle16.v v9, (a1)
+; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT:    vfmacc.vf v9, fa0, v8
+; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vse16.v v9, (a0)
+; CHECK-NEXT:    ret
   %a = load <6 x half>, ptr %x
   %b = load <6 x half>, ptr %y
   %c = insertelement <6 x half> poison, half %z, i32 0
@@ -2607,33 +2241,16 @@ define void @fma_fv_v8f16(ptr %x, ptr %y, half %z) {
 }
 
 define void @fma_fv_v6f16(ptr %x, ptr %y, half %z) {
-; LMULMAX1-RV32-LABEL: fma_fv_v6f16:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle16.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    vle16.v v9, (a1)
-; LMULMAX1-RV32-NEXT:    vfmacc.vf v9, fa0, v8
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vslidedown.vi v8, v9, 2
-; LMULMAX1-RV32-NEXT:    addi a1, a0, 8
-; LMULMAX1-RV32-NEXT:    vse32.v v8, (a1)
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; LMULMAX1-RV32-NEXT:    vse16.v v9, (a0)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: fma_fv_v6f16:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle16.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vle16.v v9, (a1)
-; LMULMAX1-RV64-NEXT:    vfmacc.vf v9, fa0, v8
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vse64.v v9, (a0)
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vslidedown.vi v8, v9, 2
-; LMULMAX1-RV64-NEXT:    addi a0, a0, 8
-; LMULMAX1-RV64-NEXT:    vse32.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    ret
+; CHECK-LABEL: fma_fv_v6f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a0)
+; CHECK-NEXT:    vle16.v v9, (a1)
+; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT:    vfmacc.vf v9, fa0, v8
+; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vse16.v v9, (a0)
+; CHECK-NEXT:    ret
   %a = load <6 x half>, ptr %x
   %b = load <6 x half>, ptr %y
   %c = insertelement <6 x half> poison, half %z, i32 0
@@ -2699,33 +2316,16 @@ define void @fmsub_vf_v8f16(ptr %x, ptr %y, half %z) {
 }
 
 define void @fmsub_vf_v6f16(ptr %x, ptr %y, half %z) {
-; LMULMAX1-RV32-LABEL: fmsub_vf_v6f16:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle16.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    vle16.v v9, (a1)
-; LMULMAX1-RV32-NEXT:    vfmsac.vf v9, fa0, v8
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vslidedown.vi v8, v9, 2
-; LMULMAX1-RV32-NEXT:    addi a1, a0, 8
-; LMULMAX1-RV32-NEXT:    vse32.v v8, (a1)
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; LMULMAX1-RV32-NEXT:    vse16.v v9, (a0)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: fmsub_vf_v6f16:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle16.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vle16.v v9, (a1)
-; LMULMAX1-RV64-NEXT:    vfmsac.vf v9, fa0, v8
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vse64.v v9, (a0)
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vslidedown.vi v8, v9, 2
-; LMULMAX1-RV64-NEXT:    addi a0, a0, 8
-; LMULMAX1-RV64-NEXT:    vse32.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    ret
+; CHECK-LABEL: fmsub_vf_v6f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a0)
+; CHECK-NEXT:    vle16.v v9, (a1)
+; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT:    vfmsac.vf v9, fa0, v8
+; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vse16.v v9, (a0)
+; CHECK-NEXT:    ret
   %a = load <6 x half>, ptr %x
   %b = load <6 x half>, ptr %y
   %c = insertelement <6 x half> poison, half %z, i32 0
@@ -2837,45 +2437,22 @@ define void @trunc_v8f16(ptr %x) {
 declare <8 x half> @llvm.trunc.v8f16(<8 x half>)
 
 define void @trunc_v6f16(ptr %x) {
-; LMULMAX1-RV32-LABEL: trunc_v6f16:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle16.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    lui a1, %hi(.LCPI116_0)
-; LMULMAX1-RV32-NEXT:    flh fa5, %lo(.LCPI116_0)(a1)
-; LMULMAX1-RV32-NEXT:    vfabs.v v9, v8
-; LMULMAX1-RV32-NEXT:    vmflt.vf v0, v9, fa5
-; LMULMAX1-RV32-NEXT:    vfcvt.rtz.x.f.v v9, v8, v0.t
-; LMULMAX1-RV32-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; LMULMAX1-RV32-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
-; LMULMAX1-RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vslidedown.vi v9, v8, 2
-; LMULMAX1-RV32-NEXT:    addi a1, a0, 8
-; LMULMAX1-RV32-NEXT:    vse32.v v9, (a1)
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; LMULMAX1-RV32-NEXT:    vse16.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: trunc_v6f16:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle16.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    lui a1, %hi(.LCPI116_0)
-; LMULMAX1-RV64-NEXT:    flh fa5, %lo(.LCPI116_0)(a1)
-; LMULMAX1-RV64-NEXT:    vfabs.v v9, v8
-; LMULMAX1-RV64-NEXT:    vmflt.vf v0, v9, fa5
-; LMULMAX1-RV64-NEXT:    vfcvt.rtz.x.f.v v9, v8, v0.t
-; LMULMAX1-RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; LMULMAX1-RV64-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
-; LMULMAX1-RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vse64.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vslidedown.vi v8, v8, 2
-; LMULMAX1-RV64-NEXT:    addi a0, a0, 8
-; LMULMAX1-RV64-NEXT:    vse32.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    ret
+; CHECK-LABEL: trunc_v6f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a0)
+; CHECK-NEXT:    lui a1, %hi(.LCPI116_0)
+; CHECK-NEXT:    flh fa5, %lo(.LCPI116_0)(a1)
+; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT:    vfabs.v v9, v8
+; CHECK-NEXT:    vmflt.vf v0, v9, fa5
+; CHECK-NEXT:    vfcvt.rtz.x.f.v v9, v8, v0.t
+; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
+; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vse16.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <6 x half>, ptr %x
   %b = call <6 x half> @llvm.trunc.v6f16(<6 x half> %a)
   store <6 x half> %b, ptr %x
@@ -2952,49 +2529,24 @@ define void @ceil_v8f16(ptr %x) {
 declare <8 x half> @llvm.ceil.v8f16(<8 x half>)
 
 define void @ceil_v6f16(ptr %x) {
-; LMULMAX1-RV32-LABEL: ceil_v6f16:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle16.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    lui a1, %hi(.LCPI120_0)
-; LMULMAX1-RV32-NEXT:    flh fa5, %lo(.LCPI120_0)(a1)
-; LMULMAX1-RV32-NEXT:    vfabs.v v9, v8
-; LMULMAX1-RV32-NEXT:    vmflt.vf v0, v9, fa5
-; LMULMAX1-RV32-NEXT:    fsrmi a1, 3
-; LMULMAX1-RV32-NEXT:    vfcvt.x.f.v v9, v8, v0.t
-; LMULMAX1-RV32-NEXT:    fsrm a1
-; LMULMAX1-RV32-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; LMULMAX1-RV32-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
-; LMULMAX1-RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vslidedown.vi v9, v8, 2
-; LMULMAX1-RV32-NEXT:    addi a1, a0, 8
-; LMULMAX1-RV32-NEXT:    vse32.v v9, (a1)
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; LMULMAX1-RV32-NEXT:    vse16.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: ceil_v6f16:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle16.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    lui a1, %hi(.LCPI120_0)
-; LMULMAX1-RV64-NEXT:    flh fa5, %lo(.LCPI120_0)(a1)
-; LMULMAX1-RV64-NEXT:    vfabs.v v9, v8
-; LMULMAX1-RV64-NEXT:    vmflt.vf v0, v9, fa5
-; LMULMAX1-RV64-NEXT:    fsrmi a1, 3
-; LMULMAX1-RV64-NEXT:    vfcvt.x.f.v v9, v8, v0.t
-; LMULMAX1-RV64-NEXT:    fsrm a1
-; LMULMAX1-RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; LMULMAX1-RV64-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
-; LMULMAX1-RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vse64.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vslidedown.vi v8, v8, 2
-; LMULMAX1-RV64-NEXT:    addi a0, a0, 8
-; LMULMAX1-RV64-NEXT:    vse32.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    ret
+; CHECK-LABEL: ceil_v6f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a0)
+; CHECK-NEXT:    lui a1, %hi(.LCPI120_0)
+; CHECK-NEXT:    flh fa5, %lo(.LCPI120_0)(a1)
+; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT:    vfabs.v v9, v8
+; CHECK-NEXT:    vmflt.vf v0, v9, fa5
+; CHECK-NEXT:    fsrmi a1, 3
+; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; CHECK-NEXT:    fsrm a1
+; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
+; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vse16.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <6 x half>, ptr %x
   %b = call <6 x half> @llvm.ceil.v6f16(<6 x half> %a)
   store <6 x half> %b, ptr %x
@@ -3075,49 +2627,24 @@ define void @floor_v8f16(ptr %x) {
 declare <8 x half> @llvm.floor.v8f16(<8 x half>)
 
 define void @floor_v6f16(ptr %x) {
-; LMULMAX1-RV32-LABEL: floor_v6f16:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle16.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    lui a1, %hi(.LCPI124_0)
-; LMULMAX1-RV32-NEXT:    flh fa5, %lo(.LCPI124_0)(a1)
-; LMULMAX1-RV32-NEXT:    vfabs.v v9, v8
-; LMULMAX1-RV32-NEXT:    vmflt.vf v0, v9, fa5
-; LMULMAX1-RV32-NEXT:    fsrmi a1, 2
-; LMULMAX1-RV32-NEXT:    vfcvt.x.f.v v9, v8, v0.t
-; LMULMAX1-RV32-NEXT:    fsrm a1
-; LMULMAX1-RV32-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; LMULMAX1-RV32-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
-; LMULMAX1-RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vslidedown.vi v9, v8, 2
-; LMULMAX1-RV32-NEXT:    addi a1, a0, 8
-; LMULMAX1-RV32-NEXT:    vse32.v v9, (a1)
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; LMULMAX1-RV32-NEXT:    vse16.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: floor_v6f16:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle16.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    lui a1, %hi(.LCPI124_0)
-; LMULMAX1-RV64-NEXT:    flh fa5, %lo(.LCPI124_0)(a1)
-; LMULMAX1-RV64-NEXT:    vfabs.v v9, v8
-; LMULMAX1-RV64-NEXT:    vmflt.vf v0, v9, fa5
-; LMULMAX1-RV64-NEXT:    fsrmi a1, 2
-; LMULMAX1-RV64-NEXT:    vfcvt.x.f.v v9, v8, v0.t
-; LMULMAX1-RV64-NEXT:    fsrm a1
-; LMULMAX1-RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; LMULMAX1-RV64-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
-; LMULMAX1-RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vse64.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vslidedown.vi v8, v8, 2
-; LMULMAX1-RV64-NEXT:    addi a0, a0, 8
-; LMULMAX1-RV64-NEXT:    vse32.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    ret
+; CHECK-LABEL: floor_v6f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a0)
+; CHECK-NEXT:    lui a1, %hi(.LCPI124_0)
+; CHECK-NEXT:    flh fa5, %lo(.LCPI124_0)(a1)
+; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT:    vfabs.v v9, v8
+; CHECK-NEXT:    vmflt.vf v0, v9, fa5
+; CHECK-NEXT:    fsrmi a1, 2
+; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; CHECK-NEXT:    fsrm a1
+; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
+; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vse16.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <6 x half>, ptr %x
   %b = call <6 x half> @llvm.floor.v6f16(<6 x half> %a)
   store <6 x half> %b, ptr %x
@@ -3198,49 +2725,24 @@ define void @round_v8f16(ptr %x) {
 declare <8 x half> @llvm.round.v8f16(<8 x half>)
 
 define void @round_v6f16(ptr %x) {
-; LMULMAX1-RV32-LABEL: round_v6f16:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle16.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    lui a1, %hi(.LCPI128_0)
-; LMULMAX1-RV32-NEXT:    flh fa5, %lo(.LCPI128_0)(a1)
-; LMULMAX1-RV32-NEXT:    vfabs.v v9, v8
-; LMULMAX1-RV32-NEXT:    vmflt.vf v0, v9, fa5
-; LMULMAX1-RV32-NEXT:    fsrmi a1, 4
-; LMULMAX1-RV32-NEXT:    vfcvt.x.f.v v9, v8, v0.t
-; LMULMAX1-RV32-NEXT:    fsrm a1
-; LMULMAX1-RV32-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; LMULMAX1-RV32-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
-; LMULMAX1-RV32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vslidedown.vi v9, v8, 2
-; LMULMAX1-RV32-NEXT:    addi a1, a0, 8
-; LMULMAX1-RV32-NEXT:    vse32.v v9, (a1)
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; LMULMAX1-RV32-NEXT:    vse16.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: round_v6f16:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle16.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    lui a1, %hi(.LCPI128_0)
-; LMULMAX1-RV64-NEXT:    flh fa5, %lo(.LCPI128_0)(a1)
-; LMULMAX1-RV64-NEXT:    vfabs.v v9, v8
-; LMULMAX1-RV64-NEXT:    vmflt.vf v0, v9, fa5
-; LMULMAX1-RV64-NEXT:    fsrmi a1, 4
-; LMULMAX1-RV64-NEXT:    vfcvt.x.f.v v9, v8, v0.t
-; LMULMAX1-RV64-NEXT:    fsrm a1
-; LMULMAX1-RV64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; LMULMAX1-RV64-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
-; LMULMAX1-RV64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vse64.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vslidedown.vi v8, v8, 2
-; LMULMAX1-RV64-NEXT:    addi a0, a0, 8
-; LMULMAX1-RV64-NEXT:    vse32.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    ret
+; CHECK-LABEL: round_v6f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a0)
+; CHECK-NEXT:    lui a1, %hi(.LCPI128_0)
+; CHECK-NEXT:    flh fa5, %lo(.LCPI128_0)(a1)
+; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT:    vfabs.v v9, v8
+; CHECK-NEXT:    vmflt.vf v0, v9, fa5
+; CHECK-NEXT:    fsrmi a1, 4
+; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; CHECK-NEXT:    fsrm a1
+; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
+; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vse16.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <6 x half>, ptr %x
   %b = call <6 x half> @llvm.round.v6f16(<6 x half> %a)
   store <6 x half> %b, ptr %x
@@ -3454,35 +2956,17 @@ define void @fmuladd_v8f16(ptr %x, ptr %y, ptr %z) {
 declare <8 x half> @llvm.fmuladd.v8f16(<8 x half>, <8 x half>, <8 x half>)
 
 define void @fmuladd_v6f16(ptr %x, ptr %y, ptr %z) {
-; LMULMAX1-RV32-LABEL: fmuladd_v6f16:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle16.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    vle16.v v9, (a1)
-; LMULMAX1-RV32-NEXT:    vle16.v v10, (a2)
-; LMULMAX1-RV32-NEXT:    vfmacc.vv v10, v8, v9
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vslidedown.vi v8, v10, 2
-; LMULMAX1-RV32-NEXT:    addi a1, a0, 8
-; LMULMAX1-RV32-NEXT:    vse32.v v8, (a1)
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; LMULMAX1-RV32-NEXT:    vse16.v v10, (a0)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: fmuladd_v6f16:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle16.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vle16.v v9, (a1)
-; LMULMAX1-RV64-NEXT:    vle16.v v10, (a2)
-; LMULMAX1-RV64-NEXT:    vfmacc.vv v10, v8, v9
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vse64.v v10, (a0)
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vslidedown.vi v8, v10, 2
-; LMULMAX1-RV64-NEXT:    addi a0, a0, 8
-; LMULMAX1-RV64-NEXT:    vse32.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    ret
+; CHECK-LABEL: fmuladd_v6f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a0)
+; CHECK-NEXT:    vle16.v v9, (a1)
+; CHECK-NEXT:    vle16.v v10, (a2)
+; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT:    vfmacc.vv v10, v8, v9
+; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vse16.v v10, (a0)
+; CHECK-NEXT:    ret
   %a = load <6 x half>, ptr %x
   %b = load <6 x half>, ptr %y
   %c = load <6 x half>, ptr %z
@@ -3550,35 +3034,17 @@ define void @fmsub_fmuladd_v8f16(ptr %x, ptr %y, ptr %z) {
 }
 
 define void @fmsub_fmuladd_v6f16(ptr %x, ptr %y, ptr %z) {
-; LMULMAX1-RV32-LABEL: fmsub_fmuladd_v6f16:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle16.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    vle16.v v9, (a1)
-; LMULMAX1-RV32-NEXT:    vle16.v v10, (a2)
-; LMULMAX1-RV32-NEXT:    vfmsac.vv v10, v8, v9
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vslidedown.vi v8, v10, 2
-; LMULMAX1-RV32-NEXT:    addi a1, a0, 8
-; LMULMAX1-RV32-NEXT:    vse32.v v8, (a1)
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; LMULMAX1-RV32-NEXT:    vse16.v v10, (a0)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: fmsub_fmuladd_v6f16:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vle16.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vle16.v v9, (a1)
-; LMULMAX1-RV64-NEXT:    vle16.v v10, (a2)
-; LMULMAX1-RV64-NEXT:    vfmsac.vv v10, v8, v9
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vse64.v v10, (a0)
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vslidedown.vi v8, v10, 2
-; LMULMAX1-RV64-NEXT:    addi a0, a0, 8
-; LMULMAX1-RV64-NEXT:    vse32.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    ret
+; CHECK-LABEL: fmsub_fmuladd_v6f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a0)
+; CHECK-NEXT:    vle16.v v9, (a1)
+; CHECK-NEXT:    vle16.v v10, (a2)
+; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT:    vfmsac.vv v10, v8, v9
+; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vse16.v v10, (a0)
+; CHECK-NEXT:    ret
   %a = load <6 x half>, ptr %x
   %b = load <6 x half>, ptr %y
   %c = load <6 x half>, ptr %z

diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll
index 88a5dbb1386a9..bb39feeb1d067 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll
@@ -79,57 +79,15 @@ define <2 x i1> @fp2ui_v2f32_v2i1(<2 x float> %x) {
 }
 
 define void @fp2si_v3f32_v3i32(ptr %x, ptr %y) {
-; LMULMAX8RV32-LABEL: fp2si_v3f32_v3i32:
-; LMULMAX8RV32:       # %bb.0:
-; LMULMAX8RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX8RV32-NEXT:    vle32.v v8, (a0)
-; LMULMAX8RV32-NEXT:    vfcvt.rtz.x.f.v v8, v8
-; LMULMAX8RV32-NEXT:    vslidedown.vi v9, v8, 2
-; LMULMAX8RV32-NEXT:    addi a0, a1, 8
-; LMULMAX8RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX8RV32-NEXT:    vse32.v v9, (a0)
-; LMULMAX8RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; LMULMAX8RV32-NEXT:    vse32.v v8, (a1)
-; LMULMAX8RV32-NEXT:    ret
-;
-; LMULMAX8RV64-LABEL: fp2si_v3f32_v3i32:
-; LMULMAX8RV64:       # %bb.0:
-; LMULMAX8RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX8RV64-NEXT:    vle32.v v8, (a0)
-; LMULMAX8RV64-NEXT:    vfcvt.rtz.x.f.v v8, v8
-; LMULMAX8RV64-NEXT:    vslidedown.vi v9, v8, 2
-; LMULMAX8RV64-NEXT:    addi a0, a1, 8
-; LMULMAX8RV64-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX8RV64-NEXT:    vse32.v v9, (a0)
-; LMULMAX8RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; LMULMAX8RV64-NEXT:    vse64.v v8, (a1)
-; LMULMAX8RV64-NEXT:    ret
-;
-; LMULMAX1RV32-LABEL: fp2si_v3f32_v3i32:
-; LMULMAX1RV32:       # %bb.0:
-; LMULMAX1RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1RV32-NEXT:    vle32.v v8, (a0)
-; LMULMAX1RV32-NEXT:    vfcvt.rtz.x.f.v v8, v8
-; LMULMAX1RV32-NEXT:    vslidedown.vi v9, v8, 2
-; LMULMAX1RV32-NEXT:    addi a0, a1, 8
-; LMULMAX1RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1RV32-NEXT:    vse32.v v9, (a0)
-; LMULMAX1RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; LMULMAX1RV32-NEXT:    vse32.v v8, (a1)
-; LMULMAX1RV32-NEXT:    ret
-;
-; LMULMAX1RV64-LABEL: fp2si_v3f32_v3i32:
-; LMULMAX1RV64:       # %bb.0:
-; LMULMAX1RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1RV64-NEXT:    vle32.v v8, (a0)
-; LMULMAX1RV64-NEXT:    vfcvt.rtz.x.f.v v8, v8
-; LMULMAX1RV64-NEXT:    vslidedown.vi v9, v8, 2
-; LMULMAX1RV64-NEXT:    addi a0, a1, 8
-; LMULMAX1RV64-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1RV64-NEXT:    vse32.v v9, (a0)
-; LMULMAX1RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; LMULMAX1RV64-NEXT:    vse64.v v8, (a1)
-; LMULMAX1RV64-NEXT:    ret
+; CHECK-LABEL: fp2si_v3f32_v3i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 3, e32, m1, ta, ma
+; CHECK-NEXT:    vle32.v v8, (a0)
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT:    vfcvt.rtz.x.f.v v8, v8
+; CHECK-NEXT:    vsetivli zero, 3, e32, m1, ta, ma
+; CHECK-NEXT:    vse32.v v8, (a1)
+; CHECK-NEXT:    ret
   %a = load <3 x float>, ptr %x
   %d = fptosi <3 x float> %a to <3 x i32>
   store <3 x i32> %d, ptr %y
@@ -137,57 +95,15 @@ define void @fp2si_v3f32_v3i32(ptr %x, ptr %y) {
 }
 
 define void @fp2ui_v3f32_v3i32(ptr %x, ptr %y) {
-; LMULMAX8RV32-LABEL: fp2ui_v3f32_v3i32:
-; LMULMAX8RV32:       # %bb.0:
-; LMULMAX8RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX8RV32-NEXT:    vle32.v v8, (a0)
-; LMULMAX8RV32-NEXT:    vfcvt.rtz.xu.f.v v8, v8
-; LMULMAX8RV32-NEXT:    vslidedown.vi v9, v8, 2
-; LMULMAX8RV32-NEXT:    addi a0, a1, 8
-; LMULMAX8RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX8RV32-NEXT:    vse32.v v9, (a0)
-; LMULMAX8RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; LMULMAX8RV32-NEXT:    vse32.v v8, (a1)
-; LMULMAX8RV32-NEXT:    ret
-;
-; LMULMAX8RV64-LABEL: fp2ui_v3f32_v3i32:
-; LMULMAX8RV64:       # %bb.0:
-; LMULMAX8RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX8RV64-NEXT:    vle32.v v8, (a0)
-; LMULMAX8RV64-NEXT:    vfcvt.rtz.xu.f.v v8, v8
-; LMULMAX8RV64-NEXT:    vslidedown.vi v9, v8, 2
-; LMULMAX8RV64-NEXT:    addi a0, a1, 8
-; LMULMAX8RV64-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX8RV64-NEXT:    vse32.v v9, (a0)
-; LMULMAX8RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; LMULMAX8RV64-NEXT:    vse64.v v8, (a1)
-; LMULMAX8RV64-NEXT:    ret
-;
-; LMULMAX1RV32-LABEL: fp2ui_v3f32_v3i32:
-; LMULMAX1RV32:       # %bb.0:
-; LMULMAX1RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1RV32-NEXT:    vle32.v v8, (a0)
-; LMULMAX1RV32-NEXT:    vfcvt.rtz.xu.f.v v8, v8
-; LMULMAX1RV32-NEXT:    vslidedown.vi v9, v8, 2
-; LMULMAX1RV32-NEXT:    addi a0, a1, 8
-; LMULMAX1RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1RV32-NEXT:    vse32.v v9, (a0)
-; LMULMAX1RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; LMULMAX1RV32-NEXT:    vse32.v v8, (a1)
-; LMULMAX1RV32-NEXT:    ret
-;
-; LMULMAX1RV64-LABEL: fp2ui_v3f32_v3i32:
-; LMULMAX1RV64:       # %bb.0:
-; LMULMAX1RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1RV64-NEXT:    vle32.v v8, (a0)
-; LMULMAX1RV64-NEXT:    vfcvt.rtz.xu.f.v v8, v8
-; LMULMAX1RV64-NEXT:    vslidedown.vi v9, v8, 2
-; LMULMAX1RV64-NEXT:    addi a0, a1, 8
-; LMULMAX1RV64-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1RV64-NEXT:    vse32.v v9, (a0)
-; LMULMAX1RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; LMULMAX1RV64-NEXT:    vse64.v v8, (a1)
-; LMULMAX1RV64-NEXT:    ret
+; CHECK-LABEL: fp2ui_v3f32_v3i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 3, e32, m1, ta, ma
+; CHECK-NEXT:    vle32.v v8, (a0)
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT:    vfcvt.rtz.xu.f.v v8, v8
+; CHECK-NEXT:    vsetivli zero, 3, e32, m1, ta, ma
+; CHECK-NEXT:    vse32.v v8, (a1)
+; CHECK-NEXT:    ret
   %a = load <3 x float>, ptr %x
   %d = fptoui <3 x float> %a to <3 x i32>
   store <3 x i32> %d, ptr %y

diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll
index b60f9405a760f..b3cda0a4ac342 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll
@@ -85,57 +85,15 @@ define <2 x float> @ui2fp_v2i1_v2f32(<2 x i1> %x) {
 }
 
 define void @si2fp_v3i32_v3f32(ptr %x, ptr %y) {
-; LMULMAX8RV32-LABEL: si2fp_v3i32_v3f32:
-; LMULMAX8RV32:       # %bb.0:
-; LMULMAX8RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX8RV32-NEXT:    vle32.v v8, (a0)
-; LMULMAX8RV32-NEXT:    vfcvt.f.x.v v8, v8
-; LMULMAX8RV32-NEXT:    vslidedown.vi v9, v8, 2
-; LMULMAX8RV32-NEXT:    addi a0, a1, 8
-; LMULMAX8RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX8RV32-NEXT:    vse32.v v9, (a0)
-; LMULMAX8RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; LMULMAX8RV32-NEXT:    vse32.v v8, (a1)
-; LMULMAX8RV32-NEXT:    ret
-;
-; LMULMAX8RV64-LABEL: si2fp_v3i32_v3f32:
-; LMULMAX8RV64:       # %bb.0:
-; LMULMAX8RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX8RV64-NEXT:    vle32.v v8, (a0)
-; LMULMAX8RV64-NEXT:    vfcvt.f.x.v v8, v8
-; LMULMAX8RV64-NEXT:    vslidedown.vi v9, v8, 2
-; LMULMAX8RV64-NEXT:    addi a0, a1, 8
-; LMULMAX8RV64-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX8RV64-NEXT:    vse32.v v9, (a0)
-; LMULMAX8RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; LMULMAX8RV64-NEXT:    vse64.v v8, (a1)
-; LMULMAX8RV64-NEXT:    ret
-;
-; LMULMAX1RV32-LABEL: si2fp_v3i32_v3f32:
-; LMULMAX1RV32:       # %bb.0:
-; LMULMAX1RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1RV32-NEXT:    vle32.v v8, (a0)
-; LMULMAX1RV32-NEXT:    vfcvt.f.x.v v8, v8
-; LMULMAX1RV32-NEXT:    vslidedown.vi v9, v8, 2
-; LMULMAX1RV32-NEXT:    addi a0, a1, 8
-; LMULMAX1RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1RV32-NEXT:    vse32.v v9, (a0)
-; LMULMAX1RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; LMULMAX1RV32-NEXT:    vse32.v v8, (a1)
-; LMULMAX1RV32-NEXT:    ret
-;
-; LMULMAX1RV64-LABEL: si2fp_v3i32_v3f32:
-; LMULMAX1RV64:       # %bb.0:
-; LMULMAX1RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1RV64-NEXT:    vle32.v v8, (a0)
-; LMULMAX1RV64-NEXT:    vfcvt.f.x.v v8, v8
-; LMULMAX1RV64-NEXT:    vslidedown.vi v9, v8, 2
-; LMULMAX1RV64-NEXT:    addi a0, a1, 8
-; LMULMAX1RV64-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1RV64-NEXT:    vse32.v v9, (a0)
-; LMULMAX1RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; LMULMAX1RV64-NEXT:    vse64.v v8, (a1)
-; LMULMAX1RV64-NEXT:    ret
+; CHECK-LABEL: si2fp_v3i32_v3f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 3, e32, m1, ta, ma
+; CHECK-NEXT:    vle32.v v8, (a0)
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT:    vfcvt.f.x.v v8, v8
+; CHECK-NEXT:    vsetivli zero, 3, e32, m1, ta, ma
+; CHECK-NEXT:    vse32.v v8, (a1)
+; CHECK-NEXT:    ret
   %a = load <3 x i32>, ptr %x
   %d = sitofp <3 x i32> %a to <3 x float>
   store <3 x float> %d, ptr %y
@@ -143,57 +101,15 @@ define void @si2fp_v3i32_v3f32(ptr %x, ptr %y) {
 }
 
 define void @ui2fp_v3i32_v3f32(ptr %x, ptr %y) {
-; LMULMAX8RV32-LABEL: ui2fp_v3i32_v3f32:
-; LMULMAX8RV32:       # %bb.0:
-; LMULMAX8RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX8RV32-NEXT:    vle32.v v8, (a0)
-; LMULMAX8RV32-NEXT:    vfcvt.f.xu.v v8, v8
-; LMULMAX8RV32-NEXT:    vslidedown.vi v9, v8, 2
-; LMULMAX8RV32-NEXT:    addi a0, a1, 8
-; LMULMAX8RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX8RV32-NEXT:    vse32.v v9, (a0)
-; LMULMAX8RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; LMULMAX8RV32-NEXT:    vse32.v v8, (a1)
-; LMULMAX8RV32-NEXT:    ret
-;
-; LMULMAX8RV64-LABEL: ui2fp_v3i32_v3f32:
-; LMULMAX8RV64:       # %bb.0:
-; LMULMAX8RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX8RV64-NEXT:    vle32.v v8, (a0)
-; LMULMAX8RV64-NEXT:    vfcvt.f.xu.v v8, v8
-; LMULMAX8RV64-NEXT:    vslidedown.vi v9, v8, 2
-; LMULMAX8RV64-NEXT:    addi a0, a1, 8
-; LMULMAX8RV64-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX8RV64-NEXT:    vse32.v v9, (a0)
-; LMULMAX8RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; LMULMAX8RV64-NEXT:    vse64.v v8, (a1)
-; LMULMAX8RV64-NEXT:    ret
-;
-; LMULMAX1RV32-LABEL: ui2fp_v3i32_v3f32:
-; LMULMAX1RV32:       # %bb.0:
-; LMULMAX1RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1RV32-NEXT:    vle32.v v8, (a0)
-; LMULMAX1RV32-NEXT:    vfcvt.f.xu.v v8, v8
-; LMULMAX1RV32-NEXT:    vslidedown.vi v9, v8, 2
-; LMULMAX1RV32-NEXT:    addi a0, a1, 8
-; LMULMAX1RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1RV32-NEXT:    vse32.v v9, (a0)
-; LMULMAX1RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; LMULMAX1RV32-NEXT:    vse32.v v8, (a1)
-; LMULMAX1RV32-NEXT:    ret
-;
-; LMULMAX1RV64-LABEL: ui2fp_v3i32_v3f32:
-; LMULMAX1RV64:       # %bb.0:
-; LMULMAX1RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1RV64-NEXT:    vle32.v v8, (a0)
-; LMULMAX1RV64-NEXT:    vfcvt.f.xu.v v8, v8
-; LMULMAX1RV64-NEXT:    vslidedown.vi v9, v8, 2
-; LMULMAX1RV64-NEXT:    addi a0, a1, 8
-; LMULMAX1RV64-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1RV64-NEXT:    vse32.v v9, (a0)
-; LMULMAX1RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; LMULMAX1RV64-NEXT:    vse64.v v8, (a1)
-; LMULMAX1RV64-NEXT:    ret
+; CHECK-LABEL: ui2fp_v3i32_v3f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 3, e32, m1, ta, ma
+; CHECK-NEXT:    vle32.v v8, (a0)
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT:    vfcvt.f.xu.v v8, v8
+; CHECK-NEXT:    vsetivli zero, 3, e32, m1, ta, ma
+; CHECK-NEXT:    vse32.v v8, (a1)
+; CHECK-NEXT:    ret
   %a = load <3 x i32>, ptr %x
   %d = uitofp <3 x i32> %a to <3 x float>
   store <3 x float> %d, ptr %y

diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll
index 43dfd63532f68..19a0f7a4d79a0 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll
@@ -39,30 +39,24 @@ define void @insertelt_v4i64(ptr %x, i64 %y) {
 define void @insertelt_v3i64(ptr %x, i64 %y) {
 ; RV32-LABEL: insertelt_v3i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; RV32-NEXT:    vsetivli zero, 3, e64, m2, ta, ma
 ; RV32-NEXT:    vle64.v v8, (a0)
-; RV32-NEXT:    lw a3, 16(a0)
-; RV32-NEXT:    addi a4, a0, 20
-; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT:    vlse32.v v10, (a4), zero
-; RV32-NEXT:    vsetvli zero, zero, e32, m1, tu, ma
-; RV32-NEXT:    vmv.s.x v10, a3
-; RV32-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; RV32-NEXT:    vslideup.vi v8, v10, 2
 ; RV32-NEXT:    vsetivli zero, 2, e32, m2, ta, ma
 ; RV32-NEXT:    vslide1down.vx v10, v8, a1
 ; RV32-NEXT:    vslide1down.vx v10, v10, a2
 ; RV32-NEXT:    vsetivli zero, 3, e64, m2, tu, ma
 ; RV32-NEXT:    vslideup.vi v8, v10, 2
-; RV32-NEXT:    sw a1, 16(a0)
-; RV32-NEXT:    sw a2, 20(a0)
-; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
 ; RV32-NEXT:    vse64.v v8, (a0)
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: insertelt_v3i64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    sd a1, 16(a0)
+; RV64-NEXT:    vsetivli zero, 3, e64, m2, ta, ma
+; RV64-NEXT:    vle64.v v8, (a0)
+; RV64-NEXT:    vmv.s.x v10, a1
+; RV64-NEXT:    vsetvli zero, zero, e64, m2, tu, ma
+; RV64-NEXT:    vslideup.vi v8, v10, 2
+; RV64-NEXT:    vse64.v v8, (a0)
 ; RV64-NEXT:    ret
   %a = load <3 x i64>, ptr %x, align 8
   %b = insertelement <3 x i64> %a, i64 %y, i32 2

diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
index e51950d1f8290..a536c121898de 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
@@ -446,30 +446,22 @@ define void @buildvec_seq2_v16i8_v2i64(ptr %x) {
 }
 
 define void @buildvec_seq_v9i8(ptr %x) {
-; RV32-LABEL: buildvec_seq_v9i8:
-; RV32:       # %bb.0:
-; RV32-NEXT:    li a1, 3
-; RV32-NEXT:    sb a1, 8(a0)
-; RV32-NEXT:    li a1, 73
-; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; RV32-NEXT:    vmv.s.x v0, a1
-; RV32-NEXT:    vmv.v.i v9, 2
-; RV32-NEXT:    li a1, 36
-; RV32-NEXT:    vmv.s.x v8, a1
-; RV32-NEXT:    vmerge.vim v9, v9, 1, v0
-; RV32-NEXT:    vmv1r.v v0, v8
-; RV32-NEXT:    vmerge.vim v8, v9, 3, v0
-; RV32-NEXT:    vse8.v v8, (a0)
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: buildvec_seq_v9i8:
-; RV64:       # %bb.0:
-; RV64-NEXT:    lui a1, %hi(.LCPI26_0)
-; RV64-NEXT:    ld a1, %lo(.LCPI26_0)(a1)
-; RV64-NEXT:    li a2, 3
-; RV64-NEXT:    sb a2, 8(a0)
-; RV64-NEXT:    sd a1, 0(a0)
-; RV64-NEXT:    ret
+; CHECK-LABEL: buildvec_seq_v9i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 73
+; CHECK-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
+; CHECK-NEXT:    vmv.s.x v0, a1
+; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
+; CHECK-NEXT:    vmv.v.i v8, 3
+; CHECK-NEXT:    vmerge.vim v8, v8, 1, v0
+; CHECK-NEXT:    li a1, 146
+; CHECK-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
+; CHECK-NEXT:    vmv.s.x v0, a1
+; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
+; CHECK-NEXT:    vmerge.vim v8, v8, 2, v0
+; CHECK-NEXT:    vsetivli zero, 9, e8, m1, ta, ma
+; CHECK-NEXT:    vse8.v v8, (a0)
+; CHECK-NEXT:    ret
   store <9 x i8> <i8 1, i8 2, i8 3, i8 1, i8 2, i8 3, i8 1, i8 2, i8 3>, ptr %x
   ret void
 }

diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-splat.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-splat.ll
index baf5f9c98826a..05c99f04b7d14 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-splat.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-splat.ll
@@ -549,68 +549,13 @@ define void @splat_zero_v2i32(ptr %p) {
 
 ; Not a power of two and requires more than two scalar stores.
 define void @splat_zero_v7i16(ptr %p) {
-; LMULMAX8-RV32-LABEL: splat_zero_v7i16:
-; LMULMAX8-RV32:       # %bb.0:
-; LMULMAX8-RV32-NEXT:    sh zero, 12(a0)
-; LMULMAX8-RV32-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; LMULMAX8-RV32-NEXT:    vmv.v.i v8, 0
-; LMULMAX8-RV32-NEXT:    vse16.v v8, (a0)
-; LMULMAX8-RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX8-RV32-NEXT:    vmv.v.i v8, 0
-; LMULMAX8-RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX8-RV32-NEXT:    vslidedown.vi v8, v8, 2
-; LMULMAX8-RV32-NEXT:    addi a0, a0, 8
-; LMULMAX8-RV32-NEXT:    vse32.v v8, (a0)
-; LMULMAX8-RV32-NEXT:    ret
-;
-; LMULMAX2-RV32-LABEL: splat_zero_v7i16:
-; LMULMAX2-RV32:       # %bb.0:
-; LMULMAX2-RV32-NEXT:    sh zero, 12(a0)
-; LMULMAX2-RV32-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; LMULMAX2-RV32-NEXT:    vmv.v.i v8, 0
-; LMULMAX2-RV32-NEXT:    vse16.v v8, (a0)
-; LMULMAX2-RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX2-RV32-NEXT:    vmv.v.i v8, 0
-; LMULMAX2-RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX2-RV32-NEXT:    vslidedown.vi v8, v8, 2
-; LMULMAX2-RV32-NEXT:    addi a0, a0, 8
-; LMULMAX2-RV32-NEXT:    vse32.v v8, (a0)
-; LMULMAX2-RV32-NEXT:    ret
-;
-; LMULMAX1-RV32-LABEL: splat_zero_v7i16:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    sh zero, 12(a0)
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; LMULMAX1-RV32-NEXT:    vmv.v.i v8, 0
-; LMULMAX1-RV32-NEXT:    vse16.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vmv.v.i v8, 0
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vslidedown.vi v8, v8, 2
-; LMULMAX1-RV32-NEXT:    addi a0, a0, 8
-; LMULMAX1-RV32-NEXT:    vse32.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX8-RV64-LABEL: splat_zero_v7i16:
-; LMULMAX8-RV64:       # %bb.0:
-; LMULMAX8-RV64-NEXT:    sh zero, 12(a0)
-; LMULMAX8-RV64-NEXT:    sw zero, 8(a0)
-; LMULMAX8-RV64-NEXT:    sd zero, 0(a0)
-; LMULMAX8-RV64-NEXT:    ret
-;
-; LMULMAX2-RV64-LABEL: splat_zero_v7i16:
-; LMULMAX2-RV64:       # %bb.0:
-; LMULMAX2-RV64-NEXT:    sh zero, 12(a0)
-; LMULMAX2-RV64-NEXT:    sw zero, 8(a0)
-; LMULMAX2-RV64-NEXT:    sd zero, 0(a0)
-; LMULMAX2-RV64-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: splat_zero_v7i16:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    sh zero, 12(a0)
-; LMULMAX1-RV64-NEXT:    sw zero, 8(a0)
-; LMULMAX1-RV64-NEXT:    sd zero, 0(a0)
-; LMULMAX1-RV64-NEXT:    ret
+; CHECK-LABEL: splat_zero_v7i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT:    vmv.v.i v8, 0
+; CHECK-NEXT:    vsetivli zero, 7, e16, m1, ta, ma
+; CHECK-NEXT:    vse16.v v8, (a0)
+; CHECK-NEXT:    ret
   store <7 x i16> zeroinitializer, ptr %p
   ret void
 }

diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
index 9b40d8f963260..c660e7f8ff65e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
@@ -39,33 +39,16 @@ define void @add_v8i16(ptr %x, ptr %y) {
 }
 
 define void @add_v6i16(ptr %x, ptr %y) {
-; RV32-LABEL: add_v6i16:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV32-NEXT:    vle16.v v8, (a0)
-; RV32-NEXT:    vle16.v v9, (a1)
-; RV32-NEXT:    vadd.vv v8, v8, v9
-; RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT:    vslidedown.vi v9, v8, 2
-; RV32-NEXT:    addi a1, a0, 8
-; RV32-NEXT:    vse32.v v9, (a1)
-; RV32-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; RV32-NEXT:    vse16.v v8, (a0)
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: add_v6i16:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV64-NEXT:    vle16.v v8, (a0)
-; RV64-NEXT:    vle16.v v9, (a1)
-; RV64-NEXT:    vadd.vv v8, v8, v9
-; RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT:    vse64.v v8, (a0)
-; RV64-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; RV64-NEXT:    vslidedown.vi v8, v8, 2
-; RV64-NEXT:    addi a0, a0, 8
-; RV64-NEXT:    vse32.v v8, (a0)
-; RV64-NEXT:    ret
+; CHECK-LABEL: add_v6i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a0)
+; CHECK-NEXT:    vle16.v v9, (a1)
+; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT:    vadd.vv v8, v8, v9
+; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vse16.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <6 x i16>, ptr %x
   %b = load <6 x i16>, ptr %y
   %c = add <6 x i16> %a, %b
@@ -138,33 +121,16 @@ define void @sub_v8i16(ptr %x, ptr %y) {
 }
 
 define void @sub_v6i16(ptr %x, ptr %y) {
-; RV32-LABEL: sub_v6i16:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV32-NEXT:    vle16.v v8, (a0)
-; RV32-NEXT:    vle16.v v9, (a1)
-; RV32-NEXT:    vsub.vv v8, v8, v9
-; RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT:    vslidedown.vi v9, v8, 2
-; RV32-NEXT:    addi a1, a0, 8
-; RV32-NEXT:    vse32.v v9, (a1)
-; RV32-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; RV32-NEXT:    vse16.v v8, (a0)
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: sub_v6i16:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV64-NEXT:    vle16.v v8, (a0)
-; RV64-NEXT:    vle16.v v9, (a1)
-; RV64-NEXT:    vsub.vv v8, v8, v9
-; RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT:    vse64.v v8, (a0)
-; RV64-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; RV64-NEXT:    vslidedown.vi v8, v8, 2
-; RV64-NEXT:    addi a0, a0, 8
-; RV64-NEXT:    vse32.v v8, (a0)
-; RV64-NEXT:    ret
+; CHECK-LABEL: sub_v6i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a0)
+; CHECK-NEXT:    vle16.v v9, (a1)
+; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT:    vsub.vv v8, v8, v9
+; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vse16.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <6 x i16>, ptr %x
   %b = load <6 x i16>, ptr %y
   %c = sub <6 x i16> %a, %b
@@ -237,33 +203,16 @@ define void @mul_v8i16(ptr %x, ptr %y) {
 }
 
 define void @mul_v6i16(ptr %x, ptr %y) {
-; RV32-LABEL: mul_v6i16:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV32-NEXT:    vle16.v v8, (a0)
-; RV32-NEXT:    vle16.v v9, (a1)
-; RV32-NEXT:    vmul.vv v8, v8, v9
-; RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT:    vslidedown.vi v9, v8, 2
-; RV32-NEXT:    addi a1, a0, 8
-; RV32-NEXT:    vse32.v v9, (a1)
-; RV32-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; RV32-NEXT:    vse16.v v8, (a0)
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: mul_v6i16:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV64-NEXT:    vle16.v v8, (a0)
-; RV64-NEXT:    vle16.v v9, (a1)
-; RV64-NEXT:    vmul.vv v8, v8, v9
-; RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT:    vse64.v v8, (a0)
-; RV64-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; RV64-NEXT:    vslidedown.vi v8, v8, 2
-; RV64-NEXT:    addi a0, a0, 8
-; RV64-NEXT:    vse32.v v8, (a0)
-; RV64-NEXT:    ret
+; CHECK-LABEL: mul_v6i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a0)
+; CHECK-NEXT:    vle16.v v9, (a1)
+; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT:    vmul.vv v8, v8, v9
+; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vse16.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <6 x i16>, ptr %x
   %b = load <6 x i16>, ptr %y
   %c = mul <6 x i16> %a, %b
@@ -336,33 +285,16 @@ define void @and_v8i16(ptr %x, ptr %y) {
 }
 
 define void @and_v6i16(ptr %x, ptr %y) {
-; RV32-LABEL: and_v6i16:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV32-NEXT:    vle16.v v8, (a0)
-; RV32-NEXT:    vle16.v v9, (a1)
-; RV32-NEXT:    vand.vv v8, v8, v9
-; RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT:    vslidedown.vi v9, v8, 2
-; RV32-NEXT:    addi a1, a0, 8
-; RV32-NEXT:    vse32.v v9, (a1)
-; RV32-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; RV32-NEXT:    vse16.v v8, (a0)
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: and_v6i16:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV64-NEXT:    vle16.v v8, (a0)
-; RV64-NEXT:    vle16.v v9, (a1)
-; RV64-NEXT:    vand.vv v8, v8, v9
-; RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT:    vse64.v v8, (a0)
-; RV64-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; RV64-NEXT:    vslidedown.vi v8, v8, 2
-; RV64-NEXT:    addi a0, a0, 8
-; RV64-NEXT:    vse32.v v8, (a0)
-; RV64-NEXT:    ret
+; CHECK-LABEL: and_v6i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a0)
+; CHECK-NEXT:    vle16.v v9, (a1)
+; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT:    vand.vv v8, v8, v9
+; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vse16.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <6 x i16>, ptr %x
   %b = load <6 x i16>, ptr %y
   %c = and <6 x i16> %a, %b
@@ -435,33 +367,16 @@ define void @or_v8i16(ptr %x, ptr %y) {
 }
 
 define void @or_v6i16(ptr %x, ptr %y) {
-; RV32-LABEL: or_v6i16:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV32-NEXT:    vle16.v v8, (a0)
-; RV32-NEXT:    vle16.v v9, (a1)
-; RV32-NEXT:    vor.vv v8, v8, v9
-; RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT:    vslidedown.vi v9, v8, 2
-; RV32-NEXT:    addi a1, a0, 8
-; RV32-NEXT:    vse32.v v9, (a1)
-; RV32-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; RV32-NEXT:    vse16.v v8, (a0)
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: or_v6i16:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV64-NEXT:    vle16.v v8, (a0)
-; RV64-NEXT:    vle16.v v9, (a1)
-; RV64-NEXT:    vor.vv v8, v8, v9
-; RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT:    vse64.v v8, (a0)
-; RV64-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; RV64-NEXT:    vslidedown.vi v8, v8, 2
-; RV64-NEXT:    addi a0, a0, 8
-; RV64-NEXT:    vse32.v v8, (a0)
-; RV64-NEXT:    ret
+; CHECK-LABEL: or_v6i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a0)
+; CHECK-NEXT:    vle16.v v9, (a1)
+; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT:    vor.vv v8, v8, v9
+; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vse16.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <6 x i16>, ptr %x
   %b = load <6 x i16>, ptr %y
   %c = or <6 x i16> %a, %b
@@ -534,33 +449,16 @@ define void @xor_v8i16(ptr %x, ptr %y) {
 }
 
 define void @xor_v6i16(ptr %x, ptr %y) {
-; RV32-LABEL: xor_v6i16:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV32-NEXT:    vle16.v v8, (a0)
-; RV32-NEXT:    vle16.v v9, (a1)
-; RV32-NEXT:    vxor.vv v8, v8, v9
-; RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT:    vslidedown.vi v9, v8, 2
-; RV32-NEXT:    addi a1, a0, 8
-; RV32-NEXT:    vse32.v v9, (a1)
-; RV32-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; RV32-NEXT:    vse16.v v8, (a0)
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: xor_v6i16:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV64-NEXT:    vle16.v v8, (a0)
-; RV64-NEXT:    vle16.v v9, (a1)
-; RV64-NEXT:    vxor.vv v8, v8, v9
-; RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT:    vse64.v v8, (a0)
-; RV64-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; RV64-NEXT:    vslidedown.vi v8, v8, 2
-; RV64-NEXT:    addi a0, a0, 8
-; RV64-NEXT:    vse32.v v8, (a0)
-; RV64-NEXT:    ret
+; CHECK-LABEL: xor_v6i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a0)
+; CHECK-NEXT:    vle16.v v9, (a1)
+; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT:    vxor.vv v8, v8, v9
+; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vse16.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <6 x i16>, ptr %x
   %b = load <6 x i16>, ptr %y
   %c = xor <6 x i16> %a, %b
@@ -633,33 +531,16 @@ define void @lshr_v8i16(ptr %x, ptr %y) {
 }
 
 define void @lshr_v6i16(ptr %x, ptr %y) {
-; RV32-LABEL: lshr_v6i16:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV32-NEXT:    vle16.v v8, (a0)
-; RV32-NEXT:    vle16.v v9, (a1)
-; RV32-NEXT:    vsrl.vv v8, v8, v9
-; RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT:    vslidedown.vi v9, v8, 2
-; RV32-NEXT:    addi a1, a0, 8
-; RV32-NEXT:    vse32.v v9, (a1)
-; RV32-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; RV32-NEXT:    vse16.v v8, (a0)
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: lshr_v6i16:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV64-NEXT:    vle16.v v8, (a0)
-; RV64-NEXT:    vle16.v v9, (a1)
-; RV64-NEXT:    vsrl.vv v8, v8, v9
-; RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT:    vse64.v v8, (a0)
-; RV64-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; RV64-NEXT:    vslidedown.vi v8, v8, 2
-; RV64-NEXT:    addi a0, a0, 8
-; RV64-NEXT:    vse32.v v8, (a0)
-; RV64-NEXT:    ret
+; CHECK-LABEL: lshr_v6i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a0)
+; CHECK-NEXT:    vle16.v v9, (a1)
+; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT:    vsrl.vv v8, v8, v9
+; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vse16.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <6 x i16>, ptr %x
   %b = load <6 x i16>, ptr %y
   %c = lshr <6 x i16> %a, %b
@@ -732,33 +613,16 @@ define void @ashr_v8i16(ptr %x, ptr %y) {
 }
 
 define void @ashr_v6i16(ptr %x, ptr %y) {
-; RV32-LABEL: ashr_v6i16:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV32-NEXT:    vle16.v v8, (a0)
-; RV32-NEXT:    vle16.v v9, (a1)
-; RV32-NEXT:    vsra.vv v8, v8, v9
-; RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT:    vslidedown.vi v9, v8, 2
-; RV32-NEXT:    addi a1, a0, 8
-; RV32-NEXT:    vse32.v v9, (a1)
-; RV32-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; RV32-NEXT:    vse16.v v8, (a0)
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: ashr_v6i16:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV64-NEXT:    vle16.v v8, (a0)
-; RV64-NEXT:    vle16.v v9, (a1)
-; RV64-NEXT:    vsra.vv v8, v8, v9
-; RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT:    vse64.v v8, (a0)
-; RV64-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; RV64-NEXT:    vslidedown.vi v8, v8, 2
-; RV64-NEXT:    addi a0, a0, 8
-; RV64-NEXT:    vse32.v v8, (a0)
-; RV64-NEXT:    ret
+; CHECK-LABEL: ashr_v6i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a0)
+; CHECK-NEXT:    vle16.v v9, (a1)
+; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT:    vsra.vv v8, v8, v9
+; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vse16.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <6 x i16>, ptr %x
   %b = load <6 x i16>, ptr %y
   %c = ashr <6 x i16> %a, %b
@@ -831,33 +695,16 @@ define void @shl_v8i16(ptr %x, ptr %y) {
 }
 
 define void @shl_v6i16(ptr %x, ptr %y) {
-; RV32-LABEL: shl_v6i16:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV32-NEXT:    vle16.v v8, (a0)
-; RV32-NEXT:    vle16.v v9, (a1)
-; RV32-NEXT:    vsll.vv v8, v8, v9
-; RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT:    vslidedown.vi v9, v8, 2
-; RV32-NEXT:    addi a1, a0, 8
-; RV32-NEXT:    vse32.v v9, (a1)
-; RV32-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; RV32-NEXT:    vse16.v v8, (a0)
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: shl_v6i16:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV64-NEXT:    vle16.v v8, (a0)
-; RV64-NEXT:    vle16.v v9, (a1)
-; RV64-NEXT:    vsll.vv v8, v8, v9
-; RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT:    vse64.v v8, (a0)
-; RV64-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; RV64-NEXT:    vslidedown.vi v8, v8, 2
-; RV64-NEXT:    addi a0, a0, 8
-; RV64-NEXT:    vse32.v v8, (a0)
-; RV64-NEXT:    ret
+; CHECK-LABEL: shl_v6i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a0)
+; CHECK-NEXT:    vle16.v v9, (a1)
+; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT:    vsll.vv v8, v8, v9
+; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vse16.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <6 x i16>, ptr %x
   %b = load <6 x i16>, ptr %y
   %c = shl <6 x i16> %a, %b
@@ -930,48 +777,23 @@ define void @sdiv_v8i16(ptr %x, ptr %y) {
 }
 
 define void @sdiv_v6i16(ptr %x, ptr %y) {
-; RV32-LABEL: sdiv_v6i16:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV32-NEXT:    vle16.v v8, (a1)
-; RV32-NEXT:    vle16.v v9, (a0)
-; RV32-NEXT:    vsetivli zero, 2, e16, m1, ta, ma
-; RV32-NEXT:    vslidedown.vi v10, v8, 4
-; RV32-NEXT:    vslidedown.vi v11, v9, 4
-; RV32-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
-; RV32-NEXT:    vdiv.vv v10, v11, v10
-; RV32-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; RV32-NEXT:    vslideup.vi v11, v10, 4
-; RV32-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; RV32-NEXT:    vdiv.vv v8, v9, v8
-; RV32-NEXT:    vse16.v v8, (a0)
-; RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT:    vslidedown.vi v8, v11, 2
-; RV32-NEXT:    addi a0, a0, 8
-; RV32-NEXT:    vse32.v v8, (a0)
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: sdiv_v6i16:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV64-NEXT:    vle16.v v8, (a1)
-; RV64-NEXT:    vle16.v v9, (a0)
-; RV64-NEXT:    vsetivli zero, 2, e16, m1, ta, ma
-; RV64-NEXT:    vslidedown.vi v10, v8, 4
-; RV64-NEXT:    vslidedown.vi v11, v9, 4
-; RV64-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
-; RV64-NEXT:    vdiv.vv v10, v11, v10
-; RV64-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; RV64-NEXT:    vdiv.vv v8, v9, v8
-; RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV64-NEXT:    vslideup.vi v8, v10, 4
-; RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT:    vse64.v v8, (a0)
-; RV64-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; RV64-NEXT:    vslidedown.vi v8, v8, 2
-; RV64-NEXT:    addi a0, a0, 8
-; RV64-NEXT:    vse32.v v8, (a0)
-; RV64-NEXT:    ret
+; CHECK-LABEL: sdiv_v6i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a1)
+; CHECK-NEXT:    vle16.v v9, (a0)
+; CHECK-NEXT:    vsetivli zero, 2, e16, m1, ta, ma
+; CHECK-NEXT:    vslidedown.vi v10, v8, 4
+; CHECK-NEXT:    vslidedown.vi v11, v9, 4
+; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
+; CHECK-NEXT:    vdiv.vv v10, v11, v10
+; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
+; CHECK-NEXT:    vdiv.vv v8, v9, v8
+; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT:    vslideup.vi v8, v10, 4
+; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vse16.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <6 x i16>, ptr %x
   %b = load <6 x i16>, ptr %y
   %c = sdiv <6 x i16> %a, %b
@@ -1044,48 +866,23 @@ define void @srem_v8i16(ptr %x, ptr %y) {
 }
 
 define void @srem_v6i16(ptr %x, ptr %y) {
-; RV32-LABEL: srem_v6i16:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV32-NEXT:    vle16.v v8, (a1)
-; RV32-NEXT:    vle16.v v9, (a0)
-; RV32-NEXT:    vsetivli zero, 2, e16, m1, ta, ma
-; RV32-NEXT:    vslidedown.vi v10, v8, 4
-; RV32-NEXT:    vslidedown.vi v11, v9, 4
-; RV32-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
-; RV32-NEXT:    vrem.vv v10, v11, v10
-; RV32-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; RV32-NEXT:    vslideup.vi v11, v10, 4
-; RV32-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; RV32-NEXT:    vrem.vv v8, v9, v8
-; RV32-NEXT:    vse16.v v8, (a0)
-; RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT:    vslidedown.vi v8, v11, 2
-; RV32-NEXT:    addi a0, a0, 8
-; RV32-NEXT:    vse32.v v8, (a0)
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: srem_v6i16:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV64-NEXT:    vle16.v v8, (a1)
-; RV64-NEXT:    vle16.v v9, (a0)
-; RV64-NEXT:    vsetivli zero, 2, e16, m1, ta, ma
-; RV64-NEXT:    vslidedown.vi v10, v8, 4
-; RV64-NEXT:    vslidedown.vi v11, v9, 4
-; RV64-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
-; RV64-NEXT:    vrem.vv v10, v11, v10
-; RV64-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; RV64-NEXT:    vrem.vv v8, v9, v8
-; RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV64-NEXT:    vslideup.vi v8, v10, 4
-; RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT:    vse64.v v8, (a0)
-; RV64-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; RV64-NEXT:    vslidedown.vi v8, v8, 2
-; RV64-NEXT:    addi a0, a0, 8
-; RV64-NEXT:    vse32.v v8, (a0)
-; RV64-NEXT:    ret
+; CHECK-LABEL: srem_v6i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a1)
+; CHECK-NEXT:    vle16.v v9, (a0)
+; CHECK-NEXT:    vsetivli zero, 2, e16, m1, ta, ma
+; CHECK-NEXT:    vslidedown.vi v10, v8, 4
+; CHECK-NEXT:    vslidedown.vi v11, v9, 4
+; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
+; CHECK-NEXT:    vrem.vv v10, v11, v10
+; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
+; CHECK-NEXT:    vrem.vv v8, v9, v8
+; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT:    vslideup.vi v8, v10, 4
+; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vse16.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <6 x i16>, ptr %x
   %b = load <6 x i16>, ptr %y
   %c = srem <6 x i16> %a, %b
@@ -1158,48 +955,23 @@ define void @udiv_v8i16(ptr %x, ptr %y) {
 }
 
 define void @udiv_v6i16(ptr %x, ptr %y) {
-; RV32-LABEL: udiv_v6i16:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV32-NEXT:    vle16.v v8, (a1)
-; RV32-NEXT:    vle16.v v9, (a0)
-; RV32-NEXT:    vsetivli zero, 2, e16, m1, ta, ma
-; RV32-NEXT:    vslidedown.vi v10, v8, 4
-; RV32-NEXT:    vslidedown.vi v11, v9, 4
-; RV32-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
-; RV32-NEXT:    vdivu.vv v10, v11, v10
-; RV32-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; RV32-NEXT:    vslideup.vi v11, v10, 4
-; RV32-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; RV32-NEXT:    vdivu.vv v8, v9, v8
-; RV32-NEXT:    vse16.v v8, (a0)
-; RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT:    vslidedown.vi v8, v11, 2
-; RV32-NEXT:    addi a0, a0, 8
-; RV32-NEXT:    vse32.v v8, (a0)
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: udiv_v6i16:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV64-NEXT:    vle16.v v8, (a1)
-; RV64-NEXT:    vle16.v v9, (a0)
-; RV64-NEXT:    vsetivli zero, 2, e16, m1, ta, ma
-; RV64-NEXT:    vslidedown.vi v10, v8, 4
-; RV64-NEXT:    vslidedown.vi v11, v9, 4
-; RV64-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
-; RV64-NEXT:    vdivu.vv v10, v11, v10
-; RV64-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; RV64-NEXT:    vdivu.vv v8, v9, v8
-; RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV64-NEXT:    vslideup.vi v8, v10, 4
-; RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT:    vse64.v v8, (a0)
-; RV64-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; RV64-NEXT:    vslidedown.vi v8, v8, 2
-; RV64-NEXT:    addi a0, a0, 8
-; RV64-NEXT:    vse32.v v8, (a0)
-; RV64-NEXT:    ret
+; CHECK-LABEL: udiv_v6i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a1)
+; CHECK-NEXT:    vle16.v v9, (a0)
+; CHECK-NEXT:    vsetivli zero, 2, e16, m1, ta, ma
+; CHECK-NEXT:    vslidedown.vi v10, v8, 4
+; CHECK-NEXT:    vslidedown.vi v11, v9, 4
+; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
+; CHECK-NEXT:    vdivu.vv v10, v11, v10
+; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
+; CHECK-NEXT:    vdivu.vv v8, v9, v8
+; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT:    vslideup.vi v8, v10, 4
+; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vse16.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <6 x i16>, ptr %x
   %b = load <6 x i16>, ptr %y
   %c = udiv <6 x i16> %a, %b
@@ -1272,48 +1044,23 @@ define void @urem_v8i16(ptr %x, ptr %y) {
 }
 
 define void @urem_v6i16(ptr %x, ptr %y) {
-; RV32-LABEL: urem_v6i16:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV32-NEXT:    vle16.v v8, (a1)
-; RV32-NEXT:    vle16.v v9, (a0)
-; RV32-NEXT:    vsetivli zero, 2, e16, m1, ta, ma
-; RV32-NEXT:    vslidedown.vi v10, v8, 4
-; RV32-NEXT:    vslidedown.vi v11, v9, 4
-; RV32-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
-; RV32-NEXT:    vremu.vv v10, v11, v10
-; RV32-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; RV32-NEXT:    vslideup.vi v11, v10, 4
-; RV32-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; RV32-NEXT:    vremu.vv v8, v9, v8
-; RV32-NEXT:    vse16.v v8, (a0)
-; RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT:    vslidedown.vi v8, v11, 2
-; RV32-NEXT:    addi a0, a0, 8
-; RV32-NEXT:    vse32.v v8, (a0)
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: urem_v6i16:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV64-NEXT:    vle16.v v8, (a1)
-; RV64-NEXT:    vle16.v v9, (a0)
-; RV64-NEXT:    vsetivli zero, 2, e16, m1, ta, ma
-; RV64-NEXT:    vslidedown.vi v10, v8, 4
-; RV64-NEXT:    vslidedown.vi v11, v9, 4
-; RV64-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
-; RV64-NEXT:    vremu.vv v10, v11, v10
-; RV64-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; RV64-NEXT:    vremu.vv v8, v9, v8
-; RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV64-NEXT:    vslideup.vi v8, v10, 4
-; RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT:    vse64.v v8, (a0)
-; RV64-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; RV64-NEXT:    vslidedown.vi v8, v8, 2
-; RV64-NEXT:    addi a0, a0, 8
-; RV64-NEXT:    vse32.v v8, (a0)
-; RV64-NEXT:    ret
+; CHECK-LABEL: urem_v6i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a1)
+; CHECK-NEXT:    vle16.v v9, (a0)
+; CHECK-NEXT:    vsetivli zero, 2, e16, m1, ta, ma
+; CHECK-NEXT:    vslidedown.vi v10, v8, 4
+; CHECK-NEXT:    vslidedown.vi v11, v9, 4
+; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
+; CHECK-NEXT:    vremu.vv v10, v11, v10
+; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
+; CHECK-NEXT:    vremu.vv v8, v9, v8
+; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT:    vslideup.vi v8, v10, 4
+; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vse16.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <6 x i16>, ptr %x
   %b = load <6 x i16>, ptr %y
   %c = urem <6 x i16> %a, %b
@@ -1493,58 +1240,30 @@ define void @mulhu_v8i16(ptr %x) {
 }
 
 define void @mulhu_v6i16(ptr %x) {
-; RV32-LABEL: mulhu_v6i16:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV32-NEXT:    vle16.v v8, (a0)
-; RV32-NEXT:    vsetivli zero, 2, e16, m1, ta, ma
-; RV32-NEXT:    vslidedown.vi v9, v8, 4
-; RV32-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
-; RV32-NEXT:    vid.v v10
-; RV32-NEXT:    vadd.vi v10, v10, 12
-; RV32-NEXT:    vdivu.vv v9, v9, v10
-; RV32-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; RV32-NEXT:    vslideup.vi v10, v9, 4
-; RV32-NEXT:    lui a1, %hi(.LCPI67_0)
-; RV32-NEXT:    addi a1, a1, %lo(.LCPI67_0)
-; RV32-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; RV32-NEXT:    vle16.v v9, (a1)
-; RV32-NEXT:    vdivu.vv v8, v8, v9
-; RV32-NEXT:    vse16.v v8, (a0)
-; RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT:    vslidedown.vi v8, v10, 2
-; RV32-NEXT:    addi a0, a0, 8
-; RV32-NEXT:    vse32.v v8, (a0)
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: mulhu_v6i16:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV64-NEXT:    vle16.v v8, (a0)
-; RV64-NEXT:    lui a1, %hi(.LCPI67_0)
-; RV64-NEXT:    addi a1, a1, %lo(.LCPI67_0)
-; RV64-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; RV64-NEXT:    vle16.v v9, (a1)
-; RV64-NEXT:    vdivu.vv v9, v8, v9
-; RV64-NEXT:    vsetivli zero, 2, e16, m1, ta, ma
-; RV64-NEXT:    vslidedown.vi v8, v8, 4
-; RV64-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
-; RV64-NEXT:    vid.v v10
-; RV64-NEXT:    vadd.vi v10, v10, 12
-; RV64-NEXT:    vdivu.vv v8, v8, v10
-; RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV64-NEXT:    vslideup.vi v9, v8, 4
-; RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT:    vse64.v v9, (a0)
-; RV64-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; RV64-NEXT:    vslidedown.vi v8, v9, 2
-; RV64-NEXT:    addi a0, a0, 8
-; RV64-NEXT:    vse32.v v8, (a0)
-; RV64-NEXT:    ret
-  %a = load <6 x i16>, ptr %x
-  %b = udiv <6 x i16> %a, <i16 7, i16 9, i16 10, i16 11, i16 12, i16 13>
-  store <6 x i16> %b, ptr %x
-  ret void
+; CHECK-LABEL: mulhu_v6i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a0)
+; CHECK-NEXT:    lui a1, %hi(.LCPI67_0)
+; CHECK-NEXT:    addi a1, a1, %lo(.LCPI67_0)
+; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
+; CHECK-NEXT:    vle16.v v9, (a1)
+; CHECK-NEXT:    vdivu.vv v9, v8, v9
+; CHECK-NEXT:    vsetivli zero, 2, e16, m1, ta, ma
+; CHECK-NEXT:    vslidedown.vi v8, v8, 4
+; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
+; CHECK-NEXT:    vid.v v10
+; CHECK-NEXT:    vadd.vi v10, v10, 12
+; CHECK-NEXT:    vdivu.vv v8, v8, v10
+; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT:    vslideup.vi v9, v8, 4
+; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vse16.v v9, (a0)
+; CHECK-NEXT:    ret
+  %a = load <6 x i16>, ptr %x
+  %b = udiv <6 x i16> %a, <i16 7, i16 9, i16 10, i16 11, i16 12, i16 13>
+  store <6 x i16> %b, ptr %x
+  ret void
 }
 
 define void @mulhu_v4i32(ptr %x) {
@@ -1716,62 +1435,30 @@ define void @mulhs_v8i16(ptr %x) {
 }
 
 define void @mulhs_v6i16(ptr %x) {
-; RV32-LABEL: mulhs_v6i16:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV32-NEXT:    vle16.v v8, (a0)
-; RV32-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
-; RV32-NEXT:    vmv.v.i v9, 7
-; RV32-NEXT:    vid.v v10
-; RV32-NEXT:    li a1, -14
-; RV32-NEXT:    vmadd.vx v10, a1, v9
-; RV32-NEXT:    vsetivli zero, 2, e16, m1, ta, ma
-; RV32-NEXT:    vslidedown.vi v9, v8, 4
-; RV32-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
-; RV32-NEXT:    vdiv.vv v9, v9, v10
-; RV32-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; RV32-NEXT:    vslideup.vi v10, v9, 4
-; RV32-NEXT:    li a1, 6
-; RV32-NEXT:    vmv.s.x v0, a1
-; RV32-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; RV32-NEXT:    vmv.v.i v9, -7
-; RV32-NEXT:    vmerge.vim v9, v9, 7, v0
-; RV32-NEXT:    vdiv.vv v8, v8, v9
-; RV32-NEXT:    vse16.v v8, (a0)
-; RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT:    vslidedown.vi v8, v10, 2
-; RV32-NEXT:    addi a0, a0, 8
-; RV32-NEXT:    vse32.v v8, (a0)
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: mulhs_v6i16:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV64-NEXT:    vle16.v v8, (a0)
-; RV64-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
-; RV64-NEXT:    vmv.v.i v9, 7
-; RV64-NEXT:    vid.v v10
-; RV64-NEXT:    li a1, -14
-; RV64-NEXT:    vmadd.vx v10, a1, v9
-; RV64-NEXT:    vsetivli zero, 2, e16, m1, ta, ma
-; RV64-NEXT:    vslidedown.vi v9, v8, 4
-; RV64-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
-; RV64-NEXT:    vdiv.vv v9, v9, v10
-; RV64-NEXT:    li a1, 6
-; RV64-NEXT:    vmv.s.x v0, a1
-; RV64-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; RV64-NEXT:    vmv.v.i v10, -7
-; RV64-NEXT:    vmerge.vim v10, v10, 7, v0
-; RV64-NEXT:    vdiv.vv v8, v8, v10
-; RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV64-NEXT:    vslideup.vi v8, v9, 4
-; RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT:    vse64.v v8, (a0)
-; RV64-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; RV64-NEXT:    vslidedown.vi v8, v8, 2
-; RV64-NEXT:    addi a0, a0, 8
-; RV64-NEXT:    vse32.v v8, (a0)
-; RV64-NEXT:    ret
+; CHECK-LABEL: mulhs_v6i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a0)
+; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
+; CHECK-NEXT:    vmv.v.i v9, 7
+; CHECK-NEXT:    vid.v v10
+; CHECK-NEXT:    li a1, -14
+; CHECK-NEXT:    vmadd.vx v10, a1, v9
+; CHECK-NEXT:    vsetivli zero, 2, e16, m1, ta, ma
+; CHECK-NEXT:    vslidedown.vi v9, v8, 4
+; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
+; CHECK-NEXT:    vdiv.vv v9, v9, v10
+; CHECK-NEXT:    li a1, 6
+; CHECK-NEXT:    vmv.s.x v0, a1
+; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
+; CHECK-NEXT:    vmv.v.i v10, -7
+; CHECK-NEXT:    vmerge.vim v10, v10, 7, v0
+; CHECK-NEXT:    vdiv.vv v8, v8, v10
+; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT:    vslideup.vi v8, v9, 4
+; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vse16.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <6 x i16>, ptr %x
   %b = sdiv <6 x i16> %a, <i16 -7, i16 7, i16 7, i16 -7, i16 7, i16 -7>
   store <6 x i16> %b, ptr %x
@@ -1917,33 +1604,16 @@ define void @smin_v8i16(ptr %x, ptr %y) {
 }
 
 define void @smin_v6i16(ptr %x, ptr %y) {
-; RV32-LABEL: smin_v6i16:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV32-NEXT:    vle16.v v8, (a0)
-; RV32-NEXT:    vle16.v v9, (a1)
-; RV32-NEXT:    vmin.vv v8, v8, v9
-; RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT:    vslidedown.vi v9, v8, 2
-; RV32-NEXT:    addi a1, a0, 8
-; RV32-NEXT:    vse32.v v9, (a1)
-; RV32-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; RV32-NEXT:    vse16.v v8, (a0)
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: smin_v6i16:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV64-NEXT:    vle16.v v8, (a0)
-; RV64-NEXT:    vle16.v v9, (a1)
-; RV64-NEXT:    vmin.vv v8, v8, v9
-; RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT:    vse64.v v8, (a0)
-; RV64-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; RV64-NEXT:    vslidedown.vi v8, v8, 2
-; RV64-NEXT:    addi a0, a0, 8
-; RV64-NEXT:    vse32.v v8, (a0)
-; RV64-NEXT:    ret
+; CHECK-LABEL: smin_v6i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a0)
+; CHECK-NEXT:    vle16.v v9, (a1)
+; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT:    vmin.vv v8, v8, v9
+; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vse16.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <6 x i16>, ptr %x
   %b = load <6 x i16>, ptr %y
   %cc = icmp slt <6 x i16> %a, %b
@@ -2021,31 +1691,15 @@ define void @smin_vx_v8i16(ptr %x, i16 %y) {
 declare <8 x i16> @llvm.smin.v8i16(<8 x i16>, <8 x i16>)
 
 define void @smin_vx_v6i16(ptr %x, i16 %y) {
-; RV32-LABEL: smin_vx_v6i16:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV32-NEXT:    vle16.v v8, (a0)
-; RV32-NEXT:    vmin.vx v8, v8, a1
-; RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT:    vslidedown.vi v9, v8, 2
-; RV32-NEXT:    addi a1, a0, 8
-; RV32-NEXT:    vse32.v v9, (a1)
-; RV32-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; RV32-NEXT:    vse16.v v8, (a0)
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: smin_vx_v6i16:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV64-NEXT:    vle16.v v8, (a0)
-; RV64-NEXT:    vmin.vx v8, v8, a1
-; RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT:    vse64.v v8, (a0)
-; RV64-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; RV64-NEXT:    vslidedown.vi v8, v8, 2
-; RV64-NEXT:    addi a0, a0, 8
-; RV64-NEXT:    vse32.v v8, (a0)
-; RV64-NEXT:    ret
+; CHECK-LABEL: smin_vx_v6i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a0)
+; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT:    vmin.vx v8, v8, a1
+; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vse16.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <6 x i16>, ptr %x
   %b = insertelement <6 x i16> poison, i16 %y, i32 0
   %c = shufflevector <6 x i16> %b, <6 x i16> poison, <6 x i32> zeroinitializer
@@ -2105,31 +1759,15 @@ define void @smin_xv_v8i16(ptr %x, i16 %y) {
 }
 
 define void @smin_xv_v6i16(ptr %x, i16 %y) {
-; RV32-LABEL: smin_xv_v6i16:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV32-NEXT:    vle16.v v8, (a0)
-; RV32-NEXT:    vmin.vx v8, v8, a1
-; RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT:    vslidedown.vi v9, v8, 2
-; RV32-NEXT:    addi a1, a0, 8
-; RV32-NEXT:    vse32.v v9, (a1)
-; RV32-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; RV32-NEXT:    vse16.v v8, (a0)
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: smin_xv_v6i16:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV64-NEXT:    vle16.v v8, (a0)
-; RV64-NEXT:    vmin.vx v8, v8, a1
-; RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT:    vse64.v v8, (a0)
-; RV64-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; RV64-NEXT:    vslidedown.vi v8, v8, 2
-; RV64-NEXT:    addi a0, a0, 8
-; RV64-NEXT:    vse32.v v8, (a0)
-; RV64-NEXT:    ret
+; CHECK-LABEL: smin_xv_v6i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a0)
+; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT:    vmin.vx v8, v8, a1
+; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vse16.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <6 x i16>, ptr %x
   %b = insertelement <6 x i16> poison, i16 %y, i32 0
   %c = shufflevector <6 x i16> %b, <6 x i16> poison, <6 x i32> zeroinitializer
@@ -2189,33 +1827,16 @@ define void @smax_v8i16(ptr %x, ptr %y) {
 }
 
 define void @smax_v6i16(ptr %x, ptr %y) {
-; RV32-LABEL: smax_v6i16:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV32-NEXT:    vle16.v v8, (a0)
-; RV32-NEXT:    vle16.v v9, (a1)
-; RV32-NEXT:    vmax.vv v8, v8, v9
-; RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT:    vslidedown.vi v9, v8, 2
-; RV32-NEXT:    addi a1, a0, 8
-; RV32-NEXT:    vse32.v v9, (a1)
-; RV32-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; RV32-NEXT:    vse16.v v8, (a0)
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: smax_v6i16:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV64-NEXT:    vle16.v v8, (a0)
-; RV64-NEXT:    vle16.v v9, (a1)
-; RV64-NEXT:    vmax.vv v8, v8, v9
-; RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT:    vse64.v v8, (a0)
-; RV64-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; RV64-NEXT:    vslidedown.vi v8, v8, 2
-; RV64-NEXT:    addi a0, a0, 8
-; RV64-NEXT:    vse32.v v8, (a0)
-; RV64-NEXT:    ret
+; CHECK-LABEL: smax_v6i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a0)
+; CHECK-NEXT:    vle16.v v9, (a1)
+; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT:    vmax.vv v8, v8, v9
+; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vse16.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <6 x i16>, ptr %x
   %b = load <6 x i16>, ptr %y
   %cc = icmp sgt <6 x i16> %a, %b
@@ -2293,31 +1914,15 @@ define void @smax_vx_v8i16(ptr %x, i16 %y) {
 declare <8 x i16> @llvm.smax.v8i16(<8 x i16>, <8 x i16>)
 
 define void @smax_vx_v6i16(ptr %x, i16 %y) {
-; RV32-LABEL: smax_vx_v6i16:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV32-NEXT:    vle16.v v8, (a0)
-; RV32-NEXT:    vmax.vx v8, v8, a1
-; RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT:    vslidedown.vi v9, v8, 2
-; RV32-NEXT:    addi a1, a0, 8
-; RV32-NEXT:    vse32.v v9, (a1)
-; RV32-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; RV32-NEXT:    vse16.v v8, (a0)
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: smax_vx_v6i16:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV64-NEXT:    vle16.v v8, (a0)
-; RV64-NEXT:    vmax.vx v8, v8, a1
-; RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT:    vse64.v v8, (a0)
-; RV64-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; RV64-NEXT:    vslidedown.vi v8, v8, 2
-; RV64-NEXT:    addi a0, a0, 8
-; RV64-NEXT:    vse32.v v8, (a0)
-; RV64-NEXT:    ret
+; CHECK-LABEL: smax_vx_v6i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a0)
+; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT:    vmax.vx v8, v8, a1
+; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vse16.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <6 x i16>, ptr %x
   %b = insertelement <6 x i16> poison, i16 %y, i32 0
   %c = shufflevector <6 x i16> %b, <6 x i16> poison, <6 x i32> zeroinitializer
@@ -2377,31 +1982,15 @@ define void @smax_xv_v8i16(ptr %x, i16 %y) {
 }
 
 define void @smax_xv_v6i16(ptr %x, i16 %y) {
-; RV32-LABEL: smax_xv_v6i16:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV32-NEXT:    vle16.v v8, (a0)
-; RV32-NEXT:    vmax.vx v8, v8, a1
-; RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT:    vslidedown.vi v9, v8, 2
-; RV32-NEXT:    addi a1, a0, 8
-; RV32-NEXT:    vse32.v v9, (a1)
-; RV32-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; RV32-NEXT:    vse16.v v8, (a0)
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: smax_xv_v6i16:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV64-NEXT:    vle16.v v8, (a0)
-; RV64-NEXT:    vmax.vx v8, v8, a1
-; RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT:    vse64.v v8, (a0)
-; RV64-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; RV64-NEXT:    vslidedown.vi v8, v8, 2
-; RV64-NEXT:    addi a0, a0, 8
-; RV64-NEXT:    vse32.v v8, (a0)
-; RV64-NEXT:    ret
+; CHECK-LABEL: smax_xv_v6i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a0)
+; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT:    vmax.vx v8, v8, a1
+; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vse16.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <6 x i16>, ptr %x
   %b = insertelement <6 x i16> poison, i16 %y, i32 0
   %c = shufflevector <6 x i16> %b, <6 x i16> poison, <6 x i32> zeroinitializer
@@ -2461,33 +2050,16 @@ define void @umin_v8i16(ptr %x, ptr %y) {
 }
 
 define void @umin_v6i16(ptr %x, ptr %y) {
-; RV32-LABEL: umin_v6i16:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV32-NEXT:    vle16.v v8, (a0)
-; RV32-NEXT:    vle16.v v9, (a1)
-; RV32-NEXT:    vminu.vv v8, v8, v9
-; RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT:    vslidedown.vi v9, v8, 2
-; RV32-NEXT:    addi a1, a0, 8
-; RV32-NEXT:    vse32.v v9, (a1)
-; RV32-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; RV32-NEXT:    vse16.v v8, (a0)
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: umin_v6i16:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV64-NEXT:    vle16.v v8, (a0)
-; RV64-NEXT:    vle16.v v9, (a1)
-; RV64-NEXT:    vminu.vv v8, v8, v9
-; RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT:    vse64.v v8, (a0)
-; RV64-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; RV64-NEXT:    vslidedown.vi v8, v8, 2
-; RV64-NEXT:    addi a0, a0, 8
-; RV64-NEXT:    vse32.v v8, (a0)
-; RV64-NEXT:    ret
+; CHECK-LABEL: umin_v6i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a0)
+; CHECK-NEXT:    vle16.v v9, (a1)
+; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT:    vminu.vv v8, v8, v9
+; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vse16.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <6 x i16>, ptr %x
   %b = load <6 x i16>, ptr %y
   %cc = icmp ult <6 x i16> %a, %b
@@ -2565,31 +2137,15 @@ define void @umin_vx_v8i16(ptr %x, i16 %y) {
 declare <8 x i16> @llvm.umin.v8i16(<8 x i16>, <8 x i16>)
 
 define void @umin_vx_v6i16(ptr %x, i16 %y) {
-; RV32-LABEL: umin_vx_v6i16:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV32-NEXT:    vle16.v v8, (a0)
-; RV32-NEXT:    vminu.vx v8, v8, a1
-; RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT:    vslidedown.vi v9, v8, 2
-; RV32-NEXT:    addi a1, a0, 8
-; RV32-NEXT:    vse32.v v9, (a1)
-; RV32-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; RV32-NEXT:    vse16.v v8, (a0)
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: umin_vx_v6i16:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV64-NEXT:    vle16.v v8, (a0)
-; RV64-NEXT:    vminu.vx v8, v8, a1
-; RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT:    vse64.v v8, (a0)
-; RV64-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; RV64-NEXT:    vslidedown.vi v8, v8, 2
-; RV64-NEXT:    addi a0, a0, 8
-; RV64-NEXT:    vse32.v v8, (a0)
-; RV64-NEXT:    ret
+; CHECK-LABEL: umin_vx_v6i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a0)
+; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT:    vminu.vx v8, v8, a1
+; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vse16.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <6 x i16>, ptr %x
   %b = insertelement <6 x i16> poison, i16 %y, i32 0
   %c = shufflevector <6 x i16> %b, <6 x i16> poison, <6 x i32> zeroinitializer
@@ -2649,31 +2205,15 @@ define void @umin_xv_v8i16(ptr %x, i16 %y) {
 }
 
 define void @umin_xv_v6i16(ptr %x, i16 %y) {
-; RV32-LABEL: umin_xv_v6i16:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV32-NEXT:    vle16.v v8, (a0)
-; RV32-NEXT:    vminu.vx v8, v8, a1
-; RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT:    vslidedown.vi v9, v8, 2
-; RV32-NEXT:    addi a1, a0, 8
-; RV32-NEXT:    vse32.v v9, (a1)
-; RV32-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; RV32-NEXT:    vse16.v v8, (a0)
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: umin_xv_v6i16:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV64-NEXT:    vle16.v v8, (a0)
-; RV64-NEXT:    vminu.vx v8, v8, a1
-; RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT:    vse64.v v8, (a0)
-; RV64-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; RV64-NEXT:    vslidedown.vi v8, v8, 2
-; RV64-NEXT:    addi a0, a0, 8
-; RV64-NEXT:    vse32.v v8, (a0)
-; RV64-NEXT:    ret
+; CHECK-LABEL: umin_xv_v6i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a0)
+; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT:    vminu.vx v8, v8, a1
+; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vse16.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <6 x i16>, ptr %x
   %b = insertelement <6 x i16> poison, i16 %y, i32 0
   %c = shufflevector <6 x i16> %b, <6 x i16> poison, <6 x i32> zeroinitializer
@@ -2733,33 +2273,16 @@ define void @umax_v8i16(ptr %x, ptr %y) {
 }
 
 define void @umax_v6i16(ptr %x, ptr %y) {
-; RV32-LABEL: umax_v6i16:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV32-NEXT:    vle16.v v8, (a0)
-; RV32-NEXT:    vle16.v v9, (a1)
-; RV32-NEXT:    vmaxu.vv v8, v8, v9
-; RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT:    vslidedown.vi v9, v8, 2
-; RV32-NEXT:    addi a1, a0, 8
-; RV32-NEXT:    vse32.v v9, (a1)
-; RV32-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; RV32-NEXT:    vse16.v v8, (a0)
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: umax_v6i16:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV64-NEXT:    vle16.v v8, (a0)
-; RV64-NEXT:    vle16.v v9, (a1)
-; RV64-NEXT:    vmaxu.vv v8, v8, v9
-; RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT:    vse64.v v8, (a0)
-; RV64-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; RV64-NEXT:    vslidedown.vi v8, v8, 2
-; RV64-NEXT:    addi a0, a0, 8
-; RV64-NEXT:    vse32.v v8, (a0)
-; RV64-NEXT:    ret
+; CHECK-LABEL: umax_v6i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a0)
+; CHECK-NEXT:    vle16.v v9, (a1)
+; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT:    vmaxu.vv v8, v8, v9
+; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vse16.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <6 x i16>, ptr %x
   %b = load <6 x i16>, ptr %y
   %cc = icmp ugt <6 x i16> %a, %b
@@ -2837,31 +2360,15 @@ define void @umax_vx_v8i16(ptr %x, i16 %y) {
 declare <8 x i16> @llvm.umax.v8i16(<8 x i16>, <8 x i16>)
 
 define void @umax_vx_v6i16(ptr %x, i16 %y) {
-; RV32-LABEL: umax_vx_v6i16:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV32-NEXT:    vle16.v v8, (a0)
-; RV32-NEXT:    vmaxu.vx v8, v8, a1
-; RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT:    vslidedown.vi v9, v8, 2
-; RV32-NEXT:    addi a1, a0, 8
-; RV32-NEXT:    vse32.v v9, (a1)
-; RV32-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; RV32-NEXT:    vse16.v v8, (a0)
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: umax_vx_v6i16:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV64-NEXT:    vle16.v v8, (a0)
-; RV64-NEXT:    vmaxu.vx v8, v8, a1
-; RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT:    vse64.v v8, (a0)
-; RV64-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; RV64-NEXT:    vslidedown.vi v8, v8, 2
-; RV64-NEXT:    addi a0, a0, 8
-; RV64-NEXT:    vse32.v v8, (a0)
-; RV64-NEXT:    ret
+; CHECK-LABEL: umax_vx_v6i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a0)
+; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT:    vmaxu.vx v8, v8, a1
+; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vse16.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <6 x i16>, ptr %x
   %b = insertelement <6 x i16> poison, i16 %y, i32 0
   %c = shufflevector <6 x i16> %b, <6 x i16> poison, <6 x i32> zeroinitializer
@@ -2921,31 +2428,15 @@ define void @umax_xv_v8i16(ptr %x, i16 %y) {
 }
 
 define void @umax_xv_v6i16(ptr %x, i16 %y) {
-; RV32-LABEL: umax_xv_v6i16:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV32-NEXT:    vle16.v v8, (a0)
-; RV32-NEXT:    vmaxu.vx v8, v8, a1
-; RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT:    vslidedown.vi v9, v8, 2
-; RV32-NEXT:    addi a1, a0, 8
-; RV32-NEXT:    vse32.v v9, (a1)
-; RV32-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; RV32-NEXT:    vse16.v v8, (a0)
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: umax_xv_v6i16:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV64-NEXT:    vle16.v v8, (a0)
-; RV64-NEXT:    vmaxu.vx v8, v8, a1
-; RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT:    vse64.v v8, (a0)
-; RV64-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; RV64-NEXT:    vslidedown.vi v8, v8, 2
-; RV64-NEXT:    addi a0, a0, 8
-; RV64-NEXT:    vse32.v v8, (a0)
-; RV64-NEXT:    ret
+; CHECK-LABEL: umax_xv_v6i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a0)
+; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT:    vmaxu.vx v8, v8, a1
+; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vse16.v v8, (a0)
+; CHECK-NEXT:    ret
   %a = load <6 x i16>, ptr %x
   %b = insertelement <6 x i16> poison, i16 %y, i32 0
   %c = shufflevector <6 x i16> %b, <6 x i16> poison, <6 x i32> zeroinitializer
@@ -3110,34 +2601,16 @@ define void @add_v8i32(ptr %x, ptr %y) {
 }
 
 define void @add_v6i32(ptr %x, ptr %y) {
-; LMULMAX2-RV32-LABEL: add_v6i32:
-; LMULMAX2-RV32:       # %bb.0:
-; LMULMAX2-RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-RV32-NEXT:    vle32.v v8, (a0)
-; LMULMAX2-RV32-NEXT:    vle32.v v10, (a1)
-; LMULMAX2-RV32-NEXT:    vadd.vv v8, v8, v10
-; LMULMAX2-RV32-NEXT:    vsetivli zero, 2, e32, m2, ta, ma
-; LMULMAX2-RV32-NEXT:    vslidedown.vi v10, v8, 4
-; LMULMAX2-RV32-NEXT:    addi a1, a0, 16
-; LMULMAX2-RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; LMULMAX2-RV32-NEXT:    vse32.v v10, (a1)
-; LMULMAX2-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX2-RV32-NEXT:    vse32.v v8, (a0)
-; LMULMAX2-RV32-NEXT:    ret
-;
-; LMULMAX2-RV64-LABEL: add_v6i32:
-; LMULMAX2-RV64:       # %bb.0:
-; LMULMAX2-RV64-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-RV64-NEXT:    vle32.v v8, (a0)
-; LMULMAX2-RV64-NEXT:    vle32.v v10, (a1)
-; LMULMAX2-RV64-NEXT:    vadd.vv v8, v8, v10
-; LMULMAX2-RV64-NEXT:    vsetivli zero, 1, e64, m2, ta, ma
-; LMULMAX2-RV64-NEXT:    vslidedown.vi v10, v8, 2
-; LMULMAX2-RV64-NEXT:    addi a1, a0, 16
-; LMULMAX2-RV64-NEXT:    vse64.v v10, (a1)
-; LMULMAX2-RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX2-RV64-NEXT:    vse32.v v8, (a0)
-; LMULMAX2-RV64-NEXT:    ret
+; LMULMAX2-LABEL: add_v6i32:
+; LMULMAX2:       # %bb.0:
+; LMULMAX2-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
+; LMULMAX2-NEXT:    vle32.v v8, (a0)
+; LMULMAX2-NEXT:    vle32.v v10, (a1)
+; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; LMULMAX2-NEXT:    vadd.vv v8, v8, v10
+; LMULMAX2-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
+; LMULMAX2-NEXT:    vse32.v v8, (a0)
+; LMULMAX2-NEXT:    ret
 ;
 ; LMULMAX1-RV32-LABEL: add_v6i32:
 ; LMULMAX1-RV32:       # %bb.0:

diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll
index d1a4e47407a97..f0539e7566b67 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll
@@ -10,7 +10,7 @@
 define {<3 x i32>, <3 x i32>} @load_factor2_v3(ptr %ptr) {
 ; CHECK-LABEL: load_factor2_v3:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
 ; CHECK-NEXT:    vle32.v v10, (a0)
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; CHECK-NEXT:    vid.v v8

diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-load.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-load.ll
index 94f189e857ed9..c0a35edc237c3 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-load.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-load.ll
@@ -3,101 +3,31 @@
 ; RUN: llc -mtriple=riscv64 -mattr=+v,+zfh,+experimental-zvfh -verify-machineinstrs < %s | FileCheck -check-prefixes=CHECK,RV64 %s
 
 define <5 x i8> @load_v5i8(ptr %p) {
-; RV32-LABEL: load_v5i8:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; RV32-NEXT:    vle8.v v8, (a0)
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: load_v5i8:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT:    vle64.v v8, (a0)
-; RV64-NEXT:    ret
+; CHECK-LABEL: load_v5i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 5, e8, mf2, ta, ma
+; CHECK-NEXT:    vle8.v v8, (a0)
+; CHECK-NEXT:    ret
   %x = load <5 x i8>, ptr %p
   ret <5 x i8> %x
 }
 
 define <5 x i8> @load_v5i8_align1(ptr %p) {
-; RV32-LABEL: load_v5i8_align1:
-; RV32:       # %bb.0:
-; RV32-NEXT:    lbu a1, 1(a0)
-; RV32-NEXT:    lbu a2, 0(a0)
-; RV32-NEXT:    lbu a3, 2(a0)
-; RV32-NEXT:    lbu a4, 3(a0)
-; RV32-NEXT:    slli a1, a1, 8
-; RV32-NEXT:    or a1, a1, a2
-; RV32-NEXT:    slli a3, a3, 16
-; RV32-NEXT:    slli a4, a4, 24
-; RV32-NEXT:    or a3, a4, a3
-; RV32-NEXT:    or a1, a3, a1
-; RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; RV32-NEXT:    vmv.s.x v8, a1
-; RV32-NEXT:    vsetivli zero, 1, e8, mf2, ta, ma
-; RV32-NEXT:    vslidedown.vi v9, v8, 3
-; RV32-NEXT:    vmv.x.s a1, v9
-; RV32-NEXT:    vslidedown.vi v9, v8, 2
-; RV32-NEXT:    vmv.x.s a2, v9
-; RV32-NEXT:    vslidedown.vi v9, v8, 1
-; RV32-NEXT:    vmv.x.s a3, v9
-; RV32-NEXT:    vmv.x.s a4, v8
-; RV32-NEXT:    lb a0, 4(a0)
-; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; RV32-NEXT:    vslide1down.vx v8, v8, a4
-; RV32-NEXT:    vslide1down.vx v8, v8, a3
-; RV32-NEXT:    vslide1down.vx v8, v8, a2
-; RV32-NEXT:    vslide1down.vx v8, v8, a1
-; RV32-NEXT:    vslide1down.vx v8, v8, a0
-; RV32-NEXT:    vslidedown.vi v8, v8, 3
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: load_v5i8_align1:
-; RV64:       # %bb.0:
-; RV64-NEXT:    lbu a1, 1(a0)
-; RV64-NEXT:    lbu a2, 0(a0)
-; RV64-NEXT:    lbu a3, 2(a0)
-; RV64-NEXT:    lb a4, 3(a0)
-; RV64-NEXT:    slli a1, a1, 8
-; RV64-NEXT:    or a1, a1, a2
-; RV64-NEXT:    slli a3, a3, 16
-; RV64-NEXT:    slli a4, a4, 24
-; RV64-NEXT:    or a3, a4, a3
-; RV64-NEXT:    or a1, a3, a1
-; RV64-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; RV64-NEXT:    vmv.s.x v8, a1
-; RV64-NEXT:    vsetivli zero, 1, e8, mf2, ta, ma
-; RV64-NEXT:    vslidedown.vi v9, v8, 3
-; RV64-NEXT:    vmv.x.s a1, v9
-; RV64-NEXT:    vslidedown.vi v9, v8, 2
-; RV64-NEXT:    vmv.x.s a2, v9
-; RV64-NEXT:    vslidedown.vi v9, v8, 1
-; RV64-NEXT:    vmv.x.s a3, v9
-; RV64-NEXT:    vmv.x.s a4, v8
-; RV64-NEXT:    lb a0, 4(a0)
-; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; RV64-NEXT:    vslide1down.vx v8, v8, a4
-; RV64-NEXT:    vslide1down.vx v8, v8, a3
-; RV64-NEXT:    vslide1down.vx v8, v8, a2
-; RV64-NEXT:    vslide1down.vx v8, v8, a1
-; RV64-NEXT:    vslide1down.vx v8, v8, a0
-; RV64-NEXT:    vslidedown.vi v8, v8, 3
-; RV64-NEXT:    ret
+; CHECK-LABEL: load_v5i8_align1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 5, e8, mf2, ta, ma
+; CHECK-NEXT:    vle8.v v8, (a0)
+; CHECK-NEXT:    ret
   %x = load <5 x i8>, ptr %p, align 1
   ret <5 x i8> %x
 }
 
 define <6 x i8> @load_v6i8(ptr %p) {
-; RV32-LABEL: load_v6i8:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; RV32-NEXT:    vle8.v v8, (a0)
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: load_v6i8:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT:    vle64.v v8, (a0)
-; RV64-NEXT:    ret
+; CHECK-LABEL: load_v6i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 6, e8, mf2, ta, ma
+; CHECK-NEXT:    vle8.v v8, (a0)
+; CHECK-NEXT:    ret
   %x = load <6 x i8>, ptr %p
   ret <6 x i8> %x
 }
@@ -105,7 +35,7 @@ define <6 x i8> @load_v6i8(ptr %p) {
 define <12 x i8> @load_v12i8(ptr %p) {
 ; CHECK-LABEL: load_v12i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
+; CHECK-NEXT:    vsetivli zero, 12, e8, m1, ta, ma
 ; CHECK-NEXT:    vle8.v v8, (a0)
 ; CHECK-NEXT:    ret
   %x = load <12 x i8>, ptr %p
@@ -115,7 +45,7 @@ define <12 x i8> @load_v12i8(ptr %p) {
 define <6 x i16> @load_v6i16(ptr %p) {
 ; CHECK-LABEL: load_v6i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
 ; CHECK-NEXT:    vle16.v v8, (a0)
 ; CHECK-NEXT:    ret
   %x = load <6 x i16>, ptr %p
@@ -125,7 +55,7 @@ define <6 x i16> @load_v6i16(ptr %p) {
 define <6 x half> @load_v6f16(ptr %p) {
 ; CHECK-LABEL: load_v6f16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
 ; CHECK-NEXT:    vle16.v v8, (a0)
 ; CHECK-NEXT:    ret
   %x = load <6 x half>, ptr %p
@@ -135,7 +65,7 @@ define <6 x half> @load_v6f16(ptr %p) {
 define <6 x float> @load_v6f32(ptr %p) {
 ; CHECK-LABEL: load_v6f32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
 ; CHECK-NEXT:    vle32.v v8, (a0)
 ; CHECK-NEXT:    ret
   %x = load <6 x float>, ptr %p
@@ -145,7 +75,7 @@ define <6 x float> @load_v6f32(ptr %p) {
 define <6 x double> @load_v6f64(ptr %p) {
 ; CHECK-LABEL: load_v6f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
+; CHECK-NEXT:    vsetivli zero, 6, e64, m4, ta, ma
 ; CHECK-NEXT:    vle64.v v8, (a0)
 ; CHECK-NEXT:    ret
   %x = load <6 x double>, ptr %p

diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store.ll
index 61a358ac471a3..e6b3c25b5d935 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store.ll
@@ -5,12 +5,8 @@
 define void @store_v5i8(ptr %p, <5 x i8> %v) {
 ; CHECK-LABEL: store_v5i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 1, e8, mf2, ta, ma
-; CHECK-NEXT:    vslidedown.vi v9, v8, 4
-; CHECK-NEXT:    addi a1, a0, 4
-; CHECK-NEXT:    vse8.v v9, (a1)
-; CHECK-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-NEXT:    vse32.v v8, (a0)
+; CHECK-NEXT:    vsetivli zero, 5, e8, mf2, ta, ma
+; CHECK-NEXT:    vse8.v v8, (a0)
 ; CHECK-NEXT:    ret
   store <5 x i8> %v, ptr %p
   ret void
@@ -19,19 +15,8 @@ define void @store_v5i8(ptr %p, <5 x i8> %v) {
 define void @store_v5i8_align1(ptr %p, <5 x i8> %v) {
 ; CHECK-LABEL: store_v5i8_align1:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 1, e8, mf2, ta, ma
-; CHECK-NEXT:    vslidedown.vi v9, v8, 4
-; CHECK-NEXT:    addi a1, a0, 4
-; CHECK-NEXT:    vse8.v v9, (a1)
-; CHECK-NEXT:    vsetivli zero, 0, e32, mf2, ta, ma
-; CHECK-NEXT:    vmv.x.s a1, v8
-; CHECK-NEXT:    sb a1, 0(a0)
-; CHECK-NEXT:    srli a2, a1, 24
-; CHECK-NEXT:    sb a2, 3(a0)
-; CHECK-NEXT:    srli a2, a1, 16
-; CHECK-NEXT:    sb a2, 2(a0)
-; CHECK-NEXT:    srli a1, a1, 8
-; CHECK-NEXT:    sb a1, 1(a0)
+; CHECK-NEXT:    vsetivli zero, 5, e8, mf2, ta, ma
+; CHECK-NEXT:    vse8.v v8, (a0)
 ; CHECK-NEXT:    ret
   store <5 x i8> %v, ptr %p, align 1
   ret void
@@ -41,110 +26,49 @@ define void @store_v5i8_align1(ptr %p, <5 x i8> %v) {
 define void @store_v6i8(ptr %p, <6 x i8> %v) {
 ; CHECK-LABEL: store_v6i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-NEXT:    vse32.v v8, (a0)
-; CHECK-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
-; CHECK-NEXT:    vslidedown.vi v8, v8, 2
-; CHECK-NEXT:    addi a0, a0, 4
-; CHECK-NEXT:    vse16.v v8, (a0)
+; CHECK-NEXT:    vsetivli zero, 6, e8, mf2, ta, ma
+; CHECK-NEXT:    vse8.v v8, (a0)
 ; CHECK-NEXT:    ret
   store <6 x i8> %v, ptr %p
   ret void
 }
 
 define void @store_v12i8(ptr %p, <12 x i8> %v) {
-; RV32-LABEL: store_v12i8:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT:    vslidedown.vi v9, v8, 2
-; RV32-NEXT:    addi a1, a0, 8
-; RV32-NEXT:    vse32.v v9, (a1)
-; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; RV32-NEXT:    vse8.v v8, (a0)
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: store_v12i8:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT:    vse64.v v8, (a0)
-; RV64-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; RV64-NEXT:    vslidedown.vi v8, v8, 2
-; RV64-NEXT:    addi a0, a0, 8
-; RV64-NEXT:    vse32.v v8, (a0)
-; RV64-NEXT:    ret
+; CHECK-LABEL: store_v12i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 12, e8, m1, ta, ma
+; CHECK-NEXT:    vse8.v v8, (a0)
+; CHECK-NEXT:    ret
   store <12 x i8> %v, ptr %p
   ret void
 }
 
 define void @store_v6i16(ptr %p, <6 x i16> %v) {
-; RV32-LABEL: store_v6i16:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT:    vslidedown.vi v9, v8, 2
-; RV32-NEXT:    addi a1, a0, 8
-; RV32-NEXT:    vse32.v v9, (a1)
-; RV32-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; RV32-NEXT:    vse16.v v8, (a0)
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: store_v6i16:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT:    vse64.v v8, (a0)
-; RV64-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; RV64-NEXT:    vslidedown.vi v8, v8, 2
-; RV64-NEXT:    addi a0, a0, 8
-; RV64-NEXT:    vse32.v v8, (a0)
-; RV64-NEXT:    ret
+; CHECK-LABEL: store_v6i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vse16.v v8, (a0)
+; CHECK-NEXT:    ret
   store <6 x i16> %v, ptr %p
   ret void
 }
 
 define void @store_v6f16(ptr %p, <6 x half> %v) {
-; RV32-LABEL: store_v6f16:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT:    vslidedown.vi v9, v8, 2
-; RV32-NEXT:    addi a1, a0, 8
-; RV32-NEXT:    vse32.v v9, (a1)
-; RV32-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; RV32-NEXT:    vse16.v v8, (a0)
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: store_v6f16:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT:    vse64.v v8, (a0)
-; RV64-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; RV64-NEXT:    vslidedown.vi v8, v8, 2
-; RV64-NEXT:    addi a0, a0, 8
-; RV64-NEXT:    vse32.v v8, (a0)
-; RV64-NEXT:    ret
+; CHECK-LABEL: store_v6f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vse16.v v8, (a0)
+; CHECK-NEXT:    ret
   store <6 x half> %v, ptr %p
   ret void
 }
 
 define void @store_v6f32(ptr %p, <6 x float> %v) {
-; RV32-LABEL: store_v6f32:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli zero, 2, e32, m2, ta, ma
-; RV32-NEXT:    vslidedown.vi v10, v8, 4
-; RV32-NEXT:    addi a1, a0, 16
-; RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; RV32-NEXT:    vse32.v v10, (a1)
-; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT:    vse32.v v8, (a0)
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: store_v6f32:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli zero, 1, e64, m2, ta, ma
-; RV64-NEXT:    vslidedown.vi v10, v8, 2
-; RV64-NEXT:    addi a1, a0, 16
-; RV64-NEXT:    vse64.v v10, (a1)
-; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV64-NEXT:    vse32.v v8, (a0)
-; RV64-NEXT:    ret
+; CHECK-LABEL: store_v6f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
+; CHECK-NEXT:    vse32.v v8, (a0)
+; CHECK-NEXT:    ret
   store <6 x float> %v, ptr %p
   ret void
 }
@@ -152,12 +76,7 @@ define void @store_v6f32(ptr %p, <6 x float> %v) {
 define void @store_v6f64(ptr %p, <6 x double> %v) {
 ; CHECK-LABEL: store_v6f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 2, e64, m4, ta, ma
-; CHECK-NEXT:    vslidedown.vi v12, v8, 4
-; CHECK-NEXT:    addi a1, a0, 32
-; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-NEXT:    vse64.v v12, (a1)
-; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT:    vsetivli zero, 6, e64, m4, ta, ma
 ; CHECK-NEXT:    vse64.v v8, (a0)
 ; CHECK-NEXT:    ret
   store <6 x double> %v, ptr %p

diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-combine.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-combine.ll
index 3aa8ce8f892ca..0ac09ea7f80a3 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-combine.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-combine.ll
@@ -22,62 +22,20 @@ define void @widen_2xv4i16(ptr %x, ptr %z) {
 }
 
 define void @widen_3xv4i16(ptr %x, ptr %z) {
-; RV32-LABEL: widen_3xv4i16:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; RV32-NEXT:    vle16.v v8, (a0)
-; RV32-NEXT:    addi a2, a0, 8
-; RV32-NEXT:    vle16.v v10, (a2)
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vle16.v v12, (a0)
-; RV32-NEXT:    vsetivli zero, 8, e16, m2, tu, ma
-; RV32-NEXT:    vslideup.vi v8, v10, 4
-; RV32-NEXT:    addi a0, a1, 16
-; RV32-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; RV32-NEXT:    vse16.v v12, (a0)
-; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV32-NEXT:    vse16.v v8, (a1)
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: widen_3xv4i16:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; RV64-NEXT:    vle16.v v8, (a0)
-; RV64-NEXT:    addi a2, a0, 8
-; RV64-NEXT:    vle16.v v10, (a2)
-; RV64-NEXT:    addi a0, a0, 16
-; RV64-NEXT:    vle16.v v12, (a0)
-; RV64-NEXT:    vsetivli zero, 8, e16, m2, tu, ma
-; RV64-NEXT:    vslideup.vi v8, v10, 4
-; RV64-NEXT:    vsetivli zero, 12, e16, m2, tu, ma
-; RV64-NEXT:    vslideup.vi v8, v12, 8
-; RV64-NEXT:    vsetivli zero, 1, e64, m2, ta, ma
-; RV64-NEXT:    vslidedown.vi v10, v8, 2
-; RV64-NEXT:    addi a0, a1, 16
-; RV64-NEXT:    vse64.v v10, (a0)
-; RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV64-NEXT:    vse16.v v8, (a1)
-; RV64-NEXT:    ret
-;
-; ZVE64F-LABEL: widen_3xv4i16:
-; ZVE64F:       # %bb.0:
-; ZVE64F-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; ZVE64F-NEXT:    vle16.v v8, (a0)
-; ZVE64F-NEXT:    addi a2, a0, 8
-; ZVE64F-NEXT:    vle16.v v10, (a2)
-; ZVE64F-NEXT:    addi a0, a0, 16
-; ZVE64F-NEXT:    vle16.v v12, (a0)
-; ZVE64F-NEXT:    vsetivli zero, 8, e16, m2, tu, ma
-; ZVE64F-NEXT:    vslideup.vi v8, v10, 4
-; ZVE64F-NEXT:    vsetivli zero, 12, e16, m2, tu, ma
-; ZVE64F-NEXT:    vslideup.vi v8, v12, 8
-; ZVE64F-NEXT:    vsetivli zero, 1, e64, m2, ta, ma
-; ZVE64F-NEXT:    vslidedown.vi v10, v8, 2
-; ZVE64F-NEXT:    addi a0, a1, 16
-; ZVE64F-NEXT:    vse64.v v10, (a0)
-; ZVE64F-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVE64F-NEXT:    vse16.v v8, (a1)
-; ZVE64F-NEXT:    ret
+; CHECK-LABEL: widen_3xv4i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a0)
+; CHECK-NEXT:    addi a2, a0, 8
+; CHECK-NEXT:    vle16.v v10, (a2)
+; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    vle16.v v12, (a0)
+; CHECK-NEXT:    vsetivli zero, 8, e16, m2, tu, ma
+; CHECK-NEXT:    vslideup.vi v8, v10, 4
+; CHECK-NEXT:    vsetivli zero, 12, e16, m2, tu, ma
+; CHECK-NEXT:    vslideup.vi v8, v12, 8
+; CHECK-NEXT:    vse16.v v8, (a1)
+; CHECK-NEXT:    ret
   %a = load <4 x i16>, ptr %x
   %b.gep = getelementptr i8, ptr %x, i64 8
   %b = load <4 x i16>, ptr %b.gep

diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll
index 0519305e6f2d2..92db0a9e545a9 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll
@@ -5,70 +5,65 @@
 define void @vselect_vv_v6i32(ptr %a, ptr %b, ptr %cc, ptr %z) {
 ; RV32-LABEL: vselect_vv_v6i32:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; RV32-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
 ; RV32-NEXT:    lbu a2, 0(a2)
-; RV32-NEXT:    vle32.v v8, (a1)
-; RV32-NEXT:    andi a1, a2, 1
-; RV32-NEXT:    vslide1down.vx v10, v8, a1
-; RV32-NEXT:    slli a1, a2, 30
-; RV32-NEXT:    srli a1, a1, 31
-; RV32-NEXT:    vslide1down.vx v10, v10, a1
-; RV32-NEXT:    slli a1, a2, 29
-; RV32-NEXT:    srli a1, a1, 31
-; RV32-NEXT:    vslide1down.vx v10, v10, a1
-; RV32-NEXT:    slli a1, a2, 28
-; RV32-NEXT:    srli a1, a1, 31
-; RV32-NEXT:    vslide1down.vx v10, v10, a1
-; RV32-NEXT:    slli a1, a2, 27
-; RV32-NEXT:    srli a1, a1, 31
-; RV32-NEXT:    vslide1down.vx v10, v10, a1
+; RV32-NEXT:    vle32.v v8, (a0)
+; RV32-NEXT:    vle32.v v10, (a1)
+; RV32-NEXT:    andi a0, a2, 1
+; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; RV32-NEXT:    vslide1down.vx v12, v8, a0
+; RV32-NEXT:    slli a0, a2, 30
+; RV32-NEXT:    srli a0, a0, 31
+; RV32-NEXT:    vslide1down.vx v12, v12, a0
+; RV32-NEXT:    slli a0, a2, 29
+; RV32-NEXT:    srli a0, a0, 31
+; RV32-NEXT:    vslide1down.vx v12, v12, a0
+; RV32-NEXT:    slli a0, a2, 28
+; RV32-NEXT:    srli a0, a0, 31
+; RV32-NEXT:    vslide1down.vx v12, v12, a0
+; RV32-NEXT:    slli a0, a2, 27
+; RV32-NEXT:    srli a0, a0, 31
+; RV32-NEXT:    vslide1down.vx v12, v12, a0
 ; RV32-NEXT:    srli a2, a2, 5
-; RV32-NEXT:    vslide1down.vx v10, v10, a2
-; RV32-NEXT:    vslidedown.vi v10, v10, 2
-; RV32-NEXT:    vand.vi v10, v10, 1
-; RV32-NEXT:    vmsne.vi v0, v10, 0
-; RV32-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; RV32-NEXT:    vle32.v v8, (a0), v0.t
-; RV32-NEXT:    vsetivli zero, 2, e32, m2, ta, ma
-; RV32-NEXT:    vslidedown.vi v10, v8, 4
-; RV32-NEXT:    addi a0, a3, 16
-; RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; RV32-NEXT:    vse32.v v10, (a0)
-; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; RV32-NEXT:    vslide1down.vx v12, v12, a2
+; RV32-NEXT:    vslidedown.vi v12, v12, 2
+; RV32-NEXT:    vand.vi v12, v12, 1
+; RV32-NEXT:    vmsne.vi v0, v12, 0
+; RV32-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; RV32-NEXT:    vmerge.vvm v8, v10, v8, v0
+; RV32-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
 ; RV32-NEXT:    vse32.v v8, (a3)
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vselect_vv_v6i32:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; RV64-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
 ; RV64-NEXT:    lbu a2, 0(a2)
-; RV64-NEXT:    vle32.v v8, (a1)
-; RV64-NEXT:    andi a1, a2, 1
-; RV64-NEXT:    vslide1down.vx v10, v8, a1
-; RV64-NEXT:    slli a1, a2, 62
-; RV64-NEXT:    srli a1, a1, 63
-; RV64-NEXT:    vslide1down.vx v10, v10, a1
-; RV64-NEXT:    slli a1, a2, 61
-; RV64-NEXT:    srli a1, a1, 63
-; RV64-NEXT:    vslide1down.vx v10, v10, a1
-; RV64-NEXT:    slli a1, a2, 60
-; RV64-NEXT:    srli a1, a1, 63
-; RV64-NEXT:    vslide1down.vx v10, v10, a1
-; RV64-NEXT:    slli a1, a2, 59
-; RV64-NEXT:    srli a1, a1, 63
-; RV64-NEXT:    vslide1down.vx v10, v10, a1
+; RV64-NEXT:    vle32.v v8, (a0)
+; RV64-NEXT:    vle32.v v10, (a1)
+; RV64-NEXT:    andi a0, a2, 1
+; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; RV64-NEXT:    vslide1down.vx v12, v8, a0
+; RV64-NEXT:    slli a0, a2, 62
+; RV64-NEXT:    srli a0, a0, 63
+; RV64-NEXT:    vslide1down.vx v12, v12, a0
+; RV64-NEXT:    slli a0, a2, 61
+; RV64-NEXT:    srli a0, a0, 63
+; RV64-NEXT:    vslide1down.vx v12, v12, a0
+; RV64-NEXT:    slli a0, a2, 60
+; RV64-NEXT:    srli a0, a0, 63
+; RV64-NEXT:    vslide1down.vx v12, v12, a0
+; RV64-NEXT:    slli a0, a2, 59
+; RV64-NEXT:    srli a0, a0, 63
+; RV64-NEXT:    vslide1down.vx v12, v12, a0
 ; RV64-NEXT:    srli a2, a2, 5
-; RV64-NEXT:    vslide1down.vx v10, v10, a2
-; RV64-NEXT:    vslidedown.vi v10, v10, 2
-; RV64-NEXT:    vand.vi v10, v10, 1
-; RV64-NEXT:    vmsne.vi v0, v10, 0
-; RV64-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; RV64-NEXT:    vle32.v v8, (a0), v0.t
-; RV64-NEXT:    vsetivli zero, 1, e64, m2, ta, ma
-; RV64-NEXT:    vslidedown.vi v10, v8, 2
-; RV64-NEXT:    addi a0, a3, 16
-; RV64-NEXT:    vse64.v v10, (a0)
-; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; RV64-NEXT:    vslide1down.vx v12, v12, a2
+; RV64-NEXT:    vslidedown.vi v12, v12, 2
+; RV64-NEXT:    vand.vi v12, v12, 1
+; RV64-NEXT:    vmsne.vi v0, v12, 0
+; RV64-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; RV64-NEXT:    vmerge.vvm v8, v10, v8, v0
+; RV64-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
 ; RV64-NEXT:    vse32.v v8, (a3)
 ; RV64-NEXT:    ret
   %va = load <6 x i32>, ptr %a
@@ -82,10 +77,11 @@ define void @vselect_vv_v6i32(ptr %a, ptr %b, ptr %cc, ptr %z) {
 define void @vselect_vx_v6i32(i32 %a, ptr %b, ptr %cc, ptr %z) {
 ; RV32-LABEL: vselect_vx_v6i32:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; RV32-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
 ; RV32-NEXT:    lbu a2, 0(a2)
 ; RV32-NEXT:    vle32.v v8, (a1)
 ; RV32-NEXT:    andi a1, a2, 1
+; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
 ; RV32-NEXT:    vslide1down.vx v10, v8, a1
 ; RV32-NEXT:    slli a1, a2, 30
 ; RV32-NEXT:    srli a1, a1, 31
@@ -106,21 +102,17 @@ define void @vselect_vx_v6i32(i32 %a, ptr %b, ptr %cc, ptr %z) {
 ; RV32-NEXT:    vmsne.vi v0, v10, 0
 ; RV32-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; RV32-NEXT:    vmerge.vxm v8, v8, a0, v0
-; RV32-NEXT:    vsetivli zero, 2, e32, m2, ta, ma
-; RV32-NEXT:    vslidedown.vi v10, v8, 4
-; RV32-NEXT:    addi a0, a3, 16
-; RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; RV32-NEXT:    vse32.v v10, (a0)
-; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; RV32-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
 ; RV32-NEXT:    vse32.v v8, (a3)
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vselect_vx_v6i32:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; RV64-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
 ; RV64-NEXT:    lbu a2, 0(a2)
 ; RV64-NEXT:    vle32.v v8, (a1)
 ; RV64-NEXT:    andi a1, a2, 1
+; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
 ; RV64-NEXT:    vslide1down.vx v10, v8, a1
 ; RV64-NEXT:    slli a1, a2, 62
 ; RV64-NEXT:    srli a1, a1, 63
@@ -141,11 +133,7 @@ define void @vselect_vx_v6i32(i32 %a, ptr %b, ptr %cc, ptr %z) {
 ; RV64-NEXT:    vmsne.vi v0, v10, 0
 ; RV64-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; RV64-NEXT:    vmerge.vxm v8, v8, a0, v0
-; RV64-NEXT:    vsetivli zero, 1, e64, m2, ta, ma
-; RV64-NEXT:    vslidedown.vi v10, v8, 2
-; RV64-NEXT:    addi a0, a3, 16
-; RV64-NEXT:    vse64.v v10, (a0)
-; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; RV64-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
 ; RV64-NEXT:    vse32.v v8, (a3)
 ; RV64-NEXT:    ret
   %vb = load <6 x i32>, ptr %b
@@ -160,10 +148,11 @@ define void @vselect_vx_v6i32(i32 %a, ptr %b, ptr %cc, ptr %z) {
 define void @vselect_vi_v6i32(ptr %b, ptr %cc, ptr %z) {
 ; RV32-LABEL: vselect_vi_v6i32:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; RV32-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
 ; RV32-NEXT:    lbu a1, 0(a1)
 ; RV32-NEXT:    vle32.v v8, (a0)
 ; RV32-NEXT:    andi a0, a1, 1
+; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
 ; RV32-NEXT:    vslide1down.vx v10, v8, a0
 ; RV32-NEXT:    slli a0, a1, 30
 ; RV32-NEXT:    srli a0, a0, 31
@@ -184,21 +173,17 @@ define void @vselect_vi_v6i32(ptr %b, ptr %cc, ptr %z) {
 ; RV32-NEXT:    vmsne.vi v0, v10, 0
 ; RV32-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; RV32-NEXT:    vmerge.vim v8, v8, -1, v0
-; RV32-NEXT:    vsetivli zero, 2, e32, m2, ta, ma
-; RV32-NEXT:    vslidedown.vi v10, v8, 4
-; RV32-NEXT:    addi a0, a2, 16
-; RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; RV32-NEXT:    vse32.v v10, (a0)
-; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; RV32-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
 ; RV32-NEXT:    vse32.v v8, (a2)
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vselect_vi_v6i32:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; RV64-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
 ; RV64-NEXT:    lbu a1, 0(a1)
 ; RV64-NEXT:    vle32.v v8, (a0)
 ; RV64-NEXT:    andi a0, a1, 1
+; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
 ; RV64-NEXT:    vslide1down.vx v10, v8, a0
 ; RV64-NEXT:    slli a0, a1, 62
 ; RV64-NEXT:    srli a0, a0, 63
@@ -219,11 +204,7 @@ define void @vselect_vi_v6i32(ptr %b, ptr %cc, ptr %z) {
 ; RV64-NEXT:    vmsne.vi v0, v10, 0
 ; RV64-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; RV64-NEXT:    vmerge.vim v8, v8, -1, v0
-; RV64-NEXT:    vsetivli zero, 1, e64, m2, ta, ma
-; RV64-NEXT:    vslidedown.vi v10, v8, 2
-; RV64-NEXT:    addi a0, a2, 16
-; RV64-NEXT:    vse64.v v10, (a0)
-; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; RV64-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
 ; RV64-NEXT:    vse32.v v8, (a2)
 ; RV64-NEXT:    ret
   %vb = load <6 x i32>, ptr %b
@@ -239,70 +220,65 @@ define void @vselect_vi_v6i32(ptr %b, ptr %cc, ptr %z) {
 define void @vselect_vv_v6f32(ptr %a, ptr %b, ptr %cc, ptr %z) {
 ; RV32-LABEL: vselect_vv_v6f32:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; RV32-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
 ; RV32-NEXT:    lbu a2, 0(a2)
-; RV32-NEXT:    vle32.v v8, (a1)
-; RV32-NEXT:    andi a1, a2, 1
-; RV32-NEXT:    vslide1down.vx v10, v8, a1
-; RV32-NEXT:    slli a1, a2, 30
-; RV32-NEXT:    srli a1, a1, 31
-; RV32-NEXT:    vslide1down.vx v10, v10, a1
-; RV32-NEXT:    slli a1, a2, 29
-; RV32-NEXT:    srli a1, a1, 31
-; RV32-NEXT:    vslide1down.vx v10, v10, a1
-; RV32-NEXT:    slli a1, a2, 28
-; RV32-NEXT:    srli a1, a1, 31
-; RV32-NEXT:    vslide1down.vx v10, v10, a1
-; RV32-NEXT:    slli a1, a2, 27
-; RV32-NEXT:    srli a1, a1, 31
-; RV32-NEXT:    vslide1down.vx v10, v10, a1
+; RV32-NEXT:    vle32.v v8, (a0)
+; RV32-NEXT:    vle32.v v10, (a1)
+; RV32-NEXT:    andi a0, a2, 1
+; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; RV32-NEXT:    vslide1down.vx v12, v8, a0
+; RV32-NEXT:    slli a0, a2, 30
+; RV32-NEXT:    srli a0, a0, 31
+; RV32-NEXT:    vslide1down.vx v12, v12, a0
+; RV32-NEXT:    slli a0, a2, 29
+; RV32-NEXT:    srli a0, a0, 31
+; RV32-NEXT:    vslide1down.vx v12, v12, a0
+; RV32-NEXT:    slli a0, a2, 28
+; RV32-NEXT:    srli a0, a0, 31
+; RV32-NEXT:    vslide1down.vx v12, v12, a0
+; RV32-NEXT:    slli a0, a2, 27
+; RV32-NEXT:    srli a0, a0, 31
+; RV32-NEXT:    vslide1down.vx v12, v12, a0
 ; RV32-NEXT:    srli a2, a2, 5
-; RV32-NEXT:    vslide1down.vx v10, v10, a2
-; RV32-NEXT:    vslidedown.vi v10, v10, 2
-; RV32-NEXT:    vand.vi v10, v10, 1
-; RV32-NEXT:    vmsne.vi v0, v10, 0
-; RV32-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; RV32-NEXT:    vle32.v v8, (a0), v0.t
-; RV32-NEXT:    vsetivli zero, 2, e32, m2, ta, ma
-; RV32-NEXT:    vslidedown.vi v10, v8, 4
-; RV32-NEXT:    addi a0, a3, 16
-; RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; RV32-NEXT:    vse32.v v10, (a0)
-; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; RV32-NEXT:    vslide1down.vx v12, v12, a2
+; RV32-NEXT:    vslidedown.vi v12, v12, 2
+; RV32-NEXT:    vand.vi v12, v12, 1
+; RV32-NEXT:    vmsne.vi v0, v12, 0
+; RV32-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; RV32-NEXT:    vmerge.vvm v8, v10, v8, v0
+; RV32-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
 ; RV32-NEXT:    vse32.v v8, (a3)
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vselect_vv_v6f32:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; RV64-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
 ; RV64-NEXT:    lbu a2, 0(a2)
-; RV64-NEXT:    vle32.v v8, (a1)
-; RV64-NEXT:    andi a1, a2, 1
-; RV64-NEXT:    vslide1down.vx v10, v8, a1
-; RV64-NEXT:    slli a1, a2, 62
-; RV64-NEXT:    srli a1, a1, 63
-; RV64-NEXT:    vslide1down.vx v10, v10, a1
-; RV64-NEXT:    slli a1, a2, 61
-; RV64-NEXT:    srli a1, a1, 63
-; RV64-NEXT:    vslide1down.vx v10, v10, a1
-; RV64-NEXT:    slli a1, a2, 60
-; RV64-NEXT:    srli a1, a1, 63
-; RV64-NEXT:    vslide1down.vx v10, v10, a1
-; RV64-NEXT:    slli a1, a2, 59
-; RV64-NEXT:    srli a1, a1, 63
-; RV64-NEXT:    vslide1down.vx v10, v10, a1
+; RV64-NEXT:    vle32.v v8, (a0)
+; RV64-NEXT:    vle32.v v10, (a1)
+; RV64-NEXT:    andi a0, a2, 1
+; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; RV64-NEXT:    vslide1down.vx v12, v8, a0
+; RV64-NEXT:    slli a0, a2, 62
+; RV64-NEXT:    srli a0, a0, 63
+; RV64-NEXT:    vslide1down.vx v12, v12, a0
+; RV64-NEXT:    slli a0, a2, 61
+; RV64-NEXT:    srli a0, a0, 63
+; RV64-NEXT:    vslide1down.vx v12, v12, a0
+; RV64-NEXT:    slli a0, a2, 60
+; RV64-NEXT:    srli a0, a0, 63
+; RV64-NEXT:    vslide1down.vx v12, v12, a0
+; RV64-NEXT:    slli a0, a2, 59
+; RV64-NEXT:    srli a0, a0, 63
+; RV64-NEXT:    vslide1down.vx v12, v12, a0
 ; RV64-NEXT:    srli a2, a2, 5
-; RV64-NEXT:    vslide1down.vx v10, v10, a2
-; RV64-NEXT:    vslidedown.vi v10, v10, 2
-; RV64-NEXT:    vand.vi v10, v10, 1
-; RV64-NEXT:    vmsne.vi v0, v10, 0
-; RV64-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; RV64-NEXT:    vle32.v v8, (a0), v0.t
-; RV64-NEXT:    vsetivli zero, 1, e64, m2, ta, ma
-; RV64-NEXT:    vslidedown.vi v10, v8, 2
-; RV64-NEXT:    addi a0, a3, 16
-; RV64-NEXT:    vse64.v v10, (a0)
-; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; RV64-NEXT:    vslide1down.vx v12, v12, a2
+; RV64-NEXT:    vslidedown.vi v12, v12, 2
+; RV64-NEXT:    vand.vi v12, v12, 1
+; RV64-NEXT:    vmsne.vi v0, v12, 0
+; RV64-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; RV64-NEXT:    vmerge.vvm v8, v10, v8, v0
+; RV64-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
 ; RV64-NEXT:    vse32.v v8, (a3)
 ; RV64-NEXT:    ret
   %va = load <6 x float>, ptr %a
@@ -316,10 +292,11 @@ define void @vselect_vv_v6f32(ptr %a, ptr %b, ptr %cc, ptr %z) {
 define void @vselect_vx_v6f32(float %a, ptr %b, ptr %cc, ptr %z) {
 ; RV32-LABEL: vselect_vx_v6f32:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; RV32-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
 ; RV32-NEXT:    lbu a1, 0(a1)
 ; RV32-NEXT:    vle32.v v8, (a0)
 ; RV32-NEXT:    andi a0, a1, 1
+; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
 ; RV32-NEXT:    vslide1down.vx v10, v8, a0
 ; RV32-NEXT:    slli a0, a1, 30
 ; RV32-NEXT:    srli a0, a0, 31
@@ -340,21 +317,17 @@ define void @vselect_vx_v6f32(float %a, ptr %b, ptr %cc, ptr %z) {
 ; RV32-NEXT:    vmsne.vi v0, v10, 0
 ; RV32-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; RV32-NEXT:    vfmerge.vfm v8, v8, fa0, v0
-; RV32-NEXT:    vsetivli zero, 2, e32, m2, ta, ma
-; RV32-NEXT:    vslidedown.vi v10, v8, 4
-; RV32-NEXT:    addi a0, a2, 16
-; RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; RV32-NEXT:    vse32.v v10, (a0)
-; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; RV32-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
 ; RV32-NEXT:    vse32.v v8, (a2)
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vselect_vx_v6f32:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; RV64-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
 ; RV64-NEXT:    lbu a1, 0(a1)
 ; RV64-NEXT:    vle32.v v8, (a0)
 ; RV64-NEXT:    andi a0, a1, 1
+; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
 ; RV64-NEXT:    vslide1down.vx v10, v8, a0
 ; RV64-NEXT:    slli a0, a1, 62
 ; RV64-NEXT:    srli a0, a0, 63
@@ -375,11 +348,7 @@ define void @vselect_vx_v6f32(float %a, ptr %b, ptr %cc, ptr %z) {
 ; RV64-NEXT:    vmsne.vi v0, v10, 0
 ; RV64-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; RV64-NEXT:    vfmerge.vfm v8, v8, fa0, v0
-; RV64-NEXT:    vsetivli zero, 1, e64, m2, ta, ma
-; RV64-NEXT:    vslidedown.vi v10, v8, 2
-; RV64-NEXT:    addi a0, a2, 16
-; RV64-NEXT:    vse64.v v10, (a0)
-; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; RV64-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
 ; RV64-NEXT:    vse32.v v8, (a2)
 ; RV64-NEXT:    ret
   %vb = load <6 x float>, ptr %b
@@ -394,10 +363,11 @@ define void @vselect_vx_v6f32(float %a, ptr %b, ptr %cc, ptr %z) {
 define void @vselect_vfpzero_v6f32(ptr %b, ptr %cc, ptr %z) {
 ; RV32-LABEL: vselect_vfpzero_v6f32:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; RV32-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
 ; RV32-NEXT:    lbu a1, 0(a1)
 ; RV32-NEXT:    vle32.v v8, (a0)
 ; RV32-NEXT:    andi a0, a1, 1
+; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
 ; RV32-NEXT:    vslide1down.vx v10, v8, a0
 ; RV32-NEXT:    slli a0, a1, 30
 ; RV32-NEXT:    srli a0, a0, 31
@@ -418,21 +388,17 @@ define void @vselect_vfpzero_v6f32(ptr %b, ptr %cc, ptr %z) {
 ; RV32-NEXT:    vmsne.vi v0, v10, 0
 ; RV32-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; RV32-NEXT:    vmerge.vim v8, v8, 0, v0
-; RV32-NEXT:    vsetivli zero, 2, e32, m2, ta, ma
-; RV32-NEXT:    vslidedown.vi v10, v8, 4
-; RV32-NEXT:    addi a0, a2, 16
-; RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; RV32-NEXT:    vse32.v v10, (a0)
-; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; RV32-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
 ; RV32-NEXT:    vse32.v v8, (a2)
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vselect_vfpzero_v6f32:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; RV64-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
 ; RV64-NEXT:    lbu a1, 0(a1)
 ; RV64-NEXT:    vle32.v v8, (a0)
 ; RV64-NEXT:    andi a0, a1, 1
+; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
 ; RV64-NEXT:    vslide1down.vx v10, v8, a0
 ; RV64-NEXT:    slli a0, a1, 62
 ; RV64-NEXT:    srli a0, a0, 63
@@ -453,11 +419,7 @@ define void @vselect_vfpzero_v6f32(ptr %b, ptr %cc, ptr %z) {
 ; RV64-NEXT:    vmsne.vi v0, v10, 0
 ; RV64-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; RV64-NEXT:    vmerge.vim v8, v8, 0, v0
-; RV64-NEXT:    vsetivli zero, 1, e64, m2, ta, ma
-; RV64-NEXT:    vslidedown.vi v10, v8, 2
-; RV64-NEXT:    addi a0, a2, 16
-; RV64-NEXT:    vse64.v v10, (a0)
-; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; RV64-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
 ; RV64-NEXT:    vse32.v v8, (a2)
 ; RV64-NEXT:    ret
   %vb = load <6 x float>, ptr %b