[llvm] [RISCV] Handle scalable ops with < EEW / 2 narrow types in combineBinOp_VLToVWBinOp_VL (PR #84158)

Luke Lau via llvm-commits llvm-commits at lists.llvm.org
Mon Mar 11 03:04:28 PDT 2024


https://github.com/lukel97 updated https://github.com/llvm/llvm-project/pull/84158

From 74ea8fe4c16490c41d60521323c38e4f0b9c0c15 Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Wed, 6 Mar 2024 15:09:35 +0800
Subject: [PATCH] [RISCV] Handle scalable ops with < EEW / 2 narrow types in
 combineBinOp_VLToVWBinOp_VL

We can remove the restriction that the narrow type needs to be exactly EEW
/ 2 for scalable ISD::{ADD,SUB,MUL} nodes. This allows us to perform the
combine even if we can't fully fold the extend into the widening op.

VP intrinsics already do this, since they are lowered to _VL nodes which
don't have this restriction.

The "exactly EEW / 2" narrow type restriction prevented us from emitting
V{S,Z}EXT_VL nodes with i1 element types which crash when we try to select
them, since no other legal type is double the size of i1.

So to preserve this, this patch also restricts the combine to only run after
the legalize vector ops phase, at which point all unselectable i1 vectors
should have been custom lowered away.
---
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp   |  14 +-
 .../RISCV/rvv/vscale-vw-web-simplification.ll |  38 +-
 llvm/test/CodeGen/RISCV/rvv/vwadd-sdnode.ll   | 462 +++++++++---------
 llvm/test/CodeGen/RISCV/rvv/vwmul-sdnode.ll   | 384 +++++++--------
 llvm/test/CodeGen/RISCV/rvv/vwsub-sdnode.ll   | 320 ++++++------
 5 files changed, 613 insertions(+), 605 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 08678a859ae2b6..025137707aef33 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -13652,16 +13652,6 @@ struct NodeExtensionHelper {
       if (!VT.isVector())
         break;
 
-      SDValue NarrowElt = OrigOperand.getOperand(0);
-      MVT NarrowVT = NarrowElt.getSimpleValueType();
-
-      unsigned ScalarBits = VT.getScalarSizeInBits();
-      unsigned NarrowScalarBits = NarrowVT.getScalarSizeInBits();
-
-      // Ensure the extension's semantic is equivalent to rvv vzext or vsext.
-      if (ScalarBits != NarrowScalarBits * 2)
-        break;
-
       SupportsZExt = Opc == ISD::ZERO_EXTEND;
       SupportsSExt = Opc == ISD::SIGN_EXTEND;
 
@@ -14112,7 +14102,9 @@ static SDValue combineBinOp_VLToVWBinOp_VL(SDNode *N,
                                            TargetLowering::DAGCombinerInfo &DCI,
                                            const RISCVSubtarget &Subtarget) {
   SelectionDAG &DAG = DCI.DAG;
-  if (DCI.isBeforeLegalize())
+  // Don't perform this until types are legalized and any legal i1 types are
+  // custom lowered to avoid introducing unselectable V{S,Z}EXT_VLs.
+  if (DCI.isBeforeLegalizeOps())
     return SDValue();
 
   if (!NodeExtensionHelper::isSupportedRoot(N))
diff --git a/llvm/test/CodeGen/RISCV/rvv/vscale-vw-web-simplification.ll b/llvm/test/CodeGen/RISCV/rvv/vscale-vw-web-simplification.ll
index 972fa66917a568..e56dca0732bb4c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vscale-vw-web-simplification.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vscale-vw-web-simplification.ll
@@ -283,18 +283,19 @@ define <vscale x 2 x i32> @vwop_vscale_sext_i8i32_multiple_users(ptr %x, ptr %y,
 ;
 ; FOLDING-LABEL: vwop_vscale_sext_i8i32_multiple_users:
 ; FOLDING:       # %bb.0:
-; FOLDING-NEXT:    vsetvli a3, zero, e32, m1, ta, ma
+; FOLDING-NEXT:    vsetvli a3, zero, e16, mf2, ta, ma
 ; FOLDING-NEXT:    vle8.v v8, (a0)
 ; FOLDING-NEXT:    vle8.v v9, (a1)
 ; FOLDING-NEXT:    vle8.v v10, (a2)
-; FOLDING-NEXT:    vsext.vf4 v11, v8
-; FOLDING-NEXT:    vsext.vf4 v8, v9
-; FOLDING-NEXT:    vsext.vf4 v9, v10
-; FOLDING-NEXT:    vmul.vv v8, v11, v8
-; FOLDING-NEXT:    vadd.vv v10, v11, v9
-; FOLDING-NEXT:    vsub.vv v9, v11, v9
-; FOLDING-NEXT:    vor.vv v8, v8, v10
-; FOLDING-NEXT:    vor.vv v8, v8, v9
+; FOLDING-NEXT:    vsext.vf2 v11, v8
+; FOLDING-NEXT:    vsext.vf2 v8, v9
+; FOLDING-NEXT:    vsext.vf2 v9, v10
+; FOLDING-NEXT:    vwmul.vv v10, v11, v8
+; FOLDING-NEXT:    vwadd.vv v8, v11, v9
+; FOLDING-NEXT:    vwsub.vv v12, v11, v9
+; FOLDING-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; FOLDING-NEXT:    vor.vv v8, v10, v8
+; FOLDING-NEXT:    vor.vv v8, v8, v12
 ; FOLDING-NEXT:    ret
   %a = load <vscale x 2 x i8>, ptr %x
   %b = load <vscale x 2 x i8>, ptr %y
@@ -563,18 +564,19 @@ define <vscale x 2 x i32> @vwop_vscale_zext_i8i32_multiple_users(ptr %x, ptr %y,
 ;
 ; FOLDING-LABEL: vwop_vscale_zext_i8i32_multiple_users:
 ; FOLDING:       # %bb.0:
-; FOLDING-NEXT:    vsetvli a3, zero, e32, m1, ta, ma
+; FOLDING-NEXT:    vsetvli a3, zero, e16, mf2, ta, ma
 ; FOLDING-NEXT:    vle8.v v8, (a0)
 ; FOLDING-NEXT:    vle8.v v9, (a1)
 ; FOLDING-NEXT:    vle8.v v10, (a2)
-; FOLDING-NEXT:    vzext.vf4 v11, v8
-; FOLDING-NEXT:    vzext.vf4 v8, v9
-; FOLDING-NEXT:    vzext.vf4 v9, v10
-; FOLDING-NEXT:    vmul.vv v8, v11, v8
-; FOLDING-NEXT:    vadd.vv v10, v11, v9
-; FOLDING-NEXT:    vsub.vv v9, v11, v9
-; FOLDING-NEXT:    vor.vv v8, v8, v10
-; FOLDING-NEXT:    vor.vv v8, v8, v9
+; FOLDING-NEXT:    vzext.vf2 v11, v8
+; FOLDING-NEXT:    vzext.vf2 v8, v9
+; FOLDING-NEXT:    vzext.vf2 v9, v10
+; FOLDING-NEXT:    vwmulu.vv v10, v11, v8
+; FOLDING-NEXT:    vwaddu.vv v8, v11, v9
+; FOLDING-NEXT:    vwsubu.vv v12, v11, v9
+; FOLDING-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; FOLDING-NEXT:    vor.vv v8, v10, v8
+; FOLDING-NEXT:    vor.vv v8, v8, v12
 ; FOLDING-NEXT:    ret
   %a = load <vscale x 2 x i8>, ptr %x
   %b = load <vscale x 2 x i8>, ptr %y
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwadd-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vwadd-sdnode.ll
index a559fbf2bc8a7a..4152e61c0541ae 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vwadd-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vwadd-sdnode.ll
@@ -421,10 +421,10 @@ define <vscale x 8 x i64> @vwaddu_wx_nxv8i64_nxv8i32(<vscale x 8 x i64> %va, i32
 define <vscale x 1 x i64> @vwadd_vv_nxv1i64_nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %vb) {
 ; CHECK-LABEL: vwadd_vv_nxv1i64_nxv1i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
-; CHECK-NEXT:    vsext.vf4 v10, v8
-; CHECK-NEXT:    vsext.vf4 v8, v9
-; CHECK-NEXT:    vadd.vv v8, v10, v8
+; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
+; CHECK-NEXT:    vsext.vf2 v10, v8
+; CHECK-NEXT:    vsext.vf2 v11, v9
+; CHECK-NEXT:    vwadd.vv v8, v10, v11
 ; CHECK-NEXT:    ret
   %vc = sext <vscale x 1 x i16> %va to <vscale x 1 x i64>
   %vd = sext <vscale x 1 x i16> %vb to <vscale x 1 x i64>
@@ -435,10 +435,10 @@ define <vscale x 1 x i64> @vwadd_vv_nxv1i64_nxv1i16(<vscale x 1 x i16> %va, <vsc
 define <vscale x 1 x i64> @vwaddu_vv_nxv1i64_nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %vb) {
 ; CHECK-LABEL: vwaddu_vv_nxv1i64_nxv1i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
-; CHECK-NEXT:    vzext.vf4 v10, v8
-; CHECK-NEXT:    vzext.vf4 v8, v9
-; CHECK-NEXT:    vadd.vv v8, v10, v8
+; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
+; CHECK-NEXT:    vzext.vf2 v10, v8
+; CHECK-NEXT:    vzext.vf2 v11, v9
+; CHECK-NEXT:    vwaddu.vv v8, v10, v11
 ; CHECK-NEXT:    ret
   %vc = zext <vscale x 1 x i16> %va to <vscale x 1 x i64>
   %vd = zext <vscale x 1 x i16> %vb to <vscale x 1 x i64>
@@ -451,10 +451,10 @@ define <vscale x 1 x i64> @vwadd_vx_nxv1i64_nxv1i16(<vscale x 1 x i16> %va, i16
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
 ; CHECK-NEXT:    vmv.v.x v9, a0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
-; CHECK-NEXT:    vsext.vf4 v10, v8
-; CHECK-NEXT:    vsext.vf4 v8, v9
-; CHECK-NEXT:    vadd.vv v8, v10, v8
+; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-NEXT:    vsext.vf2 v10, v8
+; CHECK-NEXT:    vsext.vf2 v11, v9
+; CHECK-NEXT:    vwadd.vv v8, v10, v11
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 1 x i16> poison, i16 %b, i16 0
   %splat = shufflevector <vscale x 1 x i16> %head, <vscale x 1 x i16> poison, <vscale x 1 x i32> zeroinitializer
@@ -469,10 +469,10 @@ define <vscale x 1 x i64> @vwaddu_vx_nxv1i64_nxv1i16(<vscale x 1 x i16> %va, i16
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
 ; CHECK-NEXT:    vmv.v.x v9, a0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
-; CHECK-NEXT:    vzext.vf4 v10, v8
-; CHECK-NEXT:    vzext.vf4 v8, v9
-; CHECK-NEXT:    vadd.vv v8, v10, v8
+; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-NEXT:    vzext.vf2 v10, v8
+; CHECK-NEXT:    vzext.vf2 v11, v9
+; CHECK-NEXT:    vwaddu.vv v8, v10, v11
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 1 x i16> poison, i16 %b, i16 0
   %splat = shufflevector <vscale x 1 x i16> %head, <vscale x 1 x i16> poison, <vscale x 1 x i32> zeroinitializer
@@ -485,9 +485,9 @@ define <vscale x 1 x i64> @vwaddu_vx_nxv1i64_nxv1i16(<vscale x 1 x i16> %va, i16
 define <vscale x 1 x i64> @vwadd_wv_nxv1i64_nxv1i16(<vscale x 1 x i64> %va, <vscale x 1 x i16> %vb) {
 ; CHECK-LABEL: vwadd_wv_nxv1i64_nxv1i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
-; CHECK-NEXT:    vsext.vf4 v10, v9
-; CHECK-NEXT:    vadd.vv v8, v8, v10
+; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
+; CHECK-NEXT:    vsext.vf2 v10, v9
+; CHECK-NEXT:    vwadd.wv v8, v8, v10
 ; CHECK-NEXT:    ret
   %vc = sext <vscale x 1 x i16> %vb to <vscale x 1 x i64>
   %vd = add <vscale x 1 x i64> %va, %vc
@@ -497,9 +497,9 @@ define <vscale x 1 x i64> @vwadd_wv_nxv1i64_nxv1i16(<vscale x 1 x i64> %va, <vsc
 define <vscale x 1 x i64> @vwaddu_wv_nxv1i64_nxv1i16(<vscale x 1 x i64> %va, <vscale x 1 x i16> %vb) {
 ; CHECK-LABEL: vwaddu_wv_nxv1i64_nxv1i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
-; CHECK-NEXT:    vzext.vf4 v10, v9
-; CHECK-NEXT:    vadd.vv v8, v8, v10
+; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
+; CHECK-NEXT:    vzext.vf2 v10, v9
+; CHECK-NEXT:    vwaddu.wv v8, v8, v10
 ; CHECK-NEXT:    ret
   %vc = zext <vscale x 1 x i16> %vb to <vscale x 1 x i64>
   %vd = add <vscale x 1 x i64> %va, %vc
@@ -511,9 +511,9 @@ define <vscale x 1 x i64> @vwadd_wx_nxv1i64_nxv1i16(<vscale x 1 x i64> %va, i16
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
 ; CHECK-NEXT:    vmv.v.x v9, a0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
-; CHECK-NEXT:    vsext.vf4 v10, v9
-; CHECK-NEXT:    vadd.vv v8, v8, v10
+; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-NEXT:    vsext.vf2 v10, v9
+; CHECK-NEXT:    vwadd.wv v8, v8, v10
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 1 x i16> poison, i16 %b, i16 0
   %splat = shufflevector <vscale x 1 x i16> %head, <vscale x 1 x i16> poison, <vscale x 1 x i32> zeroinitializer
@@ -527,9 +527,9 @@ define <vscale x 1 x i64> @vwaddu_wx_nxv1i64_nxv1i16(<vscale x 1 x i64> %va, i16
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
 ; CHECK-NEXT:    vmv.v.x v9, a0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
-; CHECK-NEXT:    vzext.vf4 v10, v9
-; CHECK-NEXT:    vadd.vv v8, v8, v10
+; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-NEXT:    vzext.vf2 v10, v9
+; CHECK-NEXT:    vwaddu.wv v8, v8, v10
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 1 x i16> poison, i16 %b, i16 0
   %splat = shufflevector <vscale x 1 x i16> %head, <vscale x 1 x i16> poison, <vscale x 1 x i32> zeroinitializer
@@ -541,10 +541,10 @@ define <vscale x 1 x i64> @vwaddu_wx_nxv1i64_nxv1i16(<vscale x 1 x i64> %va, i16
 define <vscale x 2 x i64> @vwadd_vv_nxv2i64_nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %vb) {
 ; CHECK-LABEL: vwadd_vv_nxv2i64_nxv2i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vsext.vf4 v10, v8
-; CHECK-NEXT:    vsext.vf4 v12, v9
-; CHECK-NEXT:    vadd.vv v8, v10, v12
+; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-NEXT:    vsext.vf2 v10, v8
+; CHECK-NEXT:    vsext.vf2 v11, v9
+; CHECK-NEXT:    vwadd.vv v8, v10, v11
 ; CHECK-NEXT:    ret
   %vc = sext <vscale x 2 x i16> %va to <vscale x 2 x i64>
   %vd = sext <vscale x 2 x i16> %vb to <vscale x 2 x i64>
@@ -555,10 +555,10 @@ define <vscale x 2 x i64> @vwadd_vv_nxv2i64_nxv2i16(<vscale x 2 x i16> %va, <vsc
 define <vscale x 2 x i64> @vwaddu_vv_nxv2i64_nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %vb) {
 ; CHECK-LABEL: vwaddu_vv_nxv2i64_nxv2i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vzext.vf4 v10, v8
-; CHECK-NEXT:    vzext.vf4 v12, v9
-; CHECK-NEXT:    vadd.vv v8, v10, v12
+; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-NEXT:    vzext.vf2 v10, v8
+; CHECK-NEXT:    vzext.vf2 v11, v9
+; CHECK-NEXT:    vwaddu.vv v8, v10, v11
 ; CHECK-NEXT:    ret
   %vc = zext <vscale x 2 x i16> %va to <vscale x 2 x i64>
   %vd = zext <vscale x 2 x i16> %vb to <vscale x 2 x i64>
@@ -571,10 +571,10 @@ define <vscale x 2 x i64> @vwadd_vx_nxv2i64_nxv2i16(<vscale x 2 x i16> %va, i16
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
 ; CHECK-NEXT:    vmv.v.x v9, a0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vsext.vf4 v10, v8
-; CHECK-NEXT:    vsext.vf4 v12, v9
-; CHECK-NEXT:    vadd.vv v8, v10, v12
+; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-NEXT:    vsext.vf2 v10, v8
+; CHECK-NEXT:    vsext.vf2 v11, v9
+; CHECK-NEXT:    vwadd.vv v8, v10, v11
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x i16> poison, i16 %b, i16 0
   %splat = shufflevector <vscale x 2 x i16> %head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
@@ -589,10 +589,10 @@ define <vscale x 2 x i64> @vwaddu_vx_nxv2i64_nxv2i16(<vscale x 2 x i16> %va, i16
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
 ; CHECK-NEXT:    vmv.v.x v9, a0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vzext.vf4 v10, v8
-; CHECK-NEXT:    vzext.vf4 v12, v9
-; CHECK-NEXT:    vadd.vv v8, v10, v12
+; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-NEXT:    vzext.vf2 v10, v8
+; CHECK-NEXT:    vzext.vf2 v11, v9
+; CHECK-NEXT:    vwaddu.vv v8, v10, v11
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x i16> poison, i16 %b, i16 0
   %splat = shufflevector <vscale x 2 x i16> %head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
@@ -605,9 +605,9 @@ define <vscale x 2 x i64> @vwaddu_vx_nxv2i64_nxv2i16(<vscale x 2 x i16> %va, i16
 define <vscale x 2 x i64> @vwadd_wv_nxv2i64_nxv2i16(<vscale x 2 x i64> %va, <vscale x 2 x i16> %vb) {
 ; CHECK-LABEL: vwadd_wv_nxv2i64_nxv2i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vsext.vf4 v12, v10
-; CHECK-NEXT:    vadd.vv v8, v8, v12
+; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-NEXT:    vsext.vf2 v11, v10
+; CHECK-NEXT:    vwadd.wv v8, v8, v11
 ; CHECK-NEXT:    ret
   %vc = sext <vscale x 2 x i16> %vb to <vscale x 2 x i64>
   %vd = add <vscale x 2 x i64> %va, %vc
@@ -617,9 +617,9 @@ define <vscale x 2 x i64> @vwadd_wv_nxv2i64_nxv2i16(<vscale x 2 x i64> %va, <vsc
 define <vscale x 2 x i64> @vwaddu_wv_nxv2i64_nxv2i16(<vscale x 2 x i64> %va, <vscale x 2 x i16> %vb) {
 ; CHECK-LABEL: vwaddu_wv_nxv2i64_nxv2i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vzext.vf4 v12, v10
-; CHECK-NEXT:    vadd.vv v8, v8, v12
+; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-NEXT:    vzext.vf2 v11, v10
+; CHECK-NEXT:    vwaddu.wv v8, v8, v11
 ; CHECK-NEXT:    ret
   %vc = zext <vscale x 2 x i16> %vb to <vscale x 2 x i64>
   %vd = add <vscale x 2 x i64> %va, %vc
@@ -631,9 +631,9 @@ define <vscale x 2 x i64> @vwadd_wx_nxv2i64_nxv2i16(<vscale x 2 x i64> %va, i16
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
 ; CHECK-NEXT:    vmv.v.x v10, a0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vsext.vf4 v12, v10
-; CHECK-NEXT:    vadd.vv v8, v8, v12
+; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-NEXT:    vsext.vf2 v11, v10
+; CHECK-NEXT:    vwadd.wv v8, v8, v11
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x i16> poison, i16 %b, i16 0
   %splat = shufflevector <vscale x 2 x i16> %head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
@@ -647,9 +647,9 @@ define <vscale x 2 x i64> @vwaddu_wx_nxv2i64_nxv2i16(<vscale x 2 x i64> %va, i16
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
 ; CHECK-NEXT:    vmv.v.x v10, a0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vzext.vf4 v12, v10
-; CHECK-NEXT:    vadd.vv v8, v8, v12
+; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-NEXT:    vzext.vf2 v11, v10
+; CHECK-NEXT:    vwaddu.wv v8, v8, v11
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x i16> poison, i16 %b, i16 0
   %splat = shufflevector <vscale x 2 x i16> %head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
@@ -661,10 +661,10 @@ define <vscale x 2 x i64> @vwaddu_wx_nxv2i64_nxv2i16(<vscale x 2 x i64> %va, i16
 define <vscale x 4 x i64> @vwadd_vv_nxv4i64_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %vb) {
 ; CHECK-LABEL: vwadd_vv_nxv4i64_nxv4i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vsext.vf4 v12, v8
-; CHECK-NEXT:    vsext.vf4 v16, v9
-; CHECK-NEXT:    vadd.vv v8, v12, v16
+; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vsext.vf2 v12, v8
+; CHECK-NEXT:    vsext.vf2 v14, v9
+; CHECK-NEXT:    vwadd.vv v8, v12, v14
 ; CHECK-NEXT:    ret
   %vc = sext <vscale x 4 x i16> %va to <vscale x 4 x i64>
   %vd = sext <vscale x 4 x i16> %vb to <vscale x 4 x i64>
@@ -675,10 +675,10 @@ define <vscale x 4 x i64> @vwadd_vv_nxv4i64_nxv4i16(<vscale x 4 x i16> %va, <vsc
 define <vscale x 4 x i64> @vwaddu_vv_nxv4i64_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %vb) {
 ; CHECK-LABEL: vwaddu_vv_nxv4i64_nxv4i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vzext.vf4 v12, v8
-; CHECK-NEXT:    vzext.vf4 v16, v9
-; CHECK-NEXT:    vadd.vv v8, v12, v16
+; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vzext.vf2 v12, v8
+; CHECK-NEXT:    vzext.vf2 v14, v9
+; CHECK-NEXT:    vwaddu.vv v8, v12, v14
 ; CHECK-NEXT:    ret
   %vc = zext <vscale x 4 x i16> %va to <vscale x 4 x i64>
   %vd = zext <vscale x 4 x i16> %vb to <vscale x 4 x i64>
@@ -691,10 +691,10 @@ define <vscale x 4 x i64> @vwadd_vx_nxv4i64_nxv4i16(<vscale x 4 x i16> %va, i16
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vmv.v.x v9, a0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vsext.vf4 v12, v8
-; CHECK-NEXT:    vsext.vf4 v16, v9
-; CHECK-NEXT:    vadd.vv v8, v12, v16
+; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vsext.vf2 v12, v8
+; CHECK-NEXT:    vsext.vf2 v14, v9
+; CHECK-NEXT:    vwadd.vv v8, v12, v14
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x i16> poison, i16 %b, i16 0
   %splat = shufflevector <vscale x 4 x i16> %head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
@@ -709,10 +709,10 @@ define <vscale x 4 x i64> @vwaddu_vx_nxv4i64_nxv4i16(<vscale x 4 x i16> %va, i16
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vmv.v.x v9, a0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vzext.vf4 v12, v8
-; CHECK-NEXT:    vzext.vf4 v16, v9
-; CHECK-NEXT:    vadd.vv v8, v12, v16
+; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vzext.vf2 v12, v8
+; CHECK-NEXT:    vzext.vf2 v14, v9
+; CHECK-NEXT:    vwaddu.vv v8, v12, v14
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x i16> poison, i16 %b, i16 0
   %splat = shufflevector <vscale x 4 x i16> %head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
@@ -725,9 +725,9 @@ define <vscale x 4 x i64> @vwaddu_vx_nxv4i64_nxv4i16(<vscale x 4 x i16> %va, i16
 define <vscale x 4 x i64> @vwadd_wv_nxv4i64_nxv4i16(<vscale x 4 x i64> %va, <vscale x 4 x i16> %vb) {
 ; CHECK-LABEL: vwadd_wv_nxv4i64_nxv4i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vsext.vf4 v16, v12
-; CHECK-NEXT:    vadd.vv v8, v8, v16
+; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vsext.vf2 v14, v12
+; CHECK-NEXT:    vwadd.wv v8, v8, v14
 ; CHECK-NEXT:    ret
   %vc = sext <vscale x 4 x i16> %vb to <vscale x 4 x i64>
   %vd = add <vscale x 4 x i64> %va, %vc
@@ -737,9 +737,9 @@ define <vscale x 4 x i64> @vwadd_wv_nxv4i64_nxv4i16(<vscale x 4 x i64> %va, <vsc
 define <vscale x 4 x i64> @vwaddu_wv_nxv4i64_nxv4i16(<vscale x 4 x i64> %va, <vscale x 4 x i16> %vb) {
 ; CHECK-LABEL: vwaddu_wv_nxv4i64_nxv4i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vzext.vf4 v16, v12
-; CHECK-NEXT:    vadd.vv v8, v8, v16
+; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vzext.vf2 v14, v12
+; CHECK-NEXT:    vwaddu.wv v8, v8, v14
 ; CHECK-NEXT:    ret
   %vc = zext <vscale x 4 x i16> %vb to <vscale x 4 x i64>
   %vd = add <vscale x 4 x i64> %va, %vc
@@ -751,9 +751,9 @@ define <vscale x 4 x i64> @vwadd_wx_nxv4i64_nxv4i16(<vscale x 4 x i64> %va, i16
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vmv.v.x v12, a0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vsext.vf4 v16, v12
-; CHECK-NEXT:    vadd.vv v8, v8, v16
+; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vsext.vf2 v14, v12
+; CHECK-NEXT:    vwadd.wv v8, v8, v14
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x i16> poison, i16 %b, i16 0
   %splat = shufflevector <vscale x 4 x i16> %head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
@@ -767,9 +767,9 @@ define <vscale x 4 x i64> @vwaddu_wx_nxv4i64_nxv4i16(<vscale x 4 x i64> %va, i16
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vmv.v.x v12, a0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vzext.vf4 v16, v12
-; CHECK-NEXT:    vadd.vv v8, v8, v16
+; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vzext.vf2 v14, v12
+; CHECK-NEXT:    vwaddu.wv v8, v8, v14
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x i16> poison, i16 %b, i16 0
   %splat = shufflevector <vscale x 4 x i16> %head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
@@ -781,10 +781,10 @@ define <vscale x 4 x i64> @vwaddu_wx_nxv4i64_nxv4i16(<vscale x 4 x i64> %va, i16
 define <vscale x 8 x i64> @vwadd_vv_nxv8i64_nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %vb) {
 ; CHECK-LABEL: vwadd_vv_nxv8i64_nxv8i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vsext.vf4 v16, v8
-; CHECK-NEXT:    vsext.vf4 v24, v10
-; CHECK-NEXT:    vadd.vv v8, v16, v24
+; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
+; CHECK-NEXT:    vsext.vf2 v16, v8
+; CHECK-NEXT:    vsext.vf2 v20, v10
+; CHECK-NEXT:    vwadd.vv v8, v16, v20
 ; CHECK-NEXT:    ret
   %vc = sext <vscale x 8 x i16> %va to <vscale x 8 x i64>
   %vd = sext <vscale x 8 x i16> %vb to <vscale x 8 x i64>
@@ -795,10 +795,10 @@ define <vscale x 8 x i64> @vwadd_vv_nxv8i64_nxv8i16(<vscale x 8 x i16> %va, <vsc
 define <vscale x 8 x i64> @vwaddu_vv_nxv8i64_nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %vb) {
 ; CHECK-LABEL: vwaddu_vv_nxv8i64_nxv8i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vzext.vf4 v16, v8
-; CHECK-NEXT:    vzext.vf4 v24, v10
-; CHECK-NEXT:    vadd.vv v8, v16, v24
+; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
+; CHECK-NEXT:    vzext.vf2 v16, v8
+; CHECK-NEXT:    vzext.vf2 v20, v10
+; CHECK-NEXT:    vwaddu.vv v8, v16, v20
 ; CHECK-NEXT:    ret
   %vc = zext <vscale x 8 x i16> %va to <vscale x 8 x i64>
   %vd = zext <vscale x 8 x i16> %vb to <vscale x 8 x i64>
@@ -811,10 +811,10 @@ define <vscale x 8 x i64> @vwadd_vx_nxv8i64_nxv8i16(<vscale x 8 x i16> %va, i16
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vmv.v.x v10, a0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vsext.vf4 v16, v8
-; CHECK-NEXT:    vsext.vf4 v24, v10
-; CHECK-NEXT:    vadd.vv v8, v16, v24
+; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT:    vsext.vf2 v16, v8
+; CHECK-NEXT:    vsext.vf2 v20, v10
+; CHECK-NEXT:    vwadd.vv v8, v16, v20
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x i16> poison, i16 %b, i16 0
   %splat = shufflevector <vscale x 8 x i16> %head, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
@@ -829,10 +829,10 @@ define <vscale x 8 x i64> @vwaddu_vx_nxv8i64_nxv8i16(<vscale x 8 x i16> %va, i16
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vmv.v.x v10, a0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vzext.vf4 v16, v8
-; CHECK-NEXT:    vzext.vf4 v24, v10
-; CHECK-NEXT:    vadd.vv v8, v16, v24
+; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT:    vzext.vf2 v16, v8
+; CHECK-NEXT:    vzext.vf2 v20, v10
+; CHECK-NEXT:    vwaddu.vv v8, v16, v20
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x i16> poison, i16 %b, i16 0
   %splat = shufflevector <vscale x 8 x i16> %head, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
@@ -845,9 +845,9 @@ define <vscale x 8 x i64> @vwaddu_vx_nxv8i64_nxv8i16(<vscale x 8 x i16> %va, i16
 define <vscale x 8 x i64> @vwadd_wv_nxv8i64_nxv8i16(<vscale x 8 x i64> %va, <vscale x 8 x i16> %vb) {
 ; CHECK-LABEL: vwadd_wv_nxv8i64_nxv8i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vsext.vf4 v24, v16
-; CHECK-NEXT:    vadd.vv v8, v8, v24
+; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
+; CHECK-NEXT:    vsext.vf2 v20, v16
+; CHECK-NEXT:    vwadd.wv v8, v8, v20
 ; CHECK-NEXT:    ret
   %vc = sext <vscale x 8 x i16> %vb to <vscale x 8 x i64>
   %vd = add <vscale x 8 x i64> %va, %vc
@@ -857,9 +857,9 @@ define <vscale x 8 x i64> @vwadd_wv_nxv8i64_nxv8i16(<vscale x 8 x i64> %va, <vsc
 define <vscale x 8 x i64> @vwaddu_wv_nxv8i64_nxv8i16(<vscale x 8 x i64> %va, <vscale x 8 x i16> %vb) {
 ; CHECK-LABEL: vwaddu_wv_nxv8i64_nxv8i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vzext.vf4 v24, v16
-; CHECK-NEXT:    vadd.vv v8, v8, v24
+; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
+; CHECK-NEXT:    vzext.vf2 v20, v16
+; CHECK-NEXT:    vwaddu.wv v8, v8, v20
 ; CHECK-NEXT:    ret
   %vc = zext <vscale x 8 x i16> %vb to <vscale x 8 x i64>
   %vd = add <vscale x 8 x i64> %va, %vc
@@ -871,9 +871,9 @@ define <vscale x 8 x i64> @vwadd_wx_nxv8i64_nxv8i16(<vscale x 8 x i64> %va, i16
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vmv.v.x v16, a0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vsext.vf4 v24, v16
-; CHECK-NEXT:    vadd.vv v8, v8, v24
+; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT:    vsext.vf2 v20, v16
+; CHECK-NEXT:    vwadd.wv v8, v8, v20
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x i16> poison, i16 %b, i16 0
   %splat = shufflevector <vscale x 8 x i16> %head, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
@@ -887,9 +887,9 @@ define <vscale x 8 x i64> @vwaddu_wx_nxv8i64_nxv8i16(<vscale x 8 x i64> %va, i16
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vmv.v.x v16, a0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vzext.vf4 v24, v16
-; CHECK-NEXT:    vadd.vv v8, v8, v24
+; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT:    vzext.vf2 v20, v16
+; CHECK-NEXT:    vwaddu.wv v8, v8, v20
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x i16> poison, i16 %b, i16 0
   %splat = shufflevector <vscale x 8 x i16> %head, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
@@ -901,10 +901,10 @@ define <vscale x 8 x i64> @vwaddu_wx_nxv8i64_nxv8i16(<vscale x 8 x i64> %va, i16
 define <vscale x 1 x i64> @vwadd_vv_nxv1i64_nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb) {
 ; CHECK-LABEL: vwadd_vv_nxv1i64_nxv1i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
-; CHECK-NEXT:    vsext.vf8 v10, v8
-; CHECK-NEXT:    vsext.vf8 v8, v9
-; CHECK-NEXT:    vadd.vv v8, v10, v8
+; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
+; CHECK-NEXT:    vsext.vf4 v10, v8
+; CHECK-NEXT:    vsext.vf4 v11, v9
+; CHECK-NEXT:    vwadd.vv v8, v10, v11
 ; CHECK-NEXT:    ret
   %vc = sext <vscale x 1 x i8> %va to <vscale x 1 x i64>
   %vd = sext <vscale x 1 x i8> %vb to <vscale x 1 x i64>
@@ -915,10 +915,10 @@ define <vscale x 1 x i64> @vwadd_vv_nxv1i64_nxv1i8(<vscale x 1 x i8> %va, <vscal
 define <vscale x 1 x i64> @vwaddu_vv_nxv1i64_nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb) {
 ; CHECK-LABEL: vwaddu_vv_nxv1i64_nxv1i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
-; CHECK-NEXT:    vzext.vf8 v10, v8
-; CHECK-NEXT:    vzext.vf8 v8, v9
-; CHECK-NEXT:    vadd.vv v8, v10, v8
+; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
+; CHECK-NEXT:    vzext.vf4 v10, v8
+; CHECK-NEXT:    vzext.vf4 v11, v9
+; CHECK-NEXT:    vwaddu.vv v8, v10, v11
 ; CHECK-NEXT:    ret
   %vc = zext <vscale x 1 x i8> %va to <vscale x 1 x i64>
   %vd = zext <vscale x 1 x i8> %vb to <vscale x 1 x i64>
@@ -931,10 +931,10 @@ define <vscale x 1 x i64> @vwadd_vx_nxv1i64_nxv1i8(<vscale x 1 x i8> %va, i8 %b)
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e8, mf8, ta, ma
 ; CHECK-NEXT:    vmv.v.x v9, a0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
-; CHECK-NEXT:    vsext.vf8 v10, v8
-; CHECK-NEXT:    vsext.vf8 v8, v9
-; CHECK-NEXT:    vadd.vv v8, v10, v8
+; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-NEXT:    vsext.vf4 v10, v8
+; CHECK-NEXT:    vsext.vf4 v11, v9
+; CHECK-NEXT:    vwadd.vv v8, v10, v11
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 1 x i8> poison, i8 %b, i8 0
   %splat = shufflevector <vscale x 1 x i8> %head, <vscale x 1 x i8> poison, <vscale x 1 x i32> zeroinitializer
@@ -949,10 +949,10 @@ define <vscale x 1 x i64> @vwaddu_vx_nxv1i64_nxv1i8(<vscale x 1 x i8> %va, i8 %b
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e8, mf8, ta, ma
 ; CHECK-NEXT:    vmv.v.x v9, a0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
-; CHECK-NEXT:    vzext.vf8 v10, v8
-; CHECK-NEXT:    vzext.vf8 v8, v9
-; CHECK-NEXT:    vadd.vv v8, v10, v8
+; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-NEXT:    vzext.vf4 v10, v8
+; CHECK-NEXT:    vzext.vf4 v11, v9
+; CHECK-NEXT:    vwaddu.vv v8, v10, v11
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 1 x i8> poison, i8 %b, i8 0
   %splat = shufflevector <vscale x 1 x i8> %head, <vscale x 1 x i8> poison, <vscale x 1 x i32> zeroinitializer
@@ -965,9 +965,9 @@ define <vscale x 1 x i64> @vwaddu_vx_nxv1i64_nxv1i8(<vscale x 1 x i8> %va, i8 %b
 define <vscale x 1 x i64> @vwadd_wv_nxv1i64_nxv1i8(<vscale x 1 x i64> %va, <vscale x 1 x i8> %vb) {
 ; CHECK-LABEL: vwadd_wv_nxv1i64_nxv1i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
-; CHECK-NEXT:    vsext.vf8 v10, v9
-; CHECK-NEXT:    vadd.vv v8, v8, v10
+; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
+; CHECK-NEXT:    vsext.vf4 v10, v9
+; CHECK-NEXT:    vwadd.wv v8, v8, v10
 ; CHECK-NEXT:    ret
   %vc = sext <vscale x 1 x i8> %vb to <vscale x 1 x i64>
   %vd = add <vscale x 1 x i64> %va, %vc
@@ -977,9 +977,9 @@ define <vscale x 1 x i64> @vwadd_wv_nxv1i64_nxv1i8(<vscale x 1 x i64> %va, <vsca
 define <vscale x 1 x i64> @vwaddu_wv_nxv1i64_nxv1i8(<vscale x 1 x i64> %va, <vscale x 1 x i8> %vb) {
 ; CHECK-LABEL: vwaddu_wv_nxv1i64_nxv1i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
-; CHECK-NEXT:    vzext.vf8 v10, v9
-; CHECK-NEXT:    vadd.vv v8, v8, v10
+; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
+; CHECK-NEXT:    vzext.vf4 v10, v9
+; CHECK-NEXT:    vwaddu.wv v8, v8, v10
 ; CHECK-NEXT:    ret
   %vc = zext <vscale x 1 x i8> %vb to <vscale x 1 x i64>
   %vd = add <vscale x 1 x i64> %va, %vc
@@ -991,9 +991,9 @@ define <vscale x 1 x i64> @vwadd_wx_nxv1i64_nxv1i8(<vscale x 1 x i64> %va, i8 %b
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e8, mf8, ta, ma
 ; CHECK-NEXT:    vmv.v.x v9, a0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
-; CHECK-NEXT:    vsext.vf8 v10, v9
-; CHECK-NEXT:    vadd.vv v8, v8, v10
+; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-NEXT:    vsext.vf4 v10, v9
+; CHECK-NEXT:    vwadd.wv v8, v8, v10
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 1 x i8> poison, i8 %b, i8 0
   %splat = shufflevector <vscale x 1 x i8> %head, <vscale x 1 x i8> poison, <vscale x 1 x i32> zeroinitializer
@@ -1007,9 +1007,9 @@ define <vscale x 1 x i64> @vwaddu_wx_nxv1i64_nxv1i8(<vscale x 1 x i64> %va, i8 %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e8, mf8, ta, ma
 ; CHECK-NEXT:    vmv.v.x v9, a0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
-; CHECK-NEXT:    vzext.vf8 v10, v9
-; CHECK-NEXT:    vadd.vv v8, v8, v10
+; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-NEXT:    vzext.vf4 v10, v9
+; CHECK-NEXT:    vwaddu.wv v8, v8, v10
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 1 x i8> poison, i8 %b, i8 0
   %splat = shufflevector <vscale x 1 x i8> %head, <vscale x 1 x i8> poison, <vscale x 1 x i32> zeroinitializer
@@ -1021,10 +1021,10 @@ define <vscale x 1 x i64> @vwaddu_wx_nxv1i64_nxv1i8(<vscale x 1 x i64> %va, i8 %
 define <vscale x 2 x i64> @vwadd_vv_nxv2i64_nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %vb) {
 ; CHECK-LABEL: vwadd_vv_nxv2i64_nxv2i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vsext.vf8 v10, v8
-; CHECK-NEXT:    vsext.vf8 v12, v9
-; CHECK-NEXT:    vadd.vv v8, v10, v12
+; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-NEXT:    vsext.vf4 v10, v8
+; CHECK-NEXT:    vsext.vf4 v11, v9
+; CHECK-NEXT:    vwadd.vv v8, v10, v11
 ; CHECK-NEXT:    ret
   %vc = sext <vscale x 2 x i8> %va to <vscale x 2 x i64>
   %vd = sext <vscale x 2 x i8> %vb to <vscale x 2 x i64>
@@ -1035,10 +1035,10 @@ define <vscale x 2 x i64> @vwadd_vv_nxv2i64_nxv2i8(<vscale x 2 x i8> %va, <vscal
 define <vscale x 2 x i64> @vwaddu_vv_nxv2i64_nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %vb) {
 ; CHECK-LABEL: vwaddu_vv_nxv2i64_nxv2i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vzext.vf8 v10, v8
-; CHECK-NEXT:    vzext.vf8 v12, v9
-; CHECK-NEXT:    vadd.vv v8, v10, v12
+; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-NEXT:    vzext.vf4 v10, v8
+; CHECK-NEXT:    vzext.vf4 v11, v9
+; CHECK-NEXT:    vwaddu.vv v8, v10, v11
 ; CHECK-NEXT:    ret
   %vc = zext <vscale x 2 x i8> %va to <vscale x 2 x i64>
   %vd = zext <vscale x 2 x i8> %vb to <vscale x 2 x i64>
@@ -1051,10 +1051,10 @@ define <vscale x 2 x i64> @vwadd_vx_nxv2i64_nxv2i8(<vscale x 2 x i8> %va, i8 %b)
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e8, mf4, ta, ma
 ; CHECK-NEXT:    vmv.v.x v9, a0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vsext.vf8 v10, v8
-; CHECK-NEXT:    vsext.vf8 v12, v9
-; CHECK-NEXT:    vadd.vv v8, v10, v12
+; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-NEXT:    vsext.vf4 v10, v8
+; CHECK-NEXT:    vsext.vf4 v11, v9
+; CHECK-NEXT:    vwadd.vv v8, v10, v11
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x i8> poison, i8 %b, i8 0
   %splat = shufflevector <vscale x 2 x i8> %head, <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer
@@ -1069,10 +1069,10 @@ define <vscale x 2 x i64> @vwaddu_vx_nxv2i64_nxv2i8(<vscale x 2 x i8> %va, i8 %b
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e8, mf4, ta, ma
 ; CHECK-NEXT:    vmv.v.x v9, a0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vzext.vf8 v10, v8
-; CHECK-NEXT:    vzext.vf8 v12, v9
-; CHECK-NEXT:    vadd.vv v8, v10, v12
+; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-NEXT:    vzext.vf4 v10, v8
+; CHECK-NEXT:    vzext.vf4 v11, v9
+; CHECK-NEXT:    vwaddu.vv v8, v10, v11
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x i8> poison, i8 %b, i8 0
   %splat = shufflevector <vscale x 2 x i8> %head, <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer
@@ -1085,9 +1085,9 @@ define <vscale x 2 x i64> @vwaddu_vx_nxv2i64_nxv2i8(<vscale x 2 x i8> %va, i8 %b
 define <vscale x 2 x i64> @vwadd_wv_nxv2i64_nxv2i8(<vscale x 2 x i64> %va, <vscale x 2 x i8> %vb) {
 ; CHECK-LABEL: vwadd_wv_nxv2i64_nxv2i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vsext.vf8 v12, v10
-; CHECK-NEXT:    vadd.vv v8, v8, v12
+; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-NEXT:    vsext.vf4 v11, v10
+; CHECK-NEXT:    vwadd.wv v8, v8, v11
 ; CHECK-NEXT:    ret
   %vc = sext <vscale x 2 x i8> %vb to <vscale x 2 x i64>
   %vd = add <vscale x 2 x i64> %va, %vc
@@ -1097,9 +1097,9 @@ define <vscale x 2 x i64> @vwadd_wv_nxv2i64_nxv2i8(<vscale x 2 x i64> %va, <vsca
 define <vscale x 2 x i64> @vwaddu_wv_nxv2i64_nxv2i8(<vscale x 2 x i64> %va, <vscale x 2 x i8> %vb) {
 ; CHECK-LABEL: vwaddu_wv_nxv2i64_nxv2i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vzext.vf8 v12, v10
-; CHECK-NEXT:    vadd.vv v8, v8, v12
+; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-NEXT:    vzext.vf4 v11, v10
+; CHECK-NEXT:    vwaddu.wv v8, v8, v11
 ; CHECK-NEXT:    ret
   %vc = zext <vscale x 2 x i8> %vb to <vscale x 2 x i64>
   %vd = add <vscale x 2 x i64> %va, %vc
@@ -1111,9 +1111,9 @@ define <vscale x 2 x i64> @vwadd_wx_nxv2i64_nxv2i8(<vscale x 2 x i64> %va, i8 %b
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e8, mf4, ta, ma
 ; CHECK-NEXT:    vmv.v.x v10, a0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vsext.vf8 v12, v10
-; CHECK-NEXT:    vadd.vv v8, v8, v12
+; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-NEXT:    vsext.vf4 v11, v10
+; CHECK-NEXT:    vwadd.wv v8, v8, v11
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x i8> poison, i8 %b, i8 0
   %splat = shufflevector <vscale x 2 x i8> %head, <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer
@@ -1127,9 +1127,9 @@ define <vscale x 2 x i64> @vwaddu_wx_nxv2i64_nxv2i8(<vscale x 2 x i64> %va, i8 %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e8, mf4, ta, ma
 ; CHECK-NEXT:    vmv.v.x v10, a0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vzext.vf8 v12, v10
-; CHECK-NEXT:    vadd.vv v8, v8, v12
+; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-NEXT:    vzext.vf4 v11, v10
+; CHECK-NEXT:    vwaddu.wv v8, v8, v11
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x i8> poison, i8 %b, i8 0
   %splat = shufflevector <vscale x 2 x i8> %head, <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer
@@ -1141,10 +1141,10 @@ define <vscale x 2 x i64> @vwaddu_wx_nxv2i64_nxv2i8(<vscale x 2 x i64> %va, i8 %
 define <vscale x 4 x i64> @vwadd_vv_nxv4i64_nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %vb) {
 ; CHECK-LABEL: vwadd_vv_nxv4i64_nxv4i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vsext.vf8 v12, v8
-; CHECK-NEXT:    vsext.vf8 v16, v9
-; CHECK-NEXT:    vadd.vv v8, v12, v16
+; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vsext.vf4 v12, v8
+; CHECK-NEXT:    vsext.vf4 v14, v9
+; CHECK-NEXT:    vwadd.vv v8, v12, v14
 ; CHECK-NEXT:    ret
   %vc = sext <vscale x 4 x i8> %va to <vscale x 4 x i64>
   %vd = sext <vscale x 4 x i8> %vb to <vscale x 4 x i64>
@@ -1155,10 +1155,10 @@ define <vscale x 4 x i64> @vwadd_vv_nxv4i64_nxv4i8(<vscale x 4 x i8> %va, <vscal
 define <vscale x 4 x i64> @vwaddu_vv_nxv4i64_nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %vb) {
 ; CHECK-LABEL: vwaddu_vv_nxv4i64_nxv4i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vzext.vf8 v12, v8
-; CHECK-NEXT:    vzext.vf8 v16, v9
-; CHECK-NEXT:    vadd.vv v8, v12, v16
+; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vzext.vf4 v12, v8
+; CHECK-NEXT:    vzext.vf4 v14, v9
+; CHECK-NEXT:    vwaddu.vv v8, v12, v14
 ; CHECK-NEXT:    ret
   %vc = zext <vscale x 4 x i8> %va to <vscale x 4 x i64>
   %vd = zext <vscale x 4 x i8> %vb to <vscale x 4 x i64>
@@ -1171,10 +1171,10 @@ define <vscale x 4 x i64> @vwadd_vx_nxv4i64_nxv4i8(<vscale x 4 x i8> %va, i8 %b)
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e8, mf2, ta, ma
 ; CHECK-NEXT:    vmv.v.x v9, a0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vsext.vf8 v12, v8
-; CHECK-NEXT:    vsext.vf8 v16, v9
-; CHECK-NEXT:    vadd.vv v8, v12, v16
+; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vsext.vf4 v12, v8
+; CHECK-NEXT:    vsext.vf4 v14, v9
+; CHECK-NEXT:    vwadd.vv v8, v12, v14
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x i8> poison, i8 %b, i8 0
   %splat = shufflevector <vscale x 4 x i8> %head, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer
@@ -1189,10 +1189,10 @@ define <vscale x 4 x i64> @vwaddu_vx_nxv4i64_nxv4i8(<vscale x 4 x i8> %va, i8 %b
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e8, mf2, ta, ma
 ; CHECK-NEXT:    vmv.v.x v9, a0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vzext.vf8 v12, v8
-; CHECK-NEXT:    vzext.vf8 v16, v9
-; CHECK-NEXT:    vadd.vv v8, v12, v16
+; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vzext.vf4 v12, v8
+; CHECK-NEXT:    vzext.vf4 v14, v9
+; CHECK-NEXT:    vwaddu.vv v8, v12, v14
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x i8> poison, i8 %b, i8 0
   %splat = shufflevector <vscale x 4 x i8> %head, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer
@@ -1205,9 +1205,9 @@ define <vscale x 4 x i64> @vwaddu_vx_nxv4i64_nxv4i8(<vscale x 4 x i8> %va, i8 %b
 define <vscale x 4 x i64> @vwadd_wv_nxv4i64_nxv4i8(<vscale x 4 x i64> %va, <vscale x 4 x i8> %vb) {
 ; CHECK-LABEL: vwadd_wv_nxv4i64_nxv4i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vsext.vf8 v16, v12
-; CHECK-NEXT:    vadd.vv v8, v8, v16
+; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vsext.vf4 v14, v12
+; CHECK-NEXT:    vwadd.wv v8, v8, v14
 ; CHECK-NEXT:    ret
   %vc = sext <vscale x 4 x i8> %vb to <vscale x 4 x i64>
   %vd = add <vscale x 4 x i64> %va, %vc
@@ -1217,9 +1217,9 @@ define <vscale x 4 x i64> @vwadd_wv_nxv4i64_nxv4i8(<vscale x 4 x i64> %va, <vsca
 define <vscale x 4 x i64> @vwaddu_wv_nxv4i64_nxv4i8(<vscale x 4 x i64> %va, <vscale x 4 x i8> %vb) {
 ; CHECK-LABEL: vwaddu_wv_nxv4i64_nxv4i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vzext.vf8 v16, v12
-; CHECK-NEXT:    vadd.vv v8, v8, v16
+; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vzext.vf4 v14, v12
+; CHECK-NEXT:    vwaddu.wv v8, v8, v14
 ; CHECK-NEXT:    ret
   %vc = zext <vscale x 4 x i8> %vb to <vscale x 4 x i64>
   %vd = add <vscale x 4 x i64> %va, %vc
@@ -1231,9 +1231,9 @@ define <vscale x 4 x i64> @vwadd_wx_nxv4i64_nxv4i8(<vscale x 4 x i64> %va, i8 %b
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e8, mf2, ta, ma
 ; CHECK-NEXT:    vmv.v.x v12, a0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vsext.vf8 v16, v12
-; CHECK-NEXT:    vadd.vv v8, v8, v16
+; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vsext.vf4 v14, v12
+; CHECK-NEXT:    vwadd.wv v8, v8, v14
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x i8> poison, i8 %b, i8 0
   %splat = shufflevector <vscale x 4 x i8> %head, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer
@@ -1247,9 +1247,9 @@ define <vscale x 4 x i64> @vwaddu_wx_nxv4i64_nxv4i8(<vscale x 4 x i64> %va, i8 %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e8, mf2, ta, ma
 ; CHECK-NEXT:    vmv.v.x v12, a0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vzext.vf8 v16, v12
-; CHECK-NEXT:    vadd.vv v8, v8, v16
+; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vzext.vf4 v14, v12
+; CHECK-NEXT:    vwaddu.wv v8, v8, v14
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x i8> poison, i8 %b, i8 0
   %splat = shufflevector <vscale x 4 x i8> %head, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer
@@ -1261,10 +1261,10 @@ define <vscale x 4 x i64> @vwaddu_wx_nxv4i64_nxv4i8(<vscale x 4 x i64> %va, i8 %
 define <vscale x 8 x i64> @vwadd_vv_nxv8i64_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %vb) {
 ; CHECK-LABEL: vwadd_vv_nxv8i64_nxv8i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vsext.vf8 v16, v8
-; CHECK-NEXT:    vsext.vf8 v24, v9
-; CHECK-NEXT:    vadd.vv v8, v16, v24
+; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
+; CHECK-NEXT:    vsext.vf4 v16, v8
+; CHECK-NEXT:    vsext.vf4 v20, v9
+; CHECK-NEXT:    vwadd.vv v8, v16, v20
 ; CHECK-NEXT:    ret
   %vc = sext <vscale x 8 x i8> %va to <vscale x 8 x i64>
   %vd = sext <vscale x 8 x i8> %vb to <vscale x 8 x i64>
@@ -1275,10 +1275,10 @@ define <vscale x 8 x i64> @vwadd_vv_nxv8i64_nxv8i8(<vscale x 8 x i8> %va, <vscal
 define <vscale x 8 x i64> @vwaddu_vv_nxv8i64_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %vb) {
 ; CHECK-LABEL: vwaddu_vv_nxv8i64_nxv8i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vzext.vf8 v16, v8
-; CHECK-NEXT:    vzext.vf8 v24, v9
-; CHECK-NEXT:    vadd.vv v8, v16, v24
+; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
+; CHECK-NEXT:    vzext.vf4 v16, v8
+; CHECK-NEXT:    vzext.vf4 v20, v9
+; CHECK-NEXT:    vwaddu.vv v8, v16, v20
 ; CHECK-NEXT:    ret
   %vc = zext <vscale x 8 x i8> %va to <vscale x 8 x i64>
   %vd = zext <vscale x 8 x i8> %vb to <vscale x 8 x i64>
@@ -1291,10 +1291,10 @@ define <vscale x 8 x i64> @vwadd_vx_nxv8i64_nxv8i8(<vscale x 8 x i8> %va, i8 %b)
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv.v.x v9, a0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vsext.vf8 v16, v8
-; CHECK-NEXT:    vsext.vf8 v24, v9
-; CHECK-NEXT:    vadd.vv v8, v16, v24
+; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT:    vsext.vf4 v16, v8
+; CHECK-NEXT:    vsext.vf4 v20, v9
+; CHECK-NEXT:    vwadd.vv v8, v16, v20
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x i8> poison, i8 %b, i8 0
   %splat = shufflevector <vscale x 8 x i8> %head, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
@@ -1309,10 +1309,10 @@ define <vscale x 8 x i64> @vwaddu_vx_nxv8i64_nxv8i8(<vscale x 8 x i8> %va, i8 %b
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv.v.x v9, a0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vzext.vf8 v16, v8
-; CHECK-NEXT:    vzext.vf8 v24, v9
-; CHECK-NEXT:    vadd.vv v8, v16, v24
+; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT:    vzext.vf4 v16, v8
+; CHECK-NEXT:    vzext.vf4 v20, v9
+; CHECK-NEXT:    vwaddu.vv v8, v16, v20
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x i8> poison, i8 %b, i8 0
   %splat = shufflevector <vscale x 8 x i8> %head, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
@@ -1325,9 +1325,9 @@ define <vscale x 8 x i64> @vwaddu_vx_nxv8i64_nxv8i8(<vscale x 8 x i8> %va, i8 %b
 define <vscale x 8 x i64> @vwadd_wv_nxv8i64_nxv8i8(<vscale x 8 x i64> %va, <vscale x 8 x i8> %vb) {
 ; CHECK-LABEL: vwadd_wv_nxv8i64_nxv8i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vsext.vf8 v24, v16
-; CHECK-NEXT:    vadd.vv v8, v8, v24
+; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
+; CHECK-NEXT:    vsext.vf4 v20, v16
+; CHECK-NEXT:    vwadd.wv v8, v8, v20
 ; CHECK-NEXT:    ret
   %vc = sext <vscale x 8 x i8> %vb to <vscale x 8 x i64>
   %vd = add <vscale x 8 x i64> %va, %vc
@@ -1337,9 +1337,9 @@ define <vscale x 8 x i64> @vwadd_wv_nxv8i64_nxv8i8(<vscale x 8 x i64> %va, <vsca
 define <vscale x 8 x i64> @vwaddu_wv_nxv8i64_nxv8i8(<vscale x 8 x i64> %va, <vscale x 8 x i8> %vb) {
 ; CHECK-LABEL: vwaddu_wv_nxv8i64_nxv8i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vzext.vf8 v24, v16
-; CHECK-NEXT:    vadd.vv v8, v8, v24
+; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
+; CHECK-NEXT:    vzext.vf4 v20, v16
+; CHECK-NEXT:    vwaddu.wv v8, v8, v20
 ; CHECK-NEXT:    ret
   %vc = zext <vscale x 8 x i8> %vb to <vscale x 8 x i64>
   %vd = add <vscale x 8 x i64> %va, %vc
@@ -1351,9 +1351,9 @@ define <vscale x 8 x i64> @vwadd_wx_nxv8i64_nxv8i8(<vscale x 8 x i64> %va, i8 %b
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv.v.x v16, a0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vsext.vf8 v24, v16
-; CHECK-NEXT:    vadd.vv v8, v8, v24
+; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT:    vsext.vf4 v20, v16
+; CHECK-NEXT:    vwadd.wv v8, v8, v20
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x i8> poison, i8 %b, i8 0
   %splat = shufflevector <vscale x 8 x i8> %head, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
@@ -1367,9 +1367,9 @@ define <vscale x 8 x i64> @vwaddu_wx_nxv8i64_nxv8i8(<vscale x 8 x i64> %va, i8 %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv.v.x v16, a0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vzext.vf8 v24, v16
-; CHECK-NEXT:    vadd.vv v8, v8, v24
+; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT:    vzext.vf4 v20, v16
+; CHECK-NEXT:    vwaddu.wv v8, v8, v20
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x i8> poison, i8 %b, i8 0
   %splat = shufflevector <vscale x 8 x i8> %head, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
@@ -1377,3 +1377,17 @@ define <vscale x 8 x i64> @vwaddu_wx_nxv8i64_nxv8i8(<vscale x 8 x i64> %va, i8 %
   %vc = add <vscale x 8 x i64> %va, %vb
   ret <vscale x 8 x i64> %vc
 }
+
+; Make sure that we don't introduce any V{S,Z}EXT_VL nodes with i1 types from
+; combineBinOp_VLToVWBinOp_VL, since they can't be selected.
+define <vscale x 1 x i64> @foo(<vscale x 1 x i1> %va, <vscale x 1 x i64> %vb, ptr %p) {
+  %vc = zext <vscale x 1 x i1> %va to <vscale x 1 x i64>
+  %vd = add <vscale x 1 x i64> %vc, %vb
+
+; Introduce an illegal type so that the DAG changes after legalizing
+; types. Otherwise the legalize vector ops phase will be run immediately after
+; the legalize types phase, and the zext will already be in non-i1 form by the
+; time combineBinOp_VLToVWBinOp_VL is called.
+  store i9 42, ptr %p
+  ret <vscale x 1 x i64> %vd
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwmul-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vwmul-sdnode.ll
index 3634162eefd642..539a4bdb27ad59 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vwmul-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vwmul-sdnode.ll
@@ -341,10 +341,10 @@ define <vscale x 8 x i64> @vwmulsu_vx_nxv8i64_nxv8i32(<vscale x 8 x i32> %va, i3
 define <vscale x 1 x i64> @vwmul_vv_nxv1i64_nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %vb) {
 ; CHECK-LABEL: vwmul_vv_nxv1i64_nxv1i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
-; CHECK-NEXT:    vsext.vf4 v10, v8
-; CHECK-NEXT:    vsext.vf4 v8, v9
-; CHECK-NEXT:    vmul.vv v8, v10, v8
+; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
+; CHECK-NEXT:    vsext.vf2 v10, v8
+; CHECK-NEXT:    vsext.vf2 v11, v9
+; CHECK-NEXT:    vwmul.vv v8, v10, v11
 ; CHECK-NEXT:    ret
   %vc = sext <vscale x 1 x i16> %va to <vscale x 1 x i64>
   %vd = sext <vscale x 1 x i16> %vb to <vscale x 1 x i64>
@@ -355,10 +355,10 @@ define <vscale x 1 x i64> @vwmul_vv_nxv1i64_nxv1i16(<vscale x 1 x i16> %va, <vsc
 define <vscale x 1 x i64> @vwmulu_vv_nxv1i64_nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %vb) {
 ; CHECK-LABEL: vwmulu_vv_nxv1i64_nxv1i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
-; CHECK-NEXT:    vzext.vf4 v10, v8
-; CHECK-NEXT:    vzext.vf4 v8, v9
-; CHECK-NEXT:    vmul.vv v8, v10, v8
+; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
+; CHECK-NEXT:    vzext.vf2 v10, v8
+; CHECK-NEXT:    vzext.vf2 v11, v9
+; CHECK-NEXT:    vwmulu.vv v8, v10, v11
 ; CHECK-NEXT:    ret
   %vc = zext <vscale x 1 x i16> %va to <vscale x 1 x i64>
   %vd = zext <vscale x 1 x i16> %vb to <vscale x 1 x i64>
@@ -369,10 +369,10 @@ define <vscale x 1 x i64> @vwmulu_vv_nxv1i64_nxv1i16(<vscale x 1 x i16> %va, <vs
 define <vscale x 1 x i64> @vwmulsu_vv_nxv1i64_nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %vb) {
 ; CHECK-LABEL: vwmulsu_vv_nxv1i64_nxv1i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
-; CHECK-NEXT:    vsext.vf4 v10, v8
-; CHECK-NEXT:    vzext.vf4 v8, v9
-; CHECK-NEXT:    vmul.vv v8, v10, v8
+; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
+; CHECK-NEXT:    vsext.vf2 v10, v8
+; CHECK-NEXT:    vzext.vf2 v11, v9
+; CHECK-NEXT:    vwmulsu.vv v8, v10, v11
 ; CHECK-NEXT:    ret
   %vc = sext <vscale x 1 x i16> %va to <vscale x 1 x i64>
   %vd = zext <vscale x 1 x i16> %vb to <vscale x 1 x i64>
@@ -385,10 +385,10 @@ define <vscale x 1 x i64> @vwmul_vx_nxv1i64_nxv1i16(<vscale x 1 x i16> %va, i16
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
 ; CHECK-NEXT:    vmv.v.x v9, a0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
-; CHECK-NEXT:    vsext.vf4 v10, v8
-; CHECK-NEXT:    vsext.vf4 v8, v9
-; CHECK-NEXT:    vmul.vv v8, v10, v8
+; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-NEXT:    vsext.vf2 v10, v8
+; CHECK-NEXT:    vsext.vf2 v11, v9
+; CHECK-NEXT:    vwmul.vv v8, v10, v11
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 1 x i16> undef, i16 %b, i16 0
   %splat = shufflevector <vscale x 1 x i16> %head, <vscale x 1 x i16> undef, <vscale x 1 x i32> zeroinitializer
@@ -403,10 +403,10 @@ define <vscale x 1 x i64> @vwmulu_vx_nxv1i64_nxv1i16(<vscale x 1 x i16> %va, i16
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
 ; CHECK-NEXT:    vmv.v.x v9, a0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
-; CHECK-NEXT:    vzext.vf4 v10, v8
-; CHECK-NEXT:    vzext.vf4 v8, v9
-; CHECK-NEXT:    vmul.vv v8, v10, v8
+; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-NEXT:    vzext.vf2 v10, v8
+; CHECK-NEXT:    vzext.vf2 v11, v9
+; CHECK-NEXT:    vwmulu.vv v8, v10, v11
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 1 x i16> undef, i16 %b, i16 0
   %splat = shufflevector <vscale x 1 x i16> %head, <vscale x 1 x i16> undef, <vscale x 1 x i32> zeroinitializer
@@ -421,10 +421,10 @@ define <vscale x 1 x i64> @vwmulsu_vx_nxv1i64_nxv1i16(<vscale x 1 x i16> %va, i1
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
 ; CHECK-NEXT:    vmv.v.x v9, a0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
-; CHECK-NEXT:    vsext.vf4 v10, v8
-; CHECK-NEXT:    vzext.vf4 v8, v9
-; CHECK-NEXT:    vmul.vv v8, v10, v8
+; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-NEXT:    vsext.vf2 v10, v8
+; CHECK-NEXT:    vzext.vf2 v11, v9
+; CHECK-NEXT:    vwmulsu.vv v8, v10, v11
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 1 x i16> undef, i16 %b, i16 0
   %splat = shufflevector <vscale x 1 x i16> %head, <vscale x 1 x i16> undef, <vscale x 1 x i32> zeroinitializer
@@ -437,10 +437,10 @@ define <vscale x 1 x i64> @vwmulsu_vx_nxv1i64_nxv1i16(<vscale x 1 x i16> %va, i1
 define <vscale x 2 x i64> @vwmul_vv_nxv2i64_nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %vb) {
 ; CHECK-LABEL: vwmul_vv_nxv2i64_nxv2i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vsext.vf4 v10, v8
-; CHECK-NEXT:    vsext.vf4 v12, v9
-; CHECK-NEXT:    vmul.vv v8, v10, v12
+; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-NEXT:    vsext.vf2 v10, v8
+; CHECK-NEXT:    vsext.vf2 v11, v9
+; CHECK-NEXT:    vwmul.vv v8, v10, v11
 ; CHECK-NEXT:    ret
   %vc = sext <vscale x 2 x i16> %va to <vscale x 2 x i64>
   %vd = sext <vscale x 2 x i16> %vb to <vscale x 2 x i64>
@@ -451,10 +451,10 @@ define <vscale x 2 x i64> @vwmul_vv_nxv2i64_nxv2i16(<vscale x 2 x i16> %va, <vsc
 define <vscale x 2 x i64> @vwmulu_vv_nxv2i64_nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %vb) {
 ; CHECK-LABEL: vwmulu_vv_nxv2i64_nxv2i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vzext.vf4 v10, v8
-; CHECK-NEXT:    vzext.vf4 v12, v9
-; CHECK-NEXT:    vmul.vv v8, v10, v12
+; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-NEXT:    vzext.vf2 v10, v8
+; CHECK-NEXT:    vzext.vf2 v11, v9
+; CHECK-NEXT:    vwmulu.vv v8, v10, v11
 ; CHECK-NEXT:    ret
   %vc = zext <vscale x 2 x i16> %va to <vscale x 2 x i64>
   %vd = zext <vscale x 2 x i16> %vb to <vscale x 2 x i64>
@@ -465,10 +465,10 @@ define <vscale x 2 x i64> @vwmulu_vv_nxv2i64_nxv2i16(<vscale x 2 x i16> %va, <vs
 define <vscale x 2 x i64> @vwmulsu_vv_nxv2i64_nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %vb) {
 ; CHECK-LABEL: vwmulsu_vv_nxv2i64_nxv2i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vsext.vf4 v10, v8
-; CHECK-NEXT:    vzext.vf4 v12, v9
-; CHECK-NEXT:    vmul.vv v8, v10, v12
+; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-NEXT:    vsext.vf2 v10, v8
+; CHECK-NEXT:    vzext.vf2 v11, v9
+; CHECK-NEXT:    vwmulsu.vv v8, v10, v11
 ; CHECK-NEXT:    ret
   %vc = sext <vscale x 2 x i16> %va to <vscale x 2 x i64>
   %vd = zext <vscale x 2 x i16> %vb to <vscale x 2 x i64>
@@ -481,10 +481,10 @@ define <vscale x 2 x i64> @vwmul_vx_nxv2i64_nxv2i16(<vscale x 2 x i16> %va, i16
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
 ; CHECK-NEXT:    vmv.v.x v9, a0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vsext.vf4 v10, v8
-; CHECK-NEXT:    vsext.vf4 v12, v9
-; CHECK-NEXT:    vmul.vv v8, v10, v12
+; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-NEXT:    vsext.vf2 v10, v8
+; CHECK-NEXT:    vsext.vf2 v11, v9
+; CHECK-NEXT:    vwmul.vv v8, v10, v11
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x i16> undef, i16 %b, i16 0
   %splat = shufflevector <vscale x 2 x i16> %head, <vscale x 2 x i16> undef, <vscale x 2 x i32> zeroinitializer
@@ -499,10 +499,10 @@ define <vscale x 2 x i64> @vwmulu_vx_nxv2i64_nxv2i16(<vscale x 2 x i16> %va, i16
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
 ; CHECK-NEXT:    vmv.v.x v9, a0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vzext.vf4 v10, v8
-; CHECK-NEXT:    vzext.vf4 v12, v9
-; CHECK-NEXT:    vmul.vv v8, v10, v12
+; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-NEXT:    vzext.vf2 v10, v8
+; CHECK-NEXT:    vzext.vf2 v11, v9
+; CHECK-NEXT:    vwmulu.vv v8, v10, v11
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x i16> undef, i16 %b, i16 0
   %splat = shufflevector <vscale x 2 x i16> %head, <vscale x 2 x i16> undef, <vscale x 2 x i32> zeroinitializer
@@ -517,10 +517,10 @@ define <vscale x 2 x i64> @vwmulsu_vx_nxv2i64_nxv2i16(<vscale x 2 x i16> %va, i1
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
 ; CHECK-NEXT:    vmv.v.x v9, a0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vsext.vf4 v10, v8
-; CHECK-NEXT:    vzext.vf4 v12, v9
-; CHECK-NEXT:    vmul.vv v8, v10, v12
+; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-NEXT:    vsext.vf2 v10, v8
+; CHECK-NEXT:    vzext.vf2 v11, v9
+; CHECK-NEXT:    vwmulsu.vv v8, v10, v11
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x i16> undef, i16 %b, i16 0
   %splat = shufflevector <vscale x 2 x i16> %head, <vscale x 2 x i16> undef, <vscale x 2 x i32> zeroinitializer
@@ -533,10 +533,10 @@ define <vscale x 2 x i64> @vwmulsu_vx_nxv2i64_nxv2i16(<vscale x 2 x i16> %va, i1
 define <vscale x 4 x i64> @vwmul_vv_nxv4i64_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %vb) {
 ; CHECK-LABEL: vwmul_vv_nxv4i64_nxv4i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vsext.vf4 v12, v8
-; CHECK-NEXT:    vsext.vf4 v16, v9
-; CHECK-NEXT:    vmul.vv v8, v12, v16
+; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vsext.vf2 v12, v8
+; CHECK-NEXT:    vsext.vf2 v14, v9
+; CHECK-NEXT:    vwmul.vv v8, v12, v14
 ; CHECK-NEXT:    ret
   %vc = sext <vscale x 4 x i16> %va to <vscale x 4 x i64>
   %vd = sext <vscale x 4 x i16> %vb to <vscale x 4 x i64>
@@ -547,10 +547,10 @@ define <vscale x 4 x i64> @vwmul_vv_nxv4i64_nxv4i16(<vscale x 4 x i16> %va, <vsc
 define <vscale x 4 x i64> @vwmulu_vv_nxv4i64_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %vb) {
 ; CHECK-LABEL: vwmulu_vv_nxv4i64_nxv4i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vzext.vf4 v12, v8
-; CHECK-NEXT:    vzext.vf4 v16, v9
-; CHECK-NEXT:    vmul.vv v8, v12, v16
+; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vzext.vf2 v12, v8
+; CHECK-NEXT:    vzext.vf2 v14, v9
+; CHECK-NEXT:    vwmulu.vv v8, v12, v14
 ; CHECK-NEXT:    ret
   %vc = zext <vscale x 4 x i16> %va to <vscale x 4 x i64>
   %vd = zext <vscale x 4 x i16> %vb to <vscale x 4 x i64>
@@ -561,10 +561,10 @@ define <vscale x 4 x i64> @vwmulu_vv_nxv4i64_nxv4i16(<vscale x 4 x i16> %va, <vs
 define <vscale x 4 x i64> @vwmulsu_vv_nxv4i64_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %vb) {
 ; CHECK-LABEL: vwmulsu_vv_nxv4i64_nxv4i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vsext.vf4 v12, v8
-; CHECK-NEXT:    vzext.vf4 v16, v9
-; CHECK-NEXT:    vmul.vv v8, v12, v16
+; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vsext.vf2 v12, v8
+; CHECK-NEXT:    vzext.vf2 v14, v9
+; CHECK-NEXT:    vwmulsu.vv v8, v12, v14
 ; CHECK-NEXT:    ret
   %vc = sext <vscale x 4 x i16> %va to <vscale x 4 x i64>
   %vd = zext <vscale x 4 x i16> %vb to <vscale x 4 x i64>
@@ -577,10 +577,10 @@ define <vscale x 4 x i64> @vwmul_vx_nxv4i64_nxv4i16(<vscale x 4 x i16> %va, i16
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vmv.v.x v9, a0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vsext.vf4 v12, v8
-; CHECK-NEXT:    vsext.vf4 v16, v9
-; CHECK-NEXT:    vmul.vv v8, v12, v16
+; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vsext.vf2 v12, v8
+; CHECK-NEXT:    vsext.vf2 v14, v9
+; CHECK-NEXT:    vwmul.vv v8, v12, v14
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x i16> undef, i16 %b, i16 0
   %splat = shufflevector <vscale x 4 x i16> %head, <vscale x 4 x i16> undef, <vscale x 4 x i32> zeroinitializer
@@ -595,10 +595,10 @@ define <vscale x 4 x i64> @vwmulu_vx_nxv4i64_nxv4i16(<vscale x 4 x i16> %va, i16
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vmv.v.x v9, a0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vzext.vf4 v12, v8
-; CHECK-NEXT:    vzext.vf4 v16, v9
-; CHECK-NEXT:    vmul.vv v8, v12, v16
+; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vzext.vf2 v12, v8
+; CHECK-NEXT:    vzext.vf2 v14, v9
+; CHECK-NEXT:    vwmulu.vv v8, v12, v14
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x i16> undef, i16 %b, i16 0
   %splat = shufflevector <vscale x 4 x i16> %head, <vscale x 4 x i16> undef, <vscale x 4 x i32> zeroinitializer
@@ -613,10 +613,10 @@ define <vscale x 4 x i64> @vwmulsu_vx_nxv4i64_nxv4i16(<vscale x 4 x i16> %va, i1
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vmv.v.x v9, a0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vsext.vf4 v12, v8
-; CHECK-NEXT:    vzext.vf4 v16, v9
-; CHECK-NEXT:    vmul.vv v8, v12, v16
+; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vsext.vf2 v12, v8
+; CHECK-NEXT:    vzext.vf2 v14, v9
+; CHECK-NEXT:    vwmulsu.vv v8, v12, v14
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x i16> undef, i16 %b, i16 0
   %splat = shufflevector <vscale x 4 x i16> %head, <vscale x 4 x i16> undef, <vscale x 4 x i32> zeroinitializer
@@ -629,10 +629,10 @@ define <vscale x 4 x i64> @vwmulsu_vx_nxv4i64_nxv4i16(<vscale x 4 x i16> %va, i1
 define <vscale x 8 x i64> @vwmul_vv_nxv8i64_nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %vb) {
 ; CHECK-LABEL: vwmul_vv_nxv8i64_nxv8i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vsext.vf4 v16, v8
-; CHECK-NEXT:    vsext.vf4 v24, v10
-; CHECK-NEXT:    vmul.vv v8, v16, v24
+; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
+; CHECK-NEXT:    vsext.vf2 v16, v8
+; CHECK-NEXT:    vsext.vf2 v20, v10
+; CHECK-NEXT:    vwmul.vv v8, v16, v20
 ; CHECK-NEXT:    ret
   %vc = sext <vscale x 8 x i16> %va to <vscale x 8 x i64>
   %vd = sext <vscale x 8 x i16> %vb to <vscale x 8 x i64>
@@ -643,10 +643,10 @@ define <vscale x 8 x i64> @vwmul_vv_nxv8i64_nxv8i16(<vscale x 8 x i16> %va, <vsc
 define <vscale x 8 x i64> @vwmulu_vv_nxv8i64_nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %vb) {
 ; CHECK-LABEL: vwmulu_vv_nxv8i64_nxv8i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vzext.vf4 v16, v8
-; CHECK-NEXT:    vzext.vf4 v24, v10
-; CHECK-NEXT:    vmul.vv v8, v16, v24
+; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
+; CHECK-NEXT:    vzext.vf2 v16, v8
+; CHECK-NEXT:    vzext.vf2 v20, v10
+; CHECK-NEXT:    vwmulu.vv v8, v16, v20
 ; CHECK-NEXT:    ret
   %vc = zext <vscale x 8 x i16> %va to <vscale x 8 x i64>
   %vd = zext <vscale x 8 x i16> %vb to <vscale x 8 x i64>
@@ -657,10 +657,10 @@ define <vscale x 8 x i64> @vwmulu_vv_nxv8i64_nxv8i16(<vscale x 8 x i16> %va, <vs
 define <vscale x 8 x i64> @vwmulsu_vv_nxv8i64_nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %vb) {
 ; CHECK-LABEL: vwmulsu_vv_nxv8i64_nxv8i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vsext.vf4 v16, v8
-; CHECK-NEXT:    vzext.vf4 v24, v10
-; CHECK-NEXT:    vmul.vv v8, v16, v24
+; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
+; CHECK-NEXT:    vsext.vf2 v16, v8
+; CHECK-NEXT:    vzext.vf2 v20, v10
+; CHECK-NEXT:    vwmulsu.vv v8, v16, v20
 ; CHECK-NEXT:    ret
   %vc = sext <vscale x 8 x i16> %va to <vscale x 8 x i64>
   %vd = zext <vscale x 8 x i16> %vb to <vscale x 8 x i64>
@@ -673,10 +673,10 @@ define <vscale x 8 x i64> @vwmul_vx_nxv8i64_nxv8i16(<vscale x 8 x i16> %va, i16
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vmv.v.x v10, a0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vsext.vf4 v16, v8
-; CHECK-NEXT:    vsext.vf4 v24, v10
-; CHECK-NEXT:    vmul.vv v8, v16, v24
+; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT:    vsext.vf2 v16, v8
+; CHECK-NEXT:    vsext.vf2 v20, v10
+; CHECK-NEXT:    vwmul.vv v8, v16, v20
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x i16> undef, i16 %b, i16 0
   %splat = shufflevector <vscale x 8 x i16> %head, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
@@ -691,10 +691,10 @@ define <vscale x 8 x i64> @vwmulu_vx_nxv8i64_nxv8i16(<vscale x 8 x i16> %va, i16
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vmv.v.x v10, a0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vzext.vf4 v16, v8
-; CHECK-NEXT:    vzext.vf4 v24, v10
-; CHECK-NEXT:    vmul.vv v8, v16, v24
+; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT:    vzext.vf2 v16, v8
+; CHECK-NEXT:    vzext.vf2 v20, v10
+; CHECK-NEXT:    vwmulu.vv v8, v16, v20
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x i16> undef, i16 %b, i16 0
   %splat = shufflevector <vscale x 8 x i16> %head, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
@@ -709,10 +709,10 @@ define <vscale x 8 x i64> @vwmulsu_vx_nxv8i64_nxv8i16(<vscale x 8 x i16> %va, i1
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vmv.v.x v10, a0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vsext.vf4 v16, v8
-; CHECK-NEXT:    vzext.vf4 v24, v10
-; CHECK-NEXT:    vmul.vv v8, v16, v24
+; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT:    vsext.vf2 v16, v8
+; CHECK-NEXT:    vzext.vf2 v20, v10
+; CHECK-NEXT:    vwmulsu.vv v8, v16, v20
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x i16> undef, i16 %b, i16 0
   %splat = shufflevector <vscale x 8 x i16> %head, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
@@ -725,10 +725,10 @@ define <vscale x 8 x i64> @vwmulsu_vx_nxv8i64_nxv8i16(<vscale x 8 x i16> %va, i1
 define <vscale x 1 x i64> @vwmul_vv_nxv1i64_nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb) {
 ; CHECK-LABEL: vwmul_vv_nxv1i64_nxv1i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
-; CHECK-NEXT:    vsext.vf8 v10, v8
-; CHECK-NEXT:    vsext.vf8 v8, v9
-; CHECK-NEXT:    vmul.vv v8, v10, v8
+; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
+; CHECK-NEXT:    vsext.vf4 v10, v8
+; CHECK-NEXT:    vsext.vf4 v11, v9
+; CHECK-NEXT:    vwmul.vv v8, v10, v11
 ; CHECK-NEXT:    ret
   %vc = sext <vscale x 1 x i8> %va to <vscale x 1 x i64>
   %vd = sext <vscale x 1 x i8> %vb to <vscale x 1 x i64>
@@ -739,10 +739,10 @@ define <vscale x 1 x i64> @vwmul_vv_nxv1i64_nxv1i8(<vscale x 1 x i8> %va, <vscal
 define <vscale x 1 x i64> @vwmulu_vv_nxv1i64_nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb) {
 ; CHECK-LABEL: vwmulu_vv_nxv1i64_nxv1i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
-; CHECK-NEXT:    vzext.vf8 v10, v8
-; CHECK-NEXT:    vzext.vf8 v8, v9
-; CHECK-NEXT:    vmul.vv v8, v10, v8
+; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
+; CHECK-NEXT:    vzext.vf4 v10, v8
+; CHECK-NEXT:    vzext.vf4 v11, v9
+; CHECK-NEXT:    vwmulu.vv v8, v10, v11
 ; CHECK-NEXT:    ret
   %vc = zext <vscale x 1 x i8> %va to <vscale x 1 x i64>
   %vd = zext <vscale x 1 x i8> %vb to <vscale x 1 x i64>
@@ -753,10 +753,10 @@ define <vscale x 1 x i64> @vwmulu_vv_nxv1i64_nxv1i8(<vscale x 1 x i8> %va, <vsca
 define <vscale x 1 x i64> @vwmulsu_vv_nxv1i64_nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb) {
 ; CHECK-LABEL: vwmulsu_vv_nxv1i64_nxv1i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
-; CHECK-NEXT:    vsext.vf8 v10, v8
-; CHECK-NEXT:    vzext.vf8 v8, v9
-; CHECK-NEXT:    vmul.vv v8, v10, v8
+; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
+; CHECK-NEXT:    vsext.vf4 v10, v8
+; CHECK-NEXT:    vzext.vf4 v11, v9
+; CHECK-NEXT:    vwmulsu.vv v8, v10, v11
 ; CHECK-NEXT:    ret
   %vc = sext <vscale x 1 x i8> %va to <vscale x 1 x i64>
   %vd = zext <vscale x 1 x i8> %vb to <vscale x 1 x i64>
@@ -769,10 +769,10 @@ define <vscale x 1 x i64> @vwmul_vx_nxv1i64_nxv1i8(<vscale x 1 x i8> %va, i8 %b)
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e8, mf8, ta, ma
 ; CHECK-NEXT:    vmv.v.x v9, a0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
-; CHECK-NEXT:    vsext.vf8 v10, v8
-; CHECK-NEXT:    vsext.vf8 v8, v9
-; CHECK-NEXT:    vmul.vv v8, v10, v8
+; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-NEXT:    vsext.vf4 v10, v8
+; CHECK-NEXT:    vsext.vf4 v11, v9
+; CHECK-NEXT:    vwmul.vv v8, v10, v11
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 1 x i8> undef, i8 %b, i8 0
   %splat = shufflevector <vscale x 1 x i8> %head, <vscale x 1 x i8> undef, <vscale x 1 x i32> zeroinitializer
@@ -787,10 +787,10 @@ define <vscale x 1 x i64> @vwmulu_vx_nxv1i64_nxv1i8(<vscale x 1 x i8> %va, i8 %b
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e8, mf8, ta, ma
 ; CHECK-NEXT:    vmv.v.x v9, a0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
-; CHECK-NEXT:    vzext.vf8 v10, v8
-; CHECK-NEXT:    vzext.vf8 v8, v9
-; CHECK-NEXT:    vmul.vv v8, v10, v8
+; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-NEXT:    vzext.vf4 v10, v8
+; CHECK-NEXT:    vzext.vf4 v11, v9
+; CHECK-NEXT:    vwmulu.vv v8, v10, v11
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 1 x i8> undef, i8 %b, i8 0
   %splat = shufflevector <vscale x 1 x i8> %head, <vscale x 1 x i8> undef, <vscale x 1 x i32> zeroinitializer
@@ -805,10 +805,10 @@ define <vscale x 1 x i64> @vwmulsu_vx_nxv1i64_nxv1i8(<vscale x 1 x i8> %va, i8 %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e8, mf8, ta, ma
 ; CHECK-NEXT:    vmv.v.x v9, a0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
-; CHECK-NEXT:    vsext.vf8 v10, v8
-; CHECK-NEXT:    vzext.vf8 v8, v9
-; CHECK-NEXT:    vmul.vv v8, v10, v8
+; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-NEXT:    vsext.vf4 v10, v8
+; CHECK-NEXT:    vzext.vf4 v11, v9
+; CHECK-NEXT:    vwmulsu.vv v8, v10, v11
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 1 x i8> undef, i8 %b, i8 0
   %splat = shufflevector <vscale x 1 x i8> %head, <vscale x 1 x i8> undef, <vscale x 1 x i32> zeroinitializer
@@ -821,10 +821,10 @@ define <vscale x 1 x i64> @vwmulsu_vx_nxv1i64_nxv1i8(<vscale x 1 x i8> %va, i8 %
 define <vscale x 2 x i64> @vwmul_vv_nxv2i64_nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %vb) {
 ; CHECK-LABEL: vwmul_vv_nxv2i64_nxv2i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vsext.vf8 v10, v8
-; CHECK-NEXT:    vsext.vf8 v12, v9
-; CHECK-NEXT:    vmul.vv v8, v10, v12
+; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-NEXT:    vsext.vf4 v10, v8
+; CHECK-NEXT:    vsext.vf4 v11, v9
+; CHECK-NEXT:    vwmul.vv v8, v10, v11
 ; CHECK-NEXT:    ret
   %vc = sext <vscale x 2 x i8> %va to <vscale x 2 x i64>
   %vd = sext <vscale x 2 x i8> %vb to <vscale x 2 x i64>
@@ -835,10 +835,10 @@ define <vscale x 2 x i64> @vwmul_vv_nxv2i64_nxv2i8(<vscale x 2 x i8> %va, <vscal
 define <vscale x 2 x i64> @vwmulu_vv_nxv2i64_nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %vb) {
 ; CHECK-LABEL: vwmulu_vv_nxv2i64_nxv2i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vzext.vf8 v10, v8
-; CHECK-NEXT:    vzext.vf8 v12, v9
-; CHECK-NEXT:    vmul.vv v8, v10, v12
+; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-NEXT:    vzext.vf4 v10, v8
+; CHECK-NEXT:    vzext.vf4 v11, v9
+; CHECK-NEXT:    vwmulu.vv v8, v10, v11
 ; CHECK-NEXT:    ret
   %vc = zext <vscale x 2 x i8> %va to <vscale x 2 x i64>
   %vd = zext <vscale x 2 x i8> %vb to <vscale x 2 x i64>
@@ -849,10 +849,10 @@ define <vscale x 2 x i64> @vwmulu_vv_nxv2i64_nxv2i8(<vscale x 2 x i8> %va, <vsca
 define <vscale x 2 x i64> @vwmulsu_vv_nxv2i64_nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %vb) {
 ; CHECK-LABEL: vwmulsu_vv_nxv2i64_nxv2i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vsext.vf8 v10, v8
-; CHECK-NEXT:    vzext.vf8 v12, v9
-; CHECK-NEXT:    vmul.vv v8, v10, v12
+; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-NEXT:    vsext.vf4 v10, v8
+; CHECK-NEXT:    vzext.vf4 v11, v9
+; CHECK-NEXT:    vwmulsu.vv v8, v10, v11
 ; CHECK-NEXT:    ret
   %vc = sext <vscale x 2 x i8> %va to <vscale x 2 x i64>
   %vd = zext <vscale x 2 x i8> %vb to <vscale x 2 x i64>
@@ -865,10 +865,10 @@ define <vscale x 2 x i64> @vwmul_vx_nxv2i64_nxv2i8(<vscale x 2 x i8> %va, i8 %b)
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e8, mf4, ta, ma
 ; CHECK-NEXT:    vmv.v.x v9, a0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vsext.vf8 v10, v8
-; CHECK-NEXT:    vsext.vf8 v12, v9
-; CHECK-NEXT:    vmul.vv v8, v10, v12
+; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-NEXT:    vsext.vf4 v10, v8
+; CHECK-NEXT:    vsext.vf4 v11, v9
+; CHECK-NEXT:    vwmul.vv v8, v10, v11
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x i8> undef, i8 %b, i8 0
   %splat = shufflevector <vscale x 2 x i8> %head, <vscale x 2 x i8> undef, <vscale x 2 x i32> zeroinitializer
@@ -883,10 +883,10 @@ define <vscale x 2 x i64> @vwmulu_vx_nxv2i64_nxv2i8(<vscale x 2 x i8> %va, i8 %b
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e8, mf4, ta, ma
 ; CHECK-NEXT:    vmv.v.x v9, a0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vzext.vf8 v10, v8
-; CHECK-NEXT:    vzext.vf8 v12, v9
-; CHECK-NEXT:    vmul.vv v8, v10, v12
+; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-NEXT:    vzext.vf4 v10, v8
+; CHECK-NEXT:    vzext.vf4 v11, v9
+; CHECK-NEXT:    vwmulu.vv v8, v10, v11
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x i8> undef, i8 %b, i8 0
   %splat = shufflevector <vscale x 2 x i8> %head, <vscale x 2 x i8> undef, <vscale x 2 x i32> zeroinitializer
@@ -901,10 +901,10 @@ define <vscale x 2 x i64> @vwmulsu_vx_nxv2i64_nxv2i8(<vscale x 2 x i8> %va, i8 %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e8, mf4, ta, ma
 ; CHECK-NEXT:    vmv.v.x v9, a0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vsext.vf8 v10, v8
-; CHECK-NEXT:    vzext.vf8 v12, v9
-; CHECK-NEXT:    vmul.vv v8, v10, v12
+; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-NEXT:    vsext.vf4 v10, v8
+; CHECK-NEXT:    vzext.vf4 v11, v9
+; CHECK-NEXT:    vwmulsu.vv v8, v10, v11
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x i8> undef, i8 %b, i8 0
   %splat = shufflevector <vscale x 2 x i8> %head, <vscale x 2 x i8> undef, <vscale x 2 x i32> zeroinitializer
@@ -917,10 +917,10 @@ define <vscale x 2 x i64> @vwmulsu_vx_nxv2i64_nxv2i8(<vscale x 2 x i8> %va, i8 %
 define <vscale x 4 x i64> @vwmul_vv_nxv4i64_nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %vb) {
 ; CHECK-LABEL: vwmul_vv_nxv4i64_nxv4i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vsext.vf8 v12, v8
-; CHECK-NEXT:    vsext.vf8 v16, v9
-; CHECK-NEXT:    vmul.vv v8, v12, v16
+; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vsext.vf4 v12, v8
+; CHECK-NEXT:    vsext.vf4 v14, v9
+; CHECK-NEXT:    vwmul.vv v8, v12, v14
 ; CHECK-NEXT:    ret
   %vc = sext <vscale x 4 x i8> %va to <vscale x 4 x i64>
   %vd = sext <vscale x 4 x i8> %vb to <vscale x 4 x i64>
@@ -931,10 +931,10 @@ define <vscale x 4 x i64> @vwmul_vv_nxv4i64_nxv4i8(<vscale x 4 x i8> %va, <vscal
 define <vscale x 4 x i64> @vwmulu_vv_nxv4i64_nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %vb) {
 ; CHECK-LABEL: vwmulu_vv_nxv4i64_nxv4i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vzext.vf8 v12, v8
-; CHECK-NEXT:    vzext.vf8 v16, v9
-; CHECK-NEXT:    vmul.vv v8, v12, v16
+; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vzext.vf4 v12, v8
+; CHECK-NEXT:    vzext.vf4 v14, v9
+; CHECK-NEXT:    vwmulu.vv v8, v12, v14
 ; CHECK-NEXT:    ret
   %vc = zext <vscale x 4 x i8> %va to <vscale x 4 x i64>
   %vd = zext <vscale x 4 x i8> %vb to <vscale x 4 x i64>
@@ -945,10 +945,10 @@ define <vscale x 4 x i64> @vwmulu_vv_nxv4i64_nxv4i8(<vscale x 4 x i8> %va, <vsca
 define <vscale x 4 x i64> @vwmulsu_vv_nxv4i64_nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %vb) {
 ; CHECK-LABEL: vwmulsu_vv_nxv4i64_nxv4i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vsext.vf8 v12, v8
-; CHECK-NEXT:    vzext.vf8 v16, v9
-; CHECK-NEXT:    vmul.vv v8, v12, v16
+; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vsext.vf4 v12, v8
+; CHECK-NEXT:    vzext.vf4 v14, v9
+; CHECK-NEXT:    vwmulsu.vv v8, v12, v14
 ; CHECK-NEXT:    ret
   %vc = sext <vscale x 4 x i8> %va to <vscale x 4 x i64>
   %vd = zext <vscale x 4 x i8> %vb to <vscale x 4 x i64>
@@ -961,10 +961,10 @@ define <vscale x 4 x i64> @vwmul_vx_nxv4i64_nxv4i8(<vscale x 4 x i8> %va, i8 %b)
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e8, mf2, ta, ma
 ; CHECK-NEXT:    vmv.v.x v9, a0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vsext.vf8 v12, v8
-; CHECK-NEXT:    vsext.vf8 v16, v9
-; CHECK-NEXT:    vmul.vv v8, v12, v16
+; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vsext.vf4 v12, v8
+; CHECK-NEXT:    vsext.vf4 v14, v9
+; CHECK-NEXT:    vwmul.vv v8, v12, v14
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x i8> undef, i8 %b, i8 0
   %splat = shufflevector <vscale x 4 x i8> %head, <vscale x 4 x i8> undef, <vscale x 4 x i32> zeroinitializer
@@ -979,10 +979,10 @@ define <vscale x 4 x i64> @vwmulu_vx_nxv4i64_nxv4i8(<vscale x 4 x i8> %va, i8 %b
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e8, mf2, ta, ma
 ; CHECK-NEXT:    vmv.v.x v9, a0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vzext.vf8 v12, v8
-; CHECK-NEXT:    vzext.vf8 v16, v9
-; CHECK-NEXT:    vmul.vv v8, v12, v16
+; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vzext.vf4 v12, v8
+; CHECK-NEXT:    vzext.vf4 v14, v9
+; CHECK-NEXT:    vwmulu.vv v8, v12, v14
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x i8> undef, i8 %b, i8 0
   %splat = shufflevector <vscale x 4 x i8> %head, <vscale x 4 x i8> undef, <vscale x 4 x i32> zeroinitializer
@@ -997,10 +997,10 @@ define <vscale x 4 x i64> @vwmulsu_vx_nxv4i64_nxv4i8(<vscale x 4 x i8> %va, i8 %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e8, mf2, ta, ma
 ; CHECK-NEXT:    vmv.v.x v9, a0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vsext.vf8 v12, v8
-; CHECK-NEXT:    vzext.vf8 v16, v9
-; CHECK-NEXT:    vmul.vv v8, v12, v16
+; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vsext.vf4 v12, v8
+; CHECK-NEXT:    vzext.vf4 v14, v9
+; CHECK-NEXT:    vwmulsu.vv v8, v12, v14
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x i8> undef, i8 %b, i8 0
   %splat = shufflevector <vscale x 4 x i8> %head, <vscale x 4 x i8> undef, <vscale x 4 x i32> zeroinitializer
@@ -1013,10 +1013,10 @@ define <vscale x 4 x i64> @vwmulsu_vx_nxv4i64_nxv4i8(<vscale x 4 x i8> %va, i8 %
 define <vscale x 8 x i64> @vwmul_vv_nxv8i64_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %vb) {
 ; CHECK-LABEL: vwmul_vv_nxv8i64_nxv8i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vsext.vf8 v16, v8
-; CHECK-NEXT:    vsext.vf8 v24, v9
-; CHECK-NEXT:    vmul.vv v8, v16, v24
+; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
+; CHECK-NEXT:    vsext.vf4 v16, v8
+; CHECK-NEXT:    vsext.vf4 v20, v9
+; CHECK-NEXT:    vwmul.vv v8, v16, v20
 ; CHECK-NEXT:    ret
   %vc = sext <vscale x 8 x i8> %va to <vscale x 8 x i64>
   %vd = sext <vscale x 8 x i8> %vb to <vscale x 8 x i64>
@@ -1027,10 +1027,10 @@ define <vscale x 8 x i64> @vwmul_vv_nxv8i64_nxv8i8(<vscale x 8 x i8> %va, <vscal
 define <vscale x 8 x i64> @vwmulu_vv_nxv8i64_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %vb) {
 ; CHECK-LABEL: vwmulu_vv_nxv8i64_nxv8i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vzext.vf8 v16, v8
-; CHECK-NEXT:    vzext.vf8 v24, v9
-; CHECK-NEXT:    vmul.vv v8, v16, v24
+; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
+; CHECK-NEXT:    vzext.vf4 v16, v8
+; CHECK-NEXT:    vzext.vf4 v20, v9
+; CHECK-NEXT:    vwmulu.vv v8, v16, v20
 ; CHECK-NEXT:    ret
   %vc = zext <vscale x 8 x i8> %va to <vscale x 8 x i64>
   %vd = zext <vscale x 8 x i8> %vb to <vscale x 8 x i64>
@@ -1041,10 +1041,10 @@ define <vscale x 8 x i64> @vwmulu_vv_nxv8i64_nxv8i8(<vscale x 8 x i8> %va, <vsca
 define <vscale x 8 x i64> @vwmulsu_vv_nxv8i64_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %vb) {
 ; CHECK-LABEL: vwmulsu_vv_nxv8i64_nxv8i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vsext.vf8 v16, v8
-; CHECK-NEXT:    vzext.vf8 v24, v9
-; CHECK-NEXT:    vmul.vv v8, v16, v24
+; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
+; CHECK-NEXT:    vsext.vf4 v16, v8
+; CHECK-NEXT:    vzext.vf4 v20, v9
+; CHECK-NEXT:    vwmulsu.vv v8, v16, v20
 ; CHECK-NEXT:    ret
   %vc = sext <vscale x 8 x i8> %va to <vscale x 8 x i64>
   %vd = zext <vscale x 8 x i8> %vb to <vscale x 8 x i64>
@@ -1057,10 +1057,10 @@ define <vscale x 8 x i64> @vwmul_vx_nxv8i64_nxv8i8(<vscale x 8 x i8> %va, i8 %b)
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv.v.x v9, a0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vsext.vf8 v16, v8
-; CHECK-NEXT:    vsext.vf8 v24, v9
-; CHECK-NEXT:    vmul.vv v8, v16, v24
+; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT:    vsext.vf4 v16, v8
+; CHECK-NEXT:    vsext.vf4 v20, v9
+; CHECK-NEXT:    vwmul.vv v8, v16, v20
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x i8> undef, i8 %b, i8 0
   %splat = shufflevector <vscale x 8 x i8> %head, <vscale x 8 x i8> undef, <vscale x 8 x i32> zeroinitializer
@@ -1075,10 +1075,10 @@ define <vscale x 8 x i64> @vwmulu_vx_nxv8i64_nxv8i8(<vscale x 8 x i8> %va, i8 %b
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv.v.x v9, a0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vzext.vf8 v16, v8
-; CHECK-NEXT:    vzext.vf8 v24, v9
-; CHECK-NEXT:    vmul.vv v8, v16, v24
+; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT:    vzext.vf4 v16, v8
+; CHECK-NEXT:    vzext.vf4 v20, v9
+; CHECK-NEXT:    vwmulu.vv v8, v16, v20
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x i8> undef, i8 %b, i8 0
   %splat = shufflevector <vscale x 8 x i8> %head, <vscale x 8 x i8> undef, <vscale x 8 x i32> zeroinitializer
@@ -1093,10 +1093,10 @@ define <vscale x 8 x i64> @vwmulsu_vx_nxv8i64_nxv8i8(<vscale x 8 x i8> %va, i8 %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv.v.x v9, a0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vsext.vf8 v16, v8
-; CHECK-NEXT:    vzext.vf8 v24, v9
-; CHECK-NEXT:    vmul.vv v8, v16, v24
+; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT:    vsext.vf4 v16, v8
+; CHECK-NEXT:    vzext.vf4 v20, v9
+; CHECK-NEXT:    vwmulsu.vv v8, v16, v20
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x i8> undef, i8 %b, i8 0
   %splat = shufflevector <vscale x 8 x i8> %head, <vscale x 8 x i8> undef, <vscale x 8 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwsub-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vwsub-sdnode.ll
index 123469ade0ed4d..852814d648bfcf 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vwsub-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vwsub-sdnode.ll
@@ -421,10 +421,10 @@ define <vscale x 8 x i64> @vwsubu_wx_nxv8i64_nxv8i32(<vscale x 8 x i64> %va, i32
 define <vscale x 1 x i64> @vwsub_vv_nxv1i64_nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %vb) {
 ; CHECK-LABEL: vwsub_vv_nxv1i64_nxv1i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
-; CHECK-NEXT:    vsext.vf4 v10, v8
-; CHECK-NEXT:    vsext.vf4 v8, v9
-; CHECK-NEXT:    vsub.vv v8, v10, v8
+; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
+; CHECK-NEXT:    vsext.vf2 v10, v8
+; CHECK-NEXT:    vsext.vf2 v11, v9
+; CHECK-NEXT:    vwsub.vv v8, v10, v11
 ; CHECK-NEXT:    ret
   %vc = sext <vscale x 1 x i16> %va to <vscale x 1 x i64>
   %vd = sext <vscale x 1 x i16> %vb to <vscale x 1 x i64>
@@ -451,10 +451,10 @@ define <vscale x 1 x i64> @vwsub_vx_nxv1i64_nxv1i16(<vscale x 1 x i16> %va, i16
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
 ; CHECK-NEXT:    vmv.v.x v9, a0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
-; CHECK-NEXT:    vsext.vf4 v10, v8
-; CHECK-NEXT:    vsext.vf4 v8, v9
-; CHECK-NEXT:    vsub.vv v8, v10, v8
+; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-NEXT:    vsext.vf2 v10, v8
+; CHECK-NEXT:    vsext.vf2 v11, v9
+; CHECK-NEXT:    vwsub.vv v8, v10, v11
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 1 x i16> poison, i16 %b, i16 0
   %splat = shufflevector <vscale x 1 x i16> %head, <vscale x 1 x i16> poison, <vscale x 1 x i32> zeroinitializer
@@ -483,9 +483,9 @@ define <vscale x 1 x i64> @vwsubu_vx_nxv1i64_nxv1i16(<vscale x 1 x i16> %va, i16
 define <vscale x 1 x i64> @vwsub_wv_nxv1i64_nxv1i16(<vscale x 1 x i64> %va, <vscale x 1 x i16> %vb) {
 ; CHECK-LABEL: vwsub_wv_nxv1i64_nxv1i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
-; CHECK-NEXT:    vsext.vf4 v10, v9
-; CHECK-NEXT:    vsub.vv v8, v8, v10
+; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
+; CHECK-NEXT:    vsext.vf2 v10, v9
+; CHECK-NEXT:    vwsub.wv v8, v8, v10
 ; CHECK-NEXT:    ret
   %vc = sext <vscale x 1 x i16> %vb to <vscale x 1 x i64>
   %vd = sub <vscale x 1 x i64> %va, %vc
@@ -495,9 +495,9 @@ define <vscale x 1 x i64> @vwsub_wv_nxv1i64_nxv1i16(<vscale x 1 x i64> %va, <vsc
 define <vscale x 1 x i64> @vwsubu_wv_nxv1i64_nxv1i16(<vscale x 1 x i64> %va, <vscale x 1 x i16> %vb) {
 ; CHECK-LABEL: vwsubu_wv_nxv1i64_nxv1i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
-; CHECK-NEXT:    vzext.vf4 v10, v9
-; CHECK-NEXT:    vsub.vv v8, v8, v10
+; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
+; CHECK-NEXT:    vzext.vf2 v10, v9
+; CHECK-NEXT:    vwsubu.wv v8, v8, v10
 ; CHECK-NEXT:    ret
   %vc = zext <vscale x 1 x i16> %vb to <vscale x 1 x i64>
   %vd = sub <vscale x 1 x i64> %va, %vc
@@ -509,9 +509,9 @@ define <vscale x 1 x i64> @vwsub_wx_nxv1i64_nxv1i16(<vscale x 1 x i64> %va, i16
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
 ; CHECK-NEXT:    vmv.v.x v9, a0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
-; CHECK-NEXT:    vsext.vf4 v10, v9
-; CHECK-NEXT:    vsub.vv v8, v8, v10
+; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-NEXT:    vsext.vf2 v10, v9
+; CHECK-NEXT:    vwsub.wv v8, v8, v10
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 1 x i16> poison, i16 %b, i16 0
   %splat = shufflevector <vscale x 1 x i16> %head, <vscale x 1 x i16> poison, <vscale x 1 x i32> zeroinitializer
@@ -525,9 +525,9 @@ define <vscale x 1 x i64> @vwsubu_wx_nxv1i64_nxv1i16(<vscale x 1 x i64> %va, i16
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
 ; CHECK-NEXT:    vmv.v.x v9, a0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
-; CHECK-NEXT:    vzext.vf4 v10, v9
-; CHECK-NEXT:    vsub.vv v8, v8, v10
+; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-NEXT:    vzext.vf2 v10, v9
+; CHECK-NEXT:    vwsubu.wv v8, v8, v10
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 1 x i16> poison, i16 %b, i16 0
   %splat = shufflevector <vscale x 1 x i16> %head, <vscale x 1 x i16> poison, <vscale x 1 x i32> zeroinitializer
@@ -539,10 +539,10 @@ define <vscale x 1 x i64> @vwsubu_wx_nxv1i64_nxv1i16(<vscale x 1 x i64> %va, i16
 define <vscale x 2 x i64> @vwsub_vv_nxv2i64_nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %vb) {
 ; CHECK-LABEL: vwsub_vv_nxv2i64_nxv2i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vsext.vf4 v10, v8
-; CHECK-NEXT:    vsext.vf4 v12, v9
-; CHECK-NEXT:    vsub.vv v8, v10, v12
+; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-NEXT:    vsext.vf2 v10, v8
+; CHECK-NEXT:    vsext.vf2 v11, v9
+; CHECK-NEXT:    vwsub.vv v8, v10, v11
 ; CHECK-NEXT:    ret
   %vc = sext <vscale x 2 x i16> %va to <vscale x 2 x i64>
   %vd = sext <vscale x 2 x i16> %vb to <vscale x 2 x i64>
@@ -569,10 +569,10 @@ define <vscale x 2 x i64> @vwsub_vx_nxv2i64_nxv2i16(<vscale x 2 x i16> %va, i16
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
 ; CHECK-NEXT:    vmv.v.x v9, a0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vsext.vf4 v10, v8
-; CHECK-NEXT:    vsext.vf4 v12, v9
-; CHECK-NEXT:    vsub.vv v8, v10, v12
+; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-NEXT:    vsext.vf2 v10, v8
+; CHECK-NEXT:    vsext.vf2 v11, v9
+; CHECK-NEXT:    vwsub.vv v8, v10, v11
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x i16> poison, i16 %b, i16 0
   %splat = shufflevector <vscale x 2 x i16> %head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
@@ -601,9 +601,9 @@ define <vscale x 2 x i64> @vwsubu_vx_nxv2i64_nxv2i16(<vscale x 2 x i16> %va, i16
 define <vscale x 2 x i64> @vwsub_wv_nxv2i64_nxv2i16(<vscale x 2 x i64> %va, <vscale x 2 x i16> %vb) {
 ; CHECK-LABEL: vwsub_wv_nxv2i64_nxv2i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vsext.vf4 v12, v10
-; CHECK-NEXT:    vsub.vv v8, v8, v12
+; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-NEXT:    vsext.vf2 v11, v10
+; CHECK-NEXT:    vwsub.wv v8, v8, v11
 ; CHECK-NEXT:    ret
   %vc = sext <vscale x 2 x i16> %vb to <vscale x 2 x i64>
   %vd = sub <vscale x 2 x i64> %va, %vc
@@ -613,9 +613,9 @@ define <vscale x 2 x i64> @vwsub_wv_nxv2i64_nxv2i16(<vscale x 2 x i64> %va, <vsc
 define <vscale x 2 x i64> @vwsubu_wv_nxv2i64_nxv2i16(<vscale x 2 x i64> %va, <vscale x 2 x i16> %vb) {
 ; CHECK-LABEL: vwsubu_wv_nxv2i64_nxv2i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vzext.vf4 v12, v10
-; CHECK-NEXT:    vsub.vv v8, v8, v12
+; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-NEXT:    vzext.vf2 v11, v10
+; CHECK-NEXT:    vwsubu.wv v8, v8, v11
 ; CHECK-NEXT:    ret
   %vc = zext <vscale x 2 x i16> %vb to <vscale x 2 x i64>
   %vd = sub <vscale x 2 x i64> %va, %vc
@@ -627,9 +627,9 @@ define <vscale x 2 x i64> @vwsub_wx_nxv2i64_nxv2i16(<vscale x 2 x i64> %va, i16
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
 ; CHECK-NEXT:    vmv.v.x v10, a0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vsext.vf4 v12, v10
-; CHECK-NEXT:    vsub.vv v8, v8, v12
+; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-NEXT:    vsext.vf2 v11, v10
+; CHECK-NEXT:    vwsub.wv v8, v8, v11
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x i16> poison, i16 %b, i16 0
   %splat = shufflevector <vscale x 2 x i16> %head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
@@ -643,9 +643,9 @@ define <vscale x 2 x i64> @vwsubu_wx_nxv2i64_nxv2i16(<vscale x 2 x i64> %va, i16
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
 ; CHECK-NEXT:    vmv.v.x v10, a0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vzext.vf4 v12, v10
-; CHECK-NEXT:    vsub.vv v8, v8, v12
+; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-NEXT:    vzext.vf2 v11, v10
+; CHECK-NEXT:    vwsubu.wv v8, v8, v11
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x i16> poison, i16 %b, i16 0
   %splat = shufflevector <vscale x 2 x i16> %head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
@@ -657,10 +657,10 @@ define <vscale x 2 x i64> @vwsubu_wx_nxv2i64_nxv2i16(<vscale x 2 x i64> %va, i16
 define <vscale x 4 x i64> @vwsub_vv_nxv4i64_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %vb) {
 ; CHECK-LABEL: vwsub_vv_nxv4i64_nxv4i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vsext.vf4 v12, v8
-; CHECK-NEXT:    vsext.vf4 v16, v9
-; CHECK-NEXT:    vsub.vv v8, v12, v16
+; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vsext.vf2 v12, v8
+; CHECK-NEXT:    vsext.vf2 v14, v9
+; CHECK-NEXT:    vwsub.vv v8, v12, v14
 ; CHECK-NEXT:    ret
   %vc = sext <vscale x 4 x i16> %va to <vscale x 4 x i64>
   %vd = sext <vscale x 4 x i16> %vb to <vscale x 4 x i64>
@@ -687,10 +687,10 @@ define <vscale x 4 x i64> @vwsub_vx_nxv4i64_nxv4i16(<vscale x 4 x i16> %va, i16
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vmv.v.x v9, a0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vsext.vf4 v12, v8
-; CHECK-NEXT:    vsext.vf4 v16, v9
-; CHECK-NEXT:    vsub.vv v8, v12, v16
+; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vsext.vf2 v12, v8
+; CHECK-NEXT:    vsext.vf2 v14, v9
+; CHECK-NEXT:    vwsub.vv v8, v12, v14
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x i16> poison, i16 %b, i16 0
   %splat = shufflevector <vscale x 4 x i16> %head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
@@ -719,9 +719,9 @@ define <vscale x 4 x i64> @vwsubu_vx_nxv4i64_nxv4i16(<vscale x 4 x i16> %va, i16
 define <vscale x 4 x i64> @vwsub_wv_nxv4i64_nxv4i16(<vscale x 4 x i64> %va, <vscale x 4 x i16> %vb) {
 ; CHECK-LABEL: vwsub_wv_nxv4i64_nxv4i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vsext.vf4 v16, v12
-; CHECK-NEXT:    vsub.vv v8, v8, v16
+; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vsext.vf2 v14, v12
+; CHECK-NEXT:    vwsub.wv v8, v8, v14
 ; CHECK-NEXT:    ret
   %vc = sext <vscale x 4 x i16> %vb to <vscale x 4 x i64>
   %vd = sub <vscale x 4 x i64> %va, %vc
@@ -731,9 +731,9 @@ define <vscale x 4 x i64> @vwsub_wv_nxv4i64_nxv4i16(<vscale x 4 x i64> %va, <vsc
 define <vscale x 4 x i64> @vwsubu_wv_nxv4i64_nxv4i16(<vscale x 4 x i64> %va, <vscale x 4 x i16> %vb) {
 ; CHECK-LABEL: vwsubu_wv_nxv4i64_nxv4i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vzext.vf4 v16, v12
-; CHECK-NEXT:    vsub.vv v8, v8, v16
+; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vzext.vf2 v14, v12
+; CHECK-NEXT:    vwsubu.wv v8, v8, v14
 ; CHECK-NEXT:    ret
   %vc = zext <vscale x 4 x i16> %vb to <vscale x 4 x i64>
   %vd = sub <vscale x 4 x i64> %va, %vc
@@ -745,9 +745,9 @@ define <vscale x 4 x i64> @vwsub_wx_nxv4i64_nxv4i16(<vscale x 4 x i64> %va, i16
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vmv.v.x v12, a0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vsext.vf4 v16, v12
-; CHECK-NEXT:    vsub.vv v8, v8, v16
+; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vsext.vf2 v14, v12
+; CHECK-NEXT:    vwsub.wv v8, v8, v14
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x i16> poison, i16 %b, i16 0
   %splat = shufflevector <vscale x 4 x i16> %head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
@@ -761,9 +761,9 @@ define <vscale x 4 x i64> @vwsubu_wx_nxv4i64_nxv4i16(<vscale x 4 x i64> %va, i16
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vmv.v.x v12, a0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vzext.vf4 v16, v12
-; CHECK-NEXT:    vsub.vv v8, v8, v16
+; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vzext.vf2 v14, v12
+; CHECK-NEXT:    vwsubu.wv v8, v8, v14
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x i16> poison, i16 %b, i16 0
   %splat = shufflevector <vscale x 4 x i16> %head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
@@ -775,10 +775,10 @@ define <vscale x 4 x i64> @vwsubu_wx_nxv4i64_nxv4i16(<vscale x 4 x i64> %va, i16
 define <vscale x 8 x i64> @vwsub_vv_nxv8i64_nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %vb) {
 ; CHECK-LABEL: vwsub_vv_nxv8i64_nxv8i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vsext.vf4 v16, v8
-; CHECK-NEXT:    vsext.vf4 v24, v10
-; CHECK-NEXT:    vsub.vv v8, v16, v24
+; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
+; CHECK-NEXT:    vsext.vf2 v16, v8
+; CHECK-NEXT:    vsext.vf2 v20, v10
+; CHECK-NEXT:    vwsub.vv v8, v16, v20
 ; CHECK-NEXT:    ret
   %vc = sext <vscale x 8 x i16> %va to <vscale x 8 x i64>
   %vd = sext <vscale x 8 x i16> %vb to <vscale x 8 x i64>
@@ -805,10 +805,10 @@ define <vscale x 8 x i64> @vwsub_vx_nxv8i64_nxv8i16(<vscale x 8 x i16> %va, i16
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vmv.v.x v10, a0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vsext.vf4 v16, v8
-; CHECK-NEXT:    vsext.vf4 v24, v10
-; CHECK-NEXT:    vsub.vv v8, v16, v24
+; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT:    vsext.vf2 v16, v8
+; CHECK-NEXT:    vsext.vf2 v20, v10
+; CHECK-NEXT:    vwsub.vv v8, v16, v20
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x i16> poison, i16 %b, i16 0
   %splat = shufflevector <vscale x 8 x i16> %head, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
@@ -837,9 +837,9 @@ define <vscale x 8 x i64> @vwsubu_vx_nxv8i64_nxv8i16(<vscale x 8 x i16> %va, i16
 define <vscale x 8 x i64> @vwsub_wv_nxv8i64_nxv8i16(<vscale x 8 x i64> %va, <vscale x 8 x i16> %vb) {
 ; CHECK-LABEL: vwsub_wv_nxv8i64_nxv8i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vsext.vf4 v24, v16
-; CHECK-NEXT:    vsub.vv v8, v8, v24
+; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
+; CHECK-NEXT:    vsext.vf2 v20, v16
+; CHECK-NEXT:    vwsub.wv v8, v8, v20
 ; CHECK-NEXT:    ret
   %vc = sext <vscale x 8 x i16> %vb to <vscale x 8 x i64>
   %vd = sub <vscale x 8 x i64> %va, %vc
@@ -849,9 +849,9 @@ define <vscale x 8 x i64> @vwsub_wv_nxv8i64_nxv8i16(<vscale x 8 x i64> %va, <vsc
 define <vscale x 8 x i64> @vwsubu_wv_nxv8i64_nxv8i16(<vscale x 8 x i64> %va, <vscale x 8 x i16> %vb) {
 ; CHECK-LABEL: vwsubu_wv_nxv8i64_nxv8i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vzext.vf4 v24, v16
-; CHECK-NEXT:    vsub.vv v8, v8, v24
+; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
+; CHECK-NEXT:    vzext.vf2 v20, v16
+; CHECK-NEXT:    vwsubu.wv v8, v8, v20
 ; CHECK-NEXT:    ret
   %vc = zext <vscale x 8 x i16> %vb to <vscale x 8 x i64>
   %vd = sub <vscale x 8 x i64> %va, %vc
@@ -863,9 +863,9 @@ define <vscale x 8 x i64> @vwsub_wx_nxv8i64_nxv8i16(<vscale x 8 x i64> %va, i16
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vmv.v.x v16, a0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vsext.vf4 v24, v16
-; CHECK-NEXT:    vsub.vv v8, v8, v24
+; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT:    vsext.vf2 v20, v16
+; CHECK-NEXT:    vwsub.wv v8, v8, v20
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x i16> poison, i16 %b, i16 0
   %splat = shufflevector <vscale x 8 x i16> %head, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
@@ -879,9 +879,9 @@ define <vscale x 8 x i64> @vwsubu_wx_nxv8i64_nxv8i16(<vscale x 8 x i64> %va, i16
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vmv.v.x v16, a0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vzext.vf4 v24, v16
-; CHECK-NEXT:    vsub.vv v8, v8, v24
+; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT:    vzext.vf2 v20, v16
+; CHECK-NEXT:    vwsubu.wv v8, v8, v20
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x i16> poison, i16 %b, i16 0
   %splat = shufflevector <vscale x 8 x i16> %head, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
@@ -893,10 +893,10 @@ define <vscale x 8 x i64> @vwsubu_wx_nxv8i64_nxv8i16(<vscale x 8 x i64> %va, i16
 define <vscale x 1 x i64> @vwsub_vv_nxv1i64_nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb) {
 ; CHECK-LABEL: vwsub_vv_nxv1i64_nxv1i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
-; CHECK-NEXT:    vsext.vf8 v10, v8
-; CHECK-NEXT:    vsext.vf8 v8, v9
-; CHECK-NEXT:    vsub.vv v8, v10, v8
+; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
+; CHECK-NEXT:    vsext.vf4 v10, v8
+; CHECK-NEXT:    vsext.vf4 v11, v9
+; CHECK-NEXT:    vwsub.vv v8, v10, v11
 ; CHECK-NEXT:    ret
   %vc = sext <vscale x 1 x i8> %va to <vscale x 1 x i64>
   %vd = sext <vscale x 1 x i8> %vb to <vscale x 1 x i64>
@@ -923,10 +923,10 @@ define <vscale x 1 x i64> @vwsub_vx_nxv1i64_nxv1i8(<vscale x 1 x i8> %va, i8 %b)
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e8, mf8, ta, ma
 ; CHECK-NEXT:    vmv.v.x v9, a0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
-; CHECK-NEXT:    vsext.vf8 v10, v8
-; CHECK-NEXT:    vsext.vf8 v8, v9
-; CHECK-NEXT:    vsub.vv v8, v10, v8
+; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-NEXT:    vsext.vf4 v10, v8
+; CHECK-NEXT:    vsext.vf4 v11, v9
+; CHECK-NEXT:    vwsub.vv v8, v10, v11
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 1 x i8> poison, i8 %b, i8 0
   %splat = shufflevector <vscale x 1 x i8> %head, <vscale x 1 x i8> poison, <vscale x 1 x i32> zeroinitializer
@@ -955,9 +955,9 @@ define <vscale x 1 x i64> @vwsubu_vx_nxv1i64_nxv1i8(<vscale x 1 x i8> %va, i8 %b
 define <vscale x 1 x i64> @vwsub_wv_nxv1i64_nxv1i8(<vscale x 1 x i64> %va, <vscale x 1 x i8> %vb) {
 ; CHECK-LABEL: vwsub_wv_nxv1i64_nxv1i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
-; CHECK-NEXT:    vsext.vf8 v10, v9
-; CHECK-NEXT:    vsub.vv v8, v8, v10
+; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
+; CHECK-NEXT:    vsext.vf4 v10, v9
+; CHECK-NEXT:    vwsub.wv v8, v8, v10
 ; CHECK-NEXT:    ret
   %vc = sext <vscale x 1 x i8> %vb to <vscale x 1 x i64>
   %vd = sub <vscale x 1 x i64> %va, %vc
@@ -967,9 +967,9 @@ define <vscale x 1 x i64> @vwsub_wv_nxv1i64_nxv1i8(<vscale x 1 x i64> %va, <vsca
 define <vscale x 1 x i64> @vwsubu_wv_nxv1i64_nxv1i8(<vscale x 1 x i64> %va, <vscale x 1 x i8> %vb) {
 ; CHECK-LABEL: vwsubu_wv_nxv1i64_nxv1i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
-; CHECK-NEXT:    vzext.vf8 v10, v9
-; CHECK-NEXT:    vsub.vv v8, v8, v10
+; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
+; CHECK-NEXT:    vzext.vf4 v10, v9
+; CHECK-NEXT:    vwsubu.wv v8, v8, v10
 ; CHECK-NEXT:    ret
   %vc = zext <vscale x 1 x i8> %vb to <vscale x 1 x i64>
   %vd = sub <vscale x 1 x i64> %va, %vc
@@ -981,9 +981,9 @@ define <vscale x 1 x i64> @vwsub_wx_nxv1i64_nxv1i8(<vscale x 1 x i64> %va, i8 %b
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e8, mf8, ta, ma
 ; CHECK-NEXT:    vmv.v.x v9, a0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
-; CHECK-NEXT:    vsext.vf8 v10, v9
-; CHECK-NEXT:    vsub.vv v8, v8, v10
+; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-NEXT:    vsext.vf4 v10, v9
+; CHECK-NEXT:    vwsub.wv v8, v8, v10
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 1 x i8> poison, i8 %b, i8 0
   %splat = shufflevector <vscale x 1 x i8> %head, <vscale x 1 x i8> poison, <vscale x 1 x i32> zeroinitializer
@@ -997,9 +997,9 @@ define <vscale x 1 x i64> @vwsubu_wx_nxv1i64_nxv1i8(<vscale x 1 x i64> %va, i8 %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e8, mf8, ta, ma
 ; CHECK-NEXT:    vmv.v.x v9, a0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
-; CHECK-NEXT:    vzext.vf8 v10, v9
-; CHECK-NEXT:    vsub.vv v8, v8, v10
+; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-NEXT:    vzext.vf4 v10, v9
+; CHECK-NEXT:    vwsubu.wv v8, v8, v10
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 1 x i8> poison, i8 %b, i8 0
   %splat = shufflevector <vscale x 1 x i8> %head, <vscale x 1 x i8> poison, <vscale x 1 x i32> zeroinitializer
@@ -1011,10 +1011,10 @@ define <vscale x 1 x i64> @vwsubu_wx_nxv1i64_nxv1i8(<vscale x 1 x i64> %va, i8 %
 define <vscale x 2 x i64> @vwsub_vv_nxv2i64_nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %vb) {
 ; CHECK-LABEL: vwsub_vv_nxv2i64_nxv2i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vsext.vf8 v10, v8
-; CHECK-NEXT:    vsext.vf8 v12, v9
-; CHECK-NEXT:    vsub.vv v8, v10, v12
+; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-NEXT:    vsext.vf4 v10, v8
+; CHECK-NEXT:    vsext.vf4 v11, v9
+; CHECK-NEXT:    vwsub.vv v8, v10, v11
 ; CHECK-NEXT:    ret
   %vc = sext <vscale x 2 x i8> %va to <vscale x 2 x i64>
   %vd = sext <vscale x 2 x i8> %vb to <vscale x 2 x i64>
@@ -1041,10 +1041,10 @@ define <vscale x 2 x i64> @vwsub_vx_nxv2i64_nxv2i8(<vscale x 2 x i8> %va, i8 %b)
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e8, mf4, ta, ma
 ; CHECK-NEXT:    vmv.v.x v9, a0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vsext.vf8 v10, v8
-; CHECK-NEXT:    vsext.vf8 v12, v9
-; CHECK-NEXT:    vsub.vv v8, v10, v12
+; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-NEXT:    vsext.vf4 v10, v8
+; CHECK-NEXT:    vsext.vf4 v11, v9
+; CHECK-NEXT:    vwsub.vv v8, v10, v11
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x i8> poison, i8 %b, i8 0
   %splat = shufflevector <vscale x 2 x i8> %head, <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer
@@ -1073,9 +1073,9 @@ define <vscale x 2 x i64> @vwsubu_vx_nxv2i64_nxv2i8(<vscale x 2 x i8> %va, i8 %b
 define <vscale x 2 x i64> @vwsub_wv_nxv2i64_nxv2i8(<vscale x 2 x i64> %va, <vscale x 2 x i8> %vb) {
 ; CHECK-LABEL: vwsub_wv_nxv2i64_nxv2i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vsext.vf8 v12, v10
-; CHECK-NEXT:    vsub.vv v8, v8, v12
+; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-NEXT:    vsext.vf4 v11, v10
+; CHECK-NEXT:    vwsub.wv v8, v8, v11
 ; CHECK-NEXT:    ret
   %vc = sext <vscale x 2 x i8> %vb to <vscale x 2 x i64>
   %vd = sub <vscale x 2 x i64> %va, %vc
@@ -1085,9 +1085,9 @@ define <vscale x 2 x i64> @vwsub_wv_nxv2i64_nxv2i8(<vscale x 2 x i64> %va, <vsca
 define <vscale x 2 x i64> @vwsubu_wv_nxv2i64_nxv2i8(<vscale x 2 x i64> %va, <vscale x 2 x i8> %vb) {
 ; CHECK-LABEL: vwsubu_wv_nxv2i64_nxv2i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vzext.vf8 v12, v10
-; CHECK-NEXT:    vsub.vv v8, v8, v12
+; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-NEXT:    vzext.vf4 v11, v10
+; CHECK-NEXT:    vwsubu.wv v8, v8, v11
 ; CHECK-NEXT:    ret
   %vc = zext <vscale x 2 x i8> %vb to <vscale x 2 x i64>
   %vd = sub <vscale x 2 x i64> %va, %vc
@@ -1099,9 +1099,9 @@ define <vscale x 2 x i64> @vwsub_wx_nxv2i64_nxv2i8(<vscale x 2 x i64> %va, i8 %b
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e8, mf4, ta, ma
 ; CHECK-NEXT:    vmv.v.x v10, a0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vsext.vf8 v12, v10
-; CHECK-NEXT:    vsub.vv v8, v8, v12
+; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-NEXT:    vsext.vf4 v11, v10
+; CHECK-NEXT:    vwsub.wv v8, v8, v11
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x i8> poison, i8 %b, i8 0
   %splat = shufflevector <vscale x 2 x i8> %head, <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer
@@ -1115,9 +1115,9 @@ define <vscale x 2 x i64> @vwsubu_wx_nxv2i64_nxv2i8(<vscale x 2 x i64> %va, i8 %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e8, mf4, ta, ma
 ; CHECK-NEXT:    vmv.v.x v10, a0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vzext.vf8 v12, v10
-; CHECK-NEXT:    vsub.vv v8, v8, v12
+; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-NEXT:    vzext.vf4 v11, v10
+; CHECK-NEXT:    vwsubu.wv v8, v8, v11
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x i8> poison, i8 %b, i8 0
   %splat = shufflevector <vscale x 2 x i8> %head, <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer
@@ -1129,10 +1129,10 @@ define <vscale x 2 x i64> @vwsubu_wx_nxv2i64_nxv2i8(<vscale x 2 x i64> %va, i8 %
 define <vscale x 4 x i64> @vwsub_vv_nxv4i64_nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %vb) {
 ; CHECK-LABEL: vwsub_vv_nxv4i64_nxv4i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vsext.vf8 v12, v8
-; CHECK-NEXT:    vsext.vf8 v16, v9
-; CHECK-NEXT:    vsub.vv v8, v12, v16
+; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vsext.vf4 v12, v8
+; CHECK-NEXT:    vsext.vf4 v14, v9
+; CHECK-NEXT:    vwsub.vv v8, v12, v14
 ; CHECK-NEXT:    ret
   %vc = sext <vscale x 4 x i8> %va to <vscale x 4 x i64>
   %vd = sext <vscale x 4 x i8> %vb to <vscale x 4 x i64>
@@ -1159,10 +1159,10 @@ define <vscale x 4 x i64> @vwsub_vx_nxv4i64_nxv4i8(<vscale x 4 x i8> %va, i8 %b)
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e8, mf2, ta, ma
 ; CHECK-NEXT:    vmv.v.x v9, a0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vsext.vf8 v12, v8
-; CHECK-NEXT:    vsext.vf8 v16, v9
-; CHECK-NEXT:    vsub.vv v8, v12, v16
+; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vsext.vf4 v12, v8
+; CHECK-NEXT:    vsext.vf4 v14, v9
+; CHECK-NEXT:    vwsub.vv v8, v12, v14
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x i8> poison, i8 %b, i8 0
   %splat = shufflevector <vscale x 4 x i8> %head, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer
@@ -1191,9 +1191,9 @@ define <vscale x 4 x i64> @vwsubu_vx_nxv4i64_nxv4i8(<vscale x 4 x i8> %va, i8 %b
 define <vscale x 4 x i64> @vwsub_wv_nxv4i64_nxv4i8(<vscale x 4 x i64> %va, <vscale x 4 x i8> %vb) {
 ; CHECK-LABEL: vwsub_wv_nxv4i64_nxv4i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vsext.vf8 v16, v12
-; CHECK-NEXT:    vsub.vv v8, v8, v16
+; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vsext.vf4 v14, v12
+; CHECK-NEXT:    vwsub.wv v8, v8, v14
 ; CHECK-NEXT:    ret
   %vc = sext <vscale x 4 x i8> %vb to <vscale x 4 x i64>
   %vd = sub <vscale x 4 x i64> %va, %vc
@@ -1203,9 +1203,9 @@ define <vscale x 4 x i64> @vwsub_wv_nxv4i64_nxv4i8(<vscale x 4 x i64> %va, <vsca
 define <vscale x 4 x i64> @vwsubu_wv_nxv4i64_nxv4i8(<vscale x 4 x i64> %va, <vscale x 4 x i8> %vb) {
 ; CHECK-LABEL: vwsubu_wv_nxv4i64_nxv4i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vzext.vf8 v16, v12
-; CHECK-NEXT:    vsub.vv v8, v8, v16
+; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vzext.vf4 v14, v12
+; CHECK-NEXT:    vwsubu.wv v8, v8, v14
 ; CHECK-NEXT:    ret
   %vc = zext <vscale x 4 x i8> %vb to <vscale x 4 x i64>
   %vd = sub <vscale x 4 x i64> %va, %vc
@@ -1217,9 +1217,9 @@ define <vscale x 4 x i64> @vwsub_wx_nxv4i64_nxv4i8(<vscale x 4 x i64> %va, i8 %b
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e8, mf2, ta, ma
 ; CHECK-NEXT:    vmv.v.x v12, a0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vsext.vf8 v16, v12
-; CHECK-NEXT:    vsub.vv v8, v8, v16
+; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vsext.vf4 v14, v12
+; CHECK-NEXT:    vwsub.wv v8, v8, v14
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x i8> poison, i8 %b, i8 0
   %splat = shufflevector <vscale x 4 x i8> %head, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer
@@ -1233,9 +1233,9 @@ define <vscale x 4 x i64> @vwsubu_wx_nxv4i64_nxv4i8(<vscale x 4 x i64> %va, i8 %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e8, mf2, ta, ma
 ; CHECK-NEXT:    vmv.v.x v12, a0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vzext.vf8 v16, v12
-; CHECK-NEXT:    vsub.vv v8, v8, v16
+; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vzext.vf4 v14, v12
+; CHECK-NEXT:    vwsubu.wv v8, v8, v14
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x i8> poison, i8 %b, i8 0
   %splat = shufflevector <vscale x 4 x i8> %head, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer
@@ -1247,10 +1247,10 @@ define <vscale x 4 x i64> @vwsubu_wx_nxv4i64_nxv4i8(<vscale x 4 x i64> %va, i8 %
 define <vscale x 8 x i64> @vwsub_vv_nxv8i64_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %vb) {
 ; CHECK-LABEL: vwsub_vv_nxv8i64_nxv8i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vsext.vf8 v16, v8
-; CHECK-NEXT:    vsext.vf8 v24, v9
-; CHECK-NEXT:    vsub.vv v8, v16, v24
+; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
+; CHECK-NEXT:    vsext.vf4 v16, v8
+; CHECK-NEXT:    vsext.vf4 v20, v9
+; CHECK-NEXT:    vwsub.vv v8, v16, v20
 ; CHECK-NEXT:    ret
   %vc = sext <vscale x 8 x i8> %va to <vscale x 8 x i64>
   %vd = sext <vscale x 8 x i8> %vb to <vscale x 8 x i64>
@@ -1277,10 +1277,10 @@ define <vscale x 8 x i64> @vwsub_vx_nxv8i64_nxv8i8(<vscale x 8 x i8> %va, i8 %b)
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv.v.x v9, a0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vsext.vf8 v16, v8
-; CHECK-NEXT:    vsext.vf8 v24, v9
-; CHECK-NEXT:    vsub.vv v8, v16, v24
+; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT:    vsext.vf4 v16, v8
+; CHECK-NEXT:    vsext.vf4 v20, v9
+; CHECK-NEXT:    vwsub.vv v8, v16, v20
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x i8> poison, i8 %b, i8 0
   %splat = shufflevector <vscale x 8 x i8> %head, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
@@ -1309,9 +1309,9 @@ define <vscale x 8 x i64> @vwsubu_vx_nxv8i64_nxv8i8(<vscale x 8 x i8> %va, i8 %b
 define <vscale x 8 x i64> @vwsub_wv_nxv8i64_nxv8i8(<vscale x 8 x i64> %va, <vscale x 8 x i8> %vb) {
 ; CHECK-LABEL: vwsub_wv_nxv8i64_nxv8i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vsext.vf8 v24, v16
-; CHECK-NEXT:    vsub.vv v8, v8, v24
+; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
+; CHECK-NEXT:    vsext.vf4 v20, v16
+; CHECK-NEXT:    vwsub.wv v8, v8, v20
 ; CHECK-NEXT:    ret
   %vc = sext <vscale x 8 x i8> %vb to <vscale x 8 x i64>
   %vd = sub <vscale x 8 x i64> %va, %vc
@@ -1321,9 +1321,9 @@ define <vscale x 8 x i64> @vwsub_wv_nxv8i64_nxv8i8(<vscale x 8 x i64> %va, <vsca
 define <vscale x 8 x i64> @vwsubu_wv_nxv8i64_nxv8i8(<vscale x 8 x i64> %va, <vscale x 8 x i8> %vb) {
 ; CHECK-LABEL: vwsubu_wv_nxv8i64_nxv8i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vzext.vf8 v24, v16
-; CHECK-NEXT:    vsub.vv v8, v8, v24
+; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
+; CHECK-NEXT:    vzext.vf4 v20, v16
+; CHECK-NEXT:    vwsubu.wv v8, v8, v20
 ; CHECK-NEXT:    ret
   %vc = zext <vscale x 8 x i8> %vb to <vscale x 8 x i64>
   %vd = sub <vscale x 8 x i64> %va, %vc
@@ -1335,9 +1335,9 @@ define <vscale x 8 x i64> @vwsub_wx_nxv8i64_nxv8i8(<vscale x 8 x i64> %va, i8 %b
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv.v.x v16, a0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vsext.vf8 v24, v16
-; CHECK-NEXT:    vsub.vv v8, v8, v24
+; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT:    vsext.vf4 v20, v16
+; CHECK-NEXT:    vwsub.wv v8, v8, v20
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x i8> poison, i8 %b, i8 0
   %splat = shufflevector <vscale x 8 x i8> %head, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
@@ -1351,9 +1351,9 @@ define <vscale x 8 x i64> @vwsubu_wx_nxv8i64_nxv8i8(<vscale x 8 x i64> %va, i8 %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv.v.x v16, a0
-; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vzext.vf8 v24, v16
-; CHECK-NEXT:    vsub.vv v8, v8, v24
+; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT:    vzext.vf4 v20, v16
+; CHECK-NEXT:    vwsubu.wv v8, v8, v20
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x i8> poison, i8 %b, i8 0
   %splat = shufflevector <vscale x 8 x i8> %head, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer


