[llvm] bec7ad9 - [RISCV] Add tests for vw{add,sub,mul} with nested extend. NFC

Luke Lau via llvm-commits llvm-commits at lists.llvm.org
Tue Mar 5 23:57:37 PST 2024


Author: Luke Lau
Date: 2024-03-06T15:56:02+08:00
New Revision: bec7ad9fd6bceb6521448b24faddb01bc52de3a7

URL: https://github.com/llvm/llvm-project/commit/bec7ad9fd6bceb6521448b24faddb01bc52de3a7
DIFF: https://github.com/llvm/llvm-project/commit/bec7ad9fd6bceb6521448b24faddb01bc52de3a7.diff

LOG: [RISCV] Add tests for vw{add,sub,mul} with nested extend. NFC

These test cases show (op (ext a), (ext b)) patterns where the dest EEW is
more than 2 * the source EEW. These could be lowered into widening ops where
we would still have to extend the operands, but at a smaller EEW.
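
For example, for the new nxv1i16 -> nxv1i64 vwadd test below, the current
codegen extends both operands all the way to e64 and then uses a plain
vadd.vv. A sketch of the widening lowering this is aiming at (hand-written
here for illustration, not produced by this patch; the vsetvli and register
choices are hypothetical) would extend only to e32 and let vwadd.vv do the
final doubling:

    # current lowering (from the test):
    vsetvli a0, zero, e64, m1, ta, ma
    vsext.vf4 v10, v8
    vsext.vf4 v8, v9
    vadd.vv v8, v10, v8

    # possible widening lowering (sketch):
    vsetvli a0, zero, e32, mf2, ta, ma
    vsext.vf2 v10, v8            # extend i16 -> i32 at the smaller EEW
    vsext.vf2 v11, v9
    vwadd.vv v8, v10, v11        # widening add: i32 + i32 -> i64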

Added: 
    

Modified: 
    llvm/test/CodeGen/RISCV/rvv/vwadd-sdnode.ll
    llvm/test/CodeGen/RISCV/rvv/vwmul-sdnode.ll
    llvm/test/CodeGen/RISCV/rvv/vwsub-sdnode.ll

Removed: 
    


################################################################################
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwadd-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vwadd-sdnode.ll
index 89e69565b39e3f..a559fbf2bc8a7a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vwadd-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vwadd-sdnode.ll
@@ -2,8 +2,8 @@
 ; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
 ; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
 
-define <vscale x 1 x i64> @vwadd_vv_nxv1i64(<vscale x 1 x i32> %va, <vscale x 1 x i32> %vb) {
-; CHECK-LABEL: vwadd_vv_nxv1i64:
+define <vscale x 1 x i64> @vwadd_vv_nxv1i64_nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %vb) {
+; CHECK-LABEL: vwadd_vv_nxv1i64_nxv1i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
 ; CHECK-NEXT:    vwadd.vv v10, v8, v9
@@ -15,8 +15,8 @@ define <vscale x 1 x i64> @vwadd_vv_nxv1i64(<vscale x 1 x i32> %va, <vscale x 1
   ret <vscale x 1 x i64> %ve
 }
 
-define <vscale x 1 x i64> @vwaddu_vv_nxv1i64(<vscale x 1 x i32> %va, <vscale x 1 x i32> %vb) {
-; CHECK-LABEL: vwaddu_vv_nxv1i64:
+define <vscale x 1 x i64> @vwaddu_vv_nxv1i64_nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %vb) {
+; CHECK-LABEL: vwaddu_vv_nxv1i64_nxv1i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
 ; CHECK-NEXT:    vwaddu.vv v10, v8, v9
@@ -28,8 +28,8 @@ define <vscale x 1 x i64> @vwaddu_vv_nxv1i64(<vscale x 1 x i32> %va, <vscale x 1
   ret <vscale x 1 x i64> %ve
 }
 
-define <vscale x 1 x i64> @vwadd_vx_nxv1i64(<vscale x 1 x i32> %va, i32 %b) {
-; CHECK-LABEL: vwadd_vx_nxv1i64:
+define <vscale x 1 x i64> @vwadd_vx_nxv1i64_nxv1i32(<vscale x 1 x i32> %va, i32 %b) {
+; CHECK-LABEL: vwadd_vx_nxv1i64_nxv1i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, mf2, ta, ma
 ; CHECK-NEXT:    vwadd.vx v9, v8, a0
@@ -43,8 +43,8 @@ define <vscale x 1 x i64> @vwadd_vx_nxv1i64(<vscale x 1 x i32> %va, i32 %b) {
   ret <vscale x 1 x i64> %ve
 }
 
-define <vscale x 1 x i64> @vwaddu_vx_nxv1i64(<vscale x 1 x i32> %va, i32 %b) {
-; CHECK-LABEL: vwaddu_vx_nxv1i64:
+define <vscale x 1 x i64> @vwaddu_vx_nxv1i64_nxv1i32(<vscale x 1 x i32> %va, i32 %b) {
+; CHECK-LABEL: vwaddu_vx_nxv1i64_nxv1i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, mf2, ta, ma
 ; CHECK-NEXT:    vwaddu.vx v9, v8, a0
@@ -58,8 +58,8 @@ define <vscale x 1 x i64> @vwaddu_vx_nxv1i64(<vscale x 1 x i32> %va, i32 %b) {
   ret <vscale x 1 x i64> %ve
 }
 
-define <vscale x 1 x i64> @vwadd_wv_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i32> %vb) {
-; CHECK-LABEL: vwadd_wv_nxv1i64:
+define <vscale x 1 x i64> @vwadd_wv_nxv1i64_nxv1i32(<vscale x 1 x i64> %va, <vscale x 1 x i32> %vb) {
+; CHECK-LABEL: vwadd_wv_nxv1i64_nxv1i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
 ; CHECK-NEXT:    vwadd.wv v8, v8, v9
@@ -69,8 +69,8 @@ define <vscale x 1 x i64> @vwadd_wv_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1
   ret <vscale x 1 x i64> %vd
 }
 
-define <vscale x 1 x i64> @vwaddu_wv_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i32> %vb) {
-; CHECK-LABEL: vwaddu_wv_nxv1i64:
+define <vscale x 1 x i64> @vwaddu_wv_nxv1i64_nxv1i32(<vscale x 1 x i64> %va, <vscale x 1 x i32> %vb) {
+; CHECK-LABEL: vwaddu_wv_nxv1i64_nxv1i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
 ; CHECK-NEXT:    vwaddu.wv v8, v8, v9
@@ -80,8 +80,8 @@ define <vscale x 1 x i64> @vwaddu_wv_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1
   ret <vscale x 1 x i64> %vd
 }
 
-define <vscale x 1 x i64> @vwadd_wx_nxv1i64(<vscale x 1 x i64> %va, i32 %b) {
-; CHECK-LABEL: vwadd_wx_nxv1i64:
+define <vscale x 1 x i64> @vwadd_wx_nxv1i64_nxv1i32(<vscale x 1 x i64> %va, i32 %b) {
+; CHECK-LABEL: vwadd_wx_nxv1i64_nxv1i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, mf2, ta, ma
 ; CHECK-NEXT:    vwadd.wx v8, v8, a0
@@ -93,8 +93,8 @@ define <vscale x 1 x i64> @vwadd_wx_nxv1i64(<vscale x 1 x i64> %va, i32 %b) {
   ret <vscale x 1 x i64> %vc
 }
 
-define <vscale x 1 x i64> @vwaddu_wx_nxv1i64(<vscale x 1 x i64> %va, i32 %b) {
-; CHECK-LABEL: vwaddu_wx_nxv1i64:
+define <vscale x 1 x i64> @vwaddu_wx_nxv1i64_nxv1i32(<vscale x 1 x i64> %va, i32 %b) {
+; CHECK-LABEL: vwaddu_wx_nxv1i64_nxv1i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, mf2, ta, ma
 ; CHECK-NEXT:    vwaddu.wx v8, v8, a0
@@ -106,8 +106,8 @@ define <vscale x 1 x i64> @vwaddu_wx_nxv1i64(<vscale x 1 x i64> %va, i32 %b) {
   ret <vscale x 1 x i64> %vc
 }
 
-define <vscale x 2 x i64> @vwadd_vv_nxv2i64(<vscale x 2 x i32> %va, <vscale x 2 x i32> %vb) {
-; CHECK-LABEL: vwadd_vv_nxv2i64:
+define <vscale x 2 x i64> @vwadd_vv_nxv2i64_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %vb) {
+; CHECK-LABEL: vwadd_vv_nxv2i64_nxv2i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
 ; CHECK-NEXT:    vwadd.vv v10, v8, v9
@@ -119,8 +119,8 @@ define <vscale x 2 x i64> @vwadd_vv_nxv2i64(<vscale x 2 x i32> %va, <vscale x 2
   ret <vscale x 2 x i64> %ve
 }
 
-define <vscale x 2 x i64> @vwaddu_vv_nxv2i64(<vscale x 2 x i32> %va, <vscale x 2 x i32> %vb) {
-; CHECK-LABEL: vwaddu_vv_nxv2i64:
+define <vscale x 2 x i64> @vwaddu_vv_nxv2i64_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %vb) {
+; CHECK-LABEL: vwaddu_vv_nxv2i64_nxv2i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
 ; CHECK-NEXT:    vwaddu.vv v10, v8, v9
@@ -132,8 +132,8 @@ define <vscale x 2 x i64> @vwaddu_vv_nxv2i64(<vscale x 2 x i32> %va, <vscale x 2
   ret <vscale x 2 x i64> %ve
 }
 
-define <vscale x 2 x i64> @vwadd_vx_nxv2i64(<vscale x 2 x i32> %va, i32 %b) {
-; CHECK-LABEL: vwadd_vx_nxv2i64:
+define <vscale x 2 x i64> @vwadd_vx_nxv2i64_nxv2i32(<vscale x 2 x i32> %va, i32 %b) {
+; CHECK-LABEL: vwadd_vx_nxv2i64_nxv2i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
 ; CHECK-NEXT:    vwadd.vx v10, v8, a0
@@ -147,8 +147,8 @@ define <vscale x 2 x i64> @vwadd_vx_nxv2i64(<vscale x 2 x i32> %va, i32 %b) {
   ret <vscale x 2 x i64> %ve
 }
 
-define <vscale x 2 x i64> @vwaddu_vx_nxv2i64(<vscale x 2 x i32> %va, i32 %b) {
-; CHECK-LABEL: vwaddu_vx_nxv2i64:
+define <vscale x 2 x i64> @vwaddu_vx_nxv2i64_nxv2i32(<vscale x 2 x i32> %va, i32 %b) {
+; CHECK-LABEL: vwaddu_vx_nxv2i64_nxv2i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
 ; CHECK-NEXT:    vwaddu.vx v10, v8, a0
@@ -162,8 +162,8 @@ define <vscale x 2 x i64> @vwaddu_vx_nxv2i64(<vscale x 2 x i32> %va, i32 %b) {
   ret <vscale x 2 x i64> %ve
 }
 
-define <vscale x 2 x i64> @vwadd_wv_nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i32> %vb) {
-; CHECK-LABEL: vwadd_wv_nxv2i64:
+define <vscale x 2 x i64> @vwadd_wv_nxv2i64_nxv2i32(<vscale x 2 x i64> %va, <vscale x 2 x i32> %vb) {
+; CHECK-LABEL: vwadd_wv_nxv2i64_nxv2i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
 ; CHECK-NEXT:    vwadd.wv v8, v8, v10
@@ -173,8 +173,8 @@ define <vscale x 2 x i64> @vwadd_wv_nxv2i64(<vscale x 2 x i64> %va, <vscale x 2
   ret <vscale x 2 x i64> %vd
 }
 
-define <vscale x 2 x i64> @vwaddu_wv_nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i32> %vb) {
-; CHECK-LABEL: vwaddu_wv_nxv2i64:
+define <vscale x 2 x i64> @vwaddu_wv_nxv2i64_nxv2i32(<vscale x 2 x i64> %va, <vscale x 2 x i32> %vb) {
+; CHECK-LABEL: vwaddu_wv_nxv2i64_nxv2i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
 ; CHECK-NEXT:    vwaddu.wv v8, v8, v10
@@ -184,8 +184,8 @@ define <vscale x 2 x i64> @vwaddu_wv_nxv2i64(<vscale x 2 x i64> %va, <vscale x 2
   ret <vscale x 2 x i64> %vd
 }
 
-define <vscale x 2 x i64> @vwadd_wx_nxv2i64(<vscale x 2 x i64> %va, i32 %b) {
-; CHECK-LABEL: vwadd_wx_nxv2i64:
+define <vscale x 2 x i64> @vwadd_wx_nxv2i64_nxv2i32(<vscale x 2 x i64> %va, i32 %b) {
+; CHECK-LABEL: vwadd_wx_nxv2i64_nxv2i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
 ; CHECK-NEXT:    vwadd.wx v8, v8, a0
@@ -197,8 +197,8 @@ define <vscale x 2 x i64> @vwadd_wx_nxv2i64(<vscale x 2 x i64> %va, i32 %b) {
   ret <vscale x 2 x i64> %vc
 }
 
-define <vscale x 2 x i64> @vwaddu_wx_nxv2i64(<vscale x 2 x i64> %va, i32 %b) {
-; CHECK-LABEL: vwaddu_wx_nxv2i64:
+define <vscale x 2 x i64> @vwaddu_wx_nxv2i64_nxv2i32(<vscale x 2 x i64> %va, i32 %b) {
+; CHECK-LABEL: vwaddu_wx_nxv2i64_nxv2i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
 ; CHECK-NEXT:    vwaddu.wx v8, v8, a0
@@ -210,8 +210,8 @@ define <vscale x 2 x i64> @vwaddu_wx_nxv2i64(<vscale x 2 x i64> %va, i32 %b) {
   ret <vscale x 2 x i64> %vc
 }
 
-define <vscale x 4 x i64> @vwadd_vv_nxv4i64(<vscale x 4 x i32> %va, <vscale x 4 x i32> %vb) {
-; CHECK-LABEL: vwadd_vv_nxv4i64:
+define <vscale x 4 x i64> @vwadd_vv_nxv4i64_nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i32> %vb) {
+; CHECK-LABEL: vwadd_vv_nxv4i64_nxv4i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vwadd.vv v12, v8, v10
@@ -223,8 +223,8 @@ define <vscale x 4 x i64> @vwadd_vv_nxv4i64(<vscale x 4 x i32> %va, <vscale x 4
   ret <vscale x 4 x i64> %ve
 }
 
-define <vscale x 4 x i64> @vwaddu_vv_nxv4i64(<vscale x 4 x i32> %va, <vscale x 4 x i32> %vb) {
-; CHECK-LABEL: vwaddu_vv_nxv4i64:
+define <vscale x 4 x i64> @vwaddu_vv_nxv4i64_nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i32> %vb) {
+; CHECK-LABEL: vwaddu_vv_nxv4i64_nxv4i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vwaddu.vv v12, v8, v10
@@ -236,8 +236,8 @@ define <vscale x 4 x i64> @vwaddu_vv_nxv4i64(<vscale x 4 x i32> %va, <vscale x 4
   ret <vscale x 4 x i64> %ve
 }
 
-define <vscale x 4 x i64> @vwadd_vx_nxv4i64(<vscale x 4 x i32> %va, i32 %b) {
-; CHECK-LABEL: vwadd_vx_nxv4i64:
+define <vscale x 4 x i64> @vwadd_vx_nxv4i64_nxv4i32(<vscale x 4 x i32> %va, i32 %b) {
+; CHECK-LABEL: vwadd_vx_nxv4i64_nxv4i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vwadd.vx v12, v8, a0
@@ -251,8 +251,8 @@ define <vscale x 4 x i64> @vwadd_vx_nxv4i64(<vscale x 4 x i32> %va, i32 %b) {
   ret <vscale x 4 x i64> %ve
 }
 
-define <vscale x 4 x i64> @vwaddu_vx_nxv4i64(<vscale x 4 x i32> %va, i32 %b) {
-; CHECK-LABEL: vwaddu_vx_nxv4i64:
+define <vscale x 4 x i64> @vwaddu_vx_nxv4i64_nxv4i32(<vscale x 4 x i32> %va, i32 %b) {
+; CHECK-LABEL: vwaddu_vx_nxv4i64_nxv4i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vwaddu.vx v12, v8, a0
@@ -266,8 +266,8 @@ define <vscale x 4 x i64> @vwaddu_vx_nxv4i64(<vscale x 4 x i32> %va, i32 %b) {
   ret <vscale x 4 x i64> %ve
 }
 
-define <vscale x 4 x i64> @vwadd_wv_nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i32> %vb) {
-; CHECK-LABEL: vwadd_wv_nxv4i64:
+define <vscale x 4 x i64> @vwadd_wv_nxv4i64_nxv4i32(<vscale x 4 x i64> %va, <vscale x 4 x i32> %vb) {
+; CHECK-LABEL: vwadd_wv_nxv4i64_nxv4i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vwadd.wv v8, v8, v12
@@ -277,8 +277,8 @@ define <vscale x 4 x i64> @vwadd_wv_nxv4i64(<vscale x 4 x i64> %va, <vscale x 4
   ret <vscale x 4 x i64> %vd
 }
 
-define <vscale x 4 x i64> @vwaddu_wv_nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i32> %vb) {
-; CHECK-LABEL: vwaddu_wv_nxv4i64:
+define <vscale x 4 x i64> @vwaddu_wv_nxv4i64_nxv4i32(<vscale x 4 x i64> %va, <vscale x 4 x i32> %vb) {
+; CHECK-LABEL: vwaddu_wv_nxv4i64_nxv4i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vwaddu.wv v8, v8, v12
@@ -288,8 +288,8 @@ define <vscale x 4 x i64> @vwaddu_wv_nxv4i64(<vscale x 4 x i64> %va, <vscale x 4
   ret <vscale x 4 x i64> %vd
 }
 
-define <vscale x 4 x i64> @vwadd_wx_nxv4i64(<vscale x 4 x i64> %va, i32 %b) {
-; CHECK-LABEL: vwadd_wx_nxv4i64:
+define <vscale x 4 x i64> @vwadd_wx_nxv4i64_nxv4i32(<vscale x 4 x i64> %va, i32 %b) {
+; CHECK-LABEL: vwadd_wx_nxv4i64_nxv4i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vwadd.wx v8, v8, a0
@@ -301,8 +301,8 @@ define <vscale x 4 x i64> @vwadd_wx_nxv4i64(<vscale x 4 x i64> %va, i32 %b) {
   ret <vscale x 4 x i64> %vc
 }
 
-define <vscale x 4 x i64> @vwaddu_wx_nxv4i64(<vscale x 4 x i64> %va, i32 %b) {
-; CHECK-LABEL: vwaddu_wx_nxv4i64:
+define <vscale x 4 x i64> @vwaddu_wx_nxv4i64_nxv4i32(<vscale x 4 x i64> %va, i32 %b) {
+; CHECK-LABEL: vwaddu_wx_nxv4i64_nxv4i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vwaddu.wx v8, v8, a0
@@ -314,8 +314,8 @@ define <vscale x 4 x i64> @vwaddu_wx_nxv4i64(<vscale x 4 x i64> %va, i32 %b) {
   ret <vscale x 4 x i64> %vc
 }
 
-define <vscale x 8 x i64> @vwadd_vv_nxv8i64(<vscale x 8 x i32> %va, <vscale x 8 x i32> %vb) {
-; CHECK-LABEL: vwadd_vv_nxv8i64:
+define <vscale x 8 x i64> @vwadd_vv_nxv8i64_nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %vb) {
+; CHECK-LABEL: vwadd_vv_nxv8i64_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    vwadd.vv v16, v8, v12
@@ -327,8 +327,8 @@ define <vscale x 8 x i64> @vwadd_vv_nxv8i64(<vscale x 8 x i32> %va, <vscale x 8
   ret <vscale x 8 x i64> %ve
 }
 
-define <vscale x 8 x i64> @vwaddu_vv_nxv8i64(<vscale x 8 x i32> %va, <vscale x 8 x i32> %vb) {
-; CHECK-LABEL: vwaddu_vv_nxv8i64:
+define <vscale x 8 x i64> @vwaddu_vv_nxv8i64_nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %vb) {
+; CHECK-LABEL: vwaddu_vv_nxv8i64_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    vwaddu.vv v16, v8, v12
@@ -340,8 +340,8 @@ define <vscale x 8 x i64> @vwaddu_vv_nxv8i64(<vscale x 8 x i32> %va, <vscale x 8
   ret <vscale x 8 x i64> %ve
 }
 
-define <vscale x 8 x i64> @vwadd_vx_nxv8i64(<vscale x 8 x i32> %va, i32 %b) {
-; CHECK-LABEL: vwadd_vx_nxv8i64:
+define <vscale x 8 x i64> @vwadd_vx_nxv8i64_nxv8i32(<vscale x 8 x i32> %va, i32 %b) {
+; CHECK-LABEL: vwadd_vx_nxv8i64_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    vwadd.vx v16, v8, a0
@@ -355,8 +355,8 @@ define <vscale x 8 x i64> @vwadd_vx_nxv8i64(<vscale x 8 x i32> %va, i32 %b) {
   ret <vscale x 8 x i64> %ve
 }
 
-define <vscale x 8 x i64> @vwaddu_vx_nxv8i64(<vscale x 8 x i32> %va, i32 %b) {
-; CHECK-LABEL: vwaddu_vx_nxv8i64:
+define <vscale x 8 x i64> @vwaddu_vx_nxv8i64_nxv8i32(<vscale x 8 x i32> %va, i32 %b) {
+; CHECK-LABEL: vwaddu_vx_nxv8i64_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    vwaddu.vx v16, v8, a0
@@ -370,8 +370,8 @@ define <vscale x 8 x i64> @vwaddu_vx_nxv8i64(<vscale x 8 x i32> %va, i32 %b) {
   ret <vscale x 8 x i64> %ve
 }
 
-define <vscale x 8 x i64> @vwadd_wv_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i32> %vb) {
-; CHECK-LABEL: vwadd_wv_nxv8i64:
+define <vscale x 8 x i64> @vwadd_wv_nxv8i64_nxv8i32(<vscale x 8 x i64> %va, <vscale x 8 x i32> %vb) {
+; CHECK-LABEL: vwadd_wv_nxv8i64_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    vwadd.wv v8, v8, v16
@@ -381,8 +381,8 @@ define <vscale x 8 x i64> @vwadd_wv_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8
   ret <vscale x 8 x i64> %vd
 }
 
-define <vscale x 8 x i64> @vwaddu_wv_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i32> %vb) {
-; CHECK-LABEL: vwaddu_wv_nxv8i64:
+define <vscale x 8 x i64> @vwaddu_wv_nxv8i64_nxv8i32(<vscale x 8 x i64> %va, <vscale x 8 x i32> %vb) {
+; CHECK-LABEL: vwaddu_wv_nxv8i64_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    vwaddu.wv v8, v8, v16
@@ -392,8 +392,8 @@ define <vscale x 8 x i64> @vwaddu_wv_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8
   ret <vscale x 8 x i64> %vd
 }
 
-define <vscale x 8 x i64> @vwadd_wx_nxv8i64(<vscale x 8 x i64> %va, i32 %b) {
-; CHECK-LABEL: vwadd_wx_nxv8i64:
+define <vscale x 8 x i64> @vwadd_wx_nxv8i64_nxv8i32(<vscale x 8 x i64> %va, i32 %b) {
+; CHECK-LABEL: vwadd_wx_nxv8i64_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    vwadd.wx v8, v8, a0
@@ -405,8 +405,8 @@ define <vscale x 8 x i64> @vwadd_wx_nxv8i64(<vscale x 8 x i64> %va, i32 %b) {
   ret <vscale x 8 x i64> %vc
 }
 
-define <vscale x 8 x i64> @vwaddu_wx_nxv8i64(<vscale x 8 x i64> %va, i32 %b) {
-; CHECK-LABEL: vwaddu_wx_nxv8i64:
+define <vscale x 8 x i64> @vwaddu_wx_nxv8i64_nxv8i32(<vscale x 8 x i64> %va, i32 %b) {
+; CHECK-LABEL: vwaddu_wx_nxv8i64_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    vwaddu.wx v8, v8, a0
@@ -417,3 +417,963 @@ define <vscale x 8 x i64> @vwaddu_wx_nxv8i64(<vscale x 8 x i64> %va, i32 %b) {
   %vc = add <vscale x 8 x i64> %va, %vb
   ret <vscale x 8 x i64> %vc
 }
+
+define <vscale x 1 x i64> @vwadd_vv_nxv1i64_nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %vb) {
+; CHECK-LABEL: vwadd_vv_nxv1i64_nxv1i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
+; CHECK-NEXT:    vsext.vf4 v10, v8
+; CHECK-NEXT:    vsext.vf4 v8, v9
+; CHECK-NEXT:    vadd.vv v8, v10, v8
+; CHECK-NEXT:    ret
+  %vc = sext <vscale x 1 x i16> %va to <vscale x 1 x i64>
+  %vd = sext <vscale x 1 x i16> %vb to <vscale x 1 x i64>
+  %ve = add <vscale x 1 x i64> %vc, %vd
+  ret <vscale x 1 x i64> %ve
+}
+
+define <vscale x 1 x i64> @vwaddu_vv_nxv1i64_nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %vb) {
+; CHECK-LABEL: vwaddu_vv_nxv1i64_nxv1i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
+; CHECK-NEXT:    vzext.vf4 v10, v8
+; CHECK-NEXT:    vzext.vf4 v8, v9
+; CHECK-NEXT:    vadd.vv v8, v10, v8
+; CHECK-NEXT:    ret
+  %vc = zext <vscale x 1 x i16> %va to <vscale x 1 x i64>
+  %vd = zext <vscale x 1 x i16> %vb to <vscale x 1 x i64>
+  %ve = add <vscale x 1 x i64> %vc, %vd
+  ret <vscale x 1 x i64> %ve
+}
+
+define <vscale x 1 x i64> @vwadd_vx_nxv1i64_nxv1i16(<vscale x 1 x i16> %va, i16 %b) {
+; CHECK-LABEL: vwadd_vx_nxv1i64_nxv1i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
+; CHECK-NEXT:    vmv.v.x v9, a0
+; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
+; CHECK-NEXT:    vsext.vf4 v10, v8
+; CHECK-NEXT:    vsext.vf4 v8, v9
+; CHECK-NEXT:    vadd.vv v8, v10, v8
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 1 x i16> poison, i16 %b, i16 0
+  %splat = shufflevector <vscale x 1 x i16> %head, <vscale x 1 x i16> poison, <vscale x 1 x i32> zeroinitializer
+  %vc = sext <vscale x 1 x i16> %va to <vscale x 1 x i64>
+  %vd = sext <vscale x 1 x i16> %splat to <vscale x 1 x i64>
+  %ve = add <vscale x 1 x i64> %vc, %vd
+  ret <vscale x 1 x i64> %ve
+}
+
+define <vscale x 1 x i64> @vwaddu_vx_nxv1i64_nxv1i16(<vscale x 1 x i16> %va, i16 %b) {
+; CHECK-LABEL: vwaddu_vx_nxv1i64_nxv1i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
+; CHECK-NEXT:    vmv.v.x v9, a0
+; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
+; CHECK-NEXT:    vzext.vf4 v10, v8
+; CHECK-NEXT:    vzext.vf4 v8, v9
+; CHECK-NEXT:    vadd.vv v8, v10, v8
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 1 x i16> poison, i16 %b, i16 0
+  %splat = shufflevector <vscale x 1 x i16> %head, <vscale x 1 x i16> poison, <vscale x 1 x i32> zeroinitializer
+  %vc = zext <vscale x 1 x i16> %va to <vscale x 1 x i64>
+  %vd = zext <vscale x 1 x i16> %splat to <vscale x 1 x i64>
+  %ve = add <vscale x 1 x i64> %vc, %vd
+  ret <vscale x 1 x i64> %ve
+}
+
+define <vscale x 1 x i64> @vwadd_wv_nxv1i64_nxv1i16(<vscale x 1 x i64> %va, <vscale x 1 x i16> %vb) {
+; CHECK-LABEL: vwadd_wv_nxv1i64_nxv1i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
+; CHECK-NEXT:    vsext.vf4 v10, v9
+; CHECK-NEXT:    vadd.vv v8, v8, v10
+; CHECK-NEXT:    ret
+  %vc = sext <vscale x 1 x i16> %vb to <vscale x 1 x i64>
+  %vd = add <vscale x 1 x i64> %va, %vc
+  ret <vscale x 1 x i64> %vd
+}
+
+define <vscale x 1 x i64> @vwaddu_wv_nxv1i64_nxv1i16(<vscale x 1 x i64> %va, <vscale x 1 x i16> %vb) {
+; CHECK-LABEL: vwaddu_wv_nxv1i64_nxv1i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
+; CHECK-NEXT:    vzext.vf4 v10, v9
+; CHECK-NEXT:    vadd.vv v8, v8, v10
+; CHECK-NEXT:    ret
+  %vc = zext <vscale x 1 x i16> %vb to <vscale x 1 x i64>
+  %vd = add <vscale x 1 x i64> %va, %vc
+  ret <vscale x 1 x i64> %vd
+}
+
+define <vscale x 1 x i64> @vwadd_wx_nxv1i64_nxv1i16(<vscale x 1 x i64> %va, i16 %b) {
+; CHECK-LABEL: vwadd_wx_nxv1i64_nxv1i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
+; CHECK-NEXT:    vmv.v.x v9, a0
+; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
+; CHECK-NEXT:    vsext.vf4 v10, v9
+; CHECK-NEXT:    vadd.vv v8, v8, v10
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 1 x i16> poison, i16 %b, i16 0
+  %splat = shufflevector <vscale x 1 x i16> %head, <vscale x 1 x i16> poison, <vscale x 1 x i32> zeroinitializer
+  %vb = sext <vscale x 1 x i16> %splat to <vscale x 1 x i64>
+  %vc = add <vscale x 1 x i64> %va, %vb
+  ret <vscale x 1 x i64> %vc
+}
+
+define <vscale x 1 x i64> @vwaddu_wx_nxv1i64_nxv1i16(<vscale x 1 x i64> %va, i16 %b) {
+; CHECK-LABEL: vwaddu_wx_nxv1i64_nxv1i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
+; CHECK-NEXT:    vmv.v.x v9, a0
+; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
+; CHECK-NEXT:    vzext.vf4 v10, v9
+; CHECK-NEXT:    vadd.vv v8, v8, v10
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 1 x i16> poison, i16 %b, i16 0
+  %splat = shufflevector <vscale x 1 x i16> %head, <vscale x 1 x i16> poison, <vscale x 1 x i32> zeroinitializer
+  %vb = zext <vscale x 1 x i16> %splat to <vscale x 1 x i64>
+  %vc = add <vscale x 1 x i64> %va, %vb
+  ret <vscale x 1 x i64> %vc
+}
+
+define <vscale x 2 x i64> @vwadd_vv_nxv2i64_nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %vb) {
+; CHECK-LABEL: vwadd_vv_nxv2i64_nxv2i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
+; CHECK-NEXT:    vsext.vf4 v10, v8
+; CHECK-NEXT:    vsext.vf4 v12, v9
+; CHECK-NEXT:    vadd.vv v8, v10, v12
+; CHECK-NEXT:    ret
+  %vc = sext <vscale x 2 x i16> %va to <vscale x 2 x i64>
+  %vd = sext <vscale x 2 x i16> %vb to <vscale x 2 x i64>
+  %ve = add <vscale x 2 x i64> %vc, %vd
+  ret <vscale x 2 x i64> %ve
+}
+
+define <vscale x 2 x i64> @vwaddu_vv_nxv2i64_nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %vb) {
+; CHECK-LABEL: vwaddu_vv_nxv2i64_nxv2i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
+; CHECK-NEXT:    vzext.vf4 v10, v8
+; CHECK-NEXT:    vzext.vf4 v12, v9
+; CHECK-NEXT:    vadd.vv v8, v10, v12
+; CHECK-NEXT:    ret
+  %vc = zext <vscale x 2 x i16> %va to <vscale x 2 x i64>
+  %vd = zext <vscale x 2 x i16> %vb to <vscale x 2 x i64>
+  %ve = add <vscale x 2 x i64> %vc, %vd
+  ret <vscale x 2 x i64> %ve
+}
+
+define <vscale x 2 x i64> @vwadd_vx_nxv2i64_nxv2i16(<vscale x 2 x i16> %va, i16 %b) {
+; CHECK-LABEL: vwadd_vx_nxv2i64_nxv2i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
+; CHECK-NEXT:    vmv.v.x v9, a0
+; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
+; CHECK-NEXT:    vsext.vf4 v10, v8
+; CHECK-NEXT:    vsext.vf4 v12, v9
+; CHECK-NEXT:    vadd.vv v8, v10, v12
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 2 x i16> poison, i16 %b, i16 0
+  %splat = shufflevector <vscale x 2 x i16> %head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
+  %vc = sext <vscale x 2 x i16> %va to <vscale x 2 x i64>
+  %vd = sext <vscale x 2 x i16> %splat to <vscale x 2 x i64>
+  %ve = add <vscale x 2 x i64> %vc, %vd
+  ret <vscale x 2 x i64> %ve
+}
+
+define <vscale x 2 x i64> @vwaddu_vx_nxv2i64_nxv2i16(<vscale x 2 x i16> %va, i16 %b) {
+; CHECK-LABEL: vwaddu_vx_nxv2i64_nxv2i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
+; CHECK-NEXT:    vmv.v.x v9, a0
+; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
+; CHECK-NEXT:    vzext.vf4 v10, v8
+; CHECK-NEXT:    vzext.vf4 v12, v9
+; CHECK-NEXT:    vadd.vv v8, v10, v12
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 2 x i16> poison, i16 %b, i16 0
+  %splat = shufflevector <vscale x 2 x i16> %head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
+  %vc = zext <vscale x 2 x i16> %va to <vscale x 2 x i64>
+  %vd = zext <vscale x 2 x i16> %splat to <vscale x 2 x i64>
+  %ve = add <vscale x 2 x i64> %vc, %vd
+  ret <vscale x 2 x i64> %ve
+}
+
+define <vscale x 2 x i64> @vwadd_wv_nxv2i64_nxv2i16(<vscale x 2 x i64> %va, <vscale x 2 x i16> %vb) {
+; CHECK-LABEL: vwadd_wv_nxv2i64_nxv2i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
+; CHECK-NEXT:    vsext.vf4 v12, v10
+; CHECK-NEXT:    vadd.vv v8, v8, v12
+; CHECK-NEXT:    ret
+  %vc = sext <vscale x 2 x i16> %vb to <vscale x 2 x i64>
+  %vd = add <vscale x 2 x i64> %va, %vc
+  ret <vscale x 2 x i64> %vd
+}
+
+define <vscale x 2 x i64> @vwaddu_wv_nxv2i64_nxv2i16(<vscale x 2 x i64> %va, <vscale x 2 x i16> %vb) {
+; CHECK-LABEL: vwaddu_wv_nxv2i64_nxv2i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
+; CHECK-NEXT:    vzext.vf4 v12, v10
+; CHECK-NEXT:    vadd.vv v8, v8, v12
+; CHECK-NEXT:    ret
+  %vc = zext <vscale x 2 x i16> %vb to <vscale x 2 x i64>
+  %vd = add <vscale x 2 x i64> %va, %vc
+  ret <vscale x 2 x i64> %vd
+}
+
+define <vscale x 2 x i64> @vwadd_wx_nxv2i64_nxv2i16(<vscale x 2 x i64> %va, i16 %b) {
+; CHECK-LABEL: vwadd_wx_nxv2i64_nxv2i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
+; CHECK-NEXT:    vmv.v.x v10, a0
+; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
+; CHECK-NEXT:    vsext.vf4 v12, v10
+; CHECK-NEXT:    vadd.vv v8, v8, v12
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 2 x i16> poison, i16 %b, i16 0
+  %splat = shufflevector <vscale x 2 x i16> %head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
+  %vb = sext <vscale x 2 x i16> %splat to <vscale x 2 x i64>
+  %vc = add <vscale x 2 x i64> %va, %vb
+  ret <vscale x 2 x i64> %vc
+}
+
+define <vscale x 2 x i64> @vwaddu_wx_nxv2i64_nxv2i16(<vscale x 2 x i64> %va, i16 %b) {
+; CHECK-LABEL: vwaddu_wx_nxv2i64_nxv2i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
+; CHECK-NEXT:    vmv.v.x v10, a0
+; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
+; CHECK-NEXT:    vzext.vf4 v12, v10
+; CHECK-NEXT:    vadd.vv v8, v8, v12
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 2 x i16> poison, i16 %b, i16 0
+  %splat = shufflevector <vscale x 2 x i16> %head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
+  %vb = zext <vscale x 2 x i16> %splat to <vscale x 2 x i64>
+  %vc = add <vscale x 2 x i64> %va, %vb
+  ret <vscale x 2 x i64> %vc
+}
+
+define <vscale x 4 x i64> @vwadd_vv_nxv4i64_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %vb) {
+; CHECK-LABEL: vwadd_vv_nxv4i64_nxv4i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
+; CHECK-NEXT:    vsext.vf4 v12, v8
+; CHECK-NEXT:    vsext.vf4 v16, v9
+; CHECK-NEXT:    vadd.vv v8, v12, v16
+; CHECK-NEXT:    ret
+  %vc = sext <vscale x 4 x i16> %va to <vscale x 4 x i64>
+  %vd = sext <vscale x 4 x i16> %vb to <vscale x 4 x i64>
+  %ve = add <vscale x 4 x i64> %vc, %vd
+  ret <vscale x 4 x i64> %ve
+}
+
+define <vscale x 4 x i64> @vwaddu_vv_nxv4i64_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %vb) {
+; CHECK-LABEL: vwaddu_vv_nxv4i64_nxv4i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
+; CHECK-NEXT:    vzext.vf4 v12, v8
+; CHECK-NEXT:    vzext.vf4 v16, v9
+; CHECK-NEXT:    vadd.vv v8, v12, v16
+; CHECK-NEXT:    ret
+  %vc = zext <vscale x 4 x i16> %va to <vscale x 4 x i64>
+  %vd = zext <vscale x 4 x i16> %vb to <vscale x 4 x i64>
+  %ve = add <vscale x 4 x i64> %vc, %vd
+  ret <vscale x 4 x i64> %ve
+}
+
+define <vscale x 4 x i64> @vwadd_vx_nxv4i64_nxv4i16(<vscale x 4 x i16> %va, i16 %b) {
+; CHECK-LABEL: vwadd_vx_nxv4i64_nxv4i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
+; CHECK-NEXT:    vmv.v.x v9, a0
+; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
+; CHECK-NEXT:    vsext.vf4 v12, v8
+; CHECK-NEXT:    vsext.vf4 v16, v9
+; CHECK-NEXT:    vadd.vv v8, v12, v16
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 4 x i16> poison, i16 %b, i16 0
+  %splat = shufflevector <vscale x 4 x i16> %head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
+  %vc = sext <vscale x 4 x i16> %va to <vscale x 4 x i64>
+  %vd = sext <vscale x 4 x i16> %splat to <vscale x 4 x i64>
+  %ve = add <vscale x 4 x i64> %vc, %vd
+  ret <vscale x 4 x i64> %ve
+}
+
+define <vscale x 4 x i64> @vwaddu_vx_nxv4i64_nxv4i16(<vscale x 4 x i16> %va, i16 %b) {
+; CHECK-LABEL: vwaddu_vx_nxv4i64_nxv4i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
+; CHECK-NEXT:    vmv.v.x v9, a0
+; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
+; CHECK-NEXT:    vzext.vf4 v12, v8
+; CHECK-NEXT:    vzext.vf4 v16, v9
+; CHECK-NEXT:    vadd.vv v8, v12, v16
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 4 x i16> poison, i16 %b, i16 0
+  %splat = shufflevector <vscale x 4 x i16> %head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
+  %vc = zext <vscale x 4 x i16> %va to <vscale x 4 x i64>
+  %vd = zext <vscale x 4 x i16> %splat to <vscale x 4 x i64>
+  %ve = add <vscale x 4 x i64> %vc, %vd
+  ret <vscale x 4 x i64> %ve
+}
+
+define <vscale x 4 x i64> @vwadd_wv_nxv4i64_nxv4i16(<vscale x 4 x i64> %va, <vscale x 4 x i16> %vb) {
+; CHECK-LABEL: vwadd_wv_nxv4i64_nxv4i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
+; CHECK-NEXT:    vsext.vf4 v16, v12
+; CHECK-NEXT:    vadd.vv v8, v8, v16
+; CHECK-NEXT:    ret
+  %vc = sext <vscale x 4 x i16> %vb to <vscale x 4 x i64>
+  %vd = add <vscale x 4 x i64> %va, %vc
+  ret <vscale x 4 x i64> %vd
+}
+
+define <vscale x 4 x i64> @vwaddu_wv_nxv4i64_nxv4i16(<vscale x 4 x i64> %va, <vscale x 4 x i16> %vb) {
+; CHECK-LABEL: vwaddu_wv_nxv4i64_nxv4i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
+; CHECK-NEXT:    vzext.vf4 v16, v12
+; CHECK-NEXT:    vadd.vv v8, v8, v16
+; CHECK-NEXT:    ret
+  %vc = zext <vscale x 4 x i16> %vb to <vscale x 4 x i64>
+  %vd = add <vscale x 4 x i64> %va, %vc
+  ret <vscale x 4 x i64> %vd
+}
+
+define <vscale x 4 x i64> @vwadd_wx_nxv4i64_nxv4i16(<vscale x 4 x i64> %va, i16 %b) {
+; CHECK-LABEL: vwadd_wx_nxv4i64_nxv4i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
+; CHECK-NEXT:    vmv.v.x v12, a0
+; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
+; CHECK-NEXT:    vsext.vf4 v16, v12
+; CHECK-NEXT:    vadd.vv v8, v8, v16
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 4 x i16> poison, i16 %b, i16 0
+  %splat = shufflevector <vscale x 4 x i16> %head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
+  %vb = sext <vscale x 4 x i16> %splat to <vscale x 4 x i64>
+  %vc = add <vscale x 4 x i64> %va, %vb
+  ret <vscale x 4 x i64> %vc
+}
+
+define <vscale x 4 x i64> @vwaddu_wx_nxv4i64_nxv4i16(<vscale x 4 x i64> %va, i16 %b) {
+; CHECK-LABEL: vwaddu_wx_nxv4i64_nxv4i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
+; CHECK-NEXT:    vmv.v.x v12, a0
+; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
+; CHECK-NEXT:    vzext.vf4 v16, v12
+; CHECK-NEXT:    vadd.vv v8, v8, v16
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 4 x i16> poison, i16 %b, i16 0
+  %splat = shufflevector <vscale x 4 x i16> %head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
+  %vb = zext <vscale x 4 x i16> %splat to <vscale x 4 x i64>
+  %vc = add <vscale x 4 x i64> %va, %vb
+  ret <vscale x 4 x i64> %vc
+}
+
+define <vscale x 8 x i64> @vwadd_vv_nxv8i64_nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %vb) {
+; CHECK-LABEL: vwadd_vv_nxv8i64_nxv8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
+; CHECK-NEXT:    vsext.vf4 v16, v8
+; CHECK-NEXT:    vsext.vf4 v24, v10
+; CHECK-NEXT:    vadd.vv v8, v16, v24
+; CHECK-NEXT:    ret
+  %vc = sext <vscale x 8 x i16> %va to <vscale x 8 x i64>
+  %vd = sext <vscale x 8 x i16> %vb to <vscale x 8 x i64>
+  %ve = add <vscale x 8 x i64> %vc, %vd
+  ret <vscale x 8 x i64> %ve
+}
+
+define <vscale x 8 x i64> @vwaddu_vv_nxv8i64_nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %vb) {
+; CHECK-LABEL: vwaddu_vv_nxv8i64_nxv8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
+; CHECK-NEXT:    vzext.vf4 v16, v8
+; CHECK-NEXT:    vzext.vf4 v24, v10
+; CHECK-NEXT:    vadd.vv v8, v16, v24
+; CHECK-NEXT:    ret
+  %vc = zext <vscale x 8 x i16> %va to <vscale x 8 x i64>
+  %vd = zext <vscale x 8 x i16> %vb to <vscale x 8 x i64>
+  %ve = add <vscale x 8 x i64> %vc, %vd
+  ret <vscale x 8 x i64> %ve
+}
+
+define <vscale x 8 x i64> @vwadd_vx_nxv8i64_nxv8i16(<vscale x 8 x i16> %va, i16 %b) {
+; CHECK-LABEL: vwadd_vx_nxv8i64_nxv8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT:    vmv.v.x v10, a0
+; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; CHECK-NEXT:    vsext.vf4 v16, v8
+; CHECK-NEXT:    vsext.vf4 v24, v10
+; CHECK-NEXT:    vadd.vv v8, v16, v24
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 8 x i16> poison, i16 %b, i16 0
+  %splat = shufflevector <vscale x 8 x i16> %head, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
+  %vc = sext <vscale x 8 x i16> %va to <vscale x 8 x i64>
+  %vd = sext <vscale x 8 x i16> %splat to <vscale x 8 x i64>
+  %ve = add <vscale x 8 x i64> %vc, %vd
+  ret <vscale x 8 x i64> %ve
+}
+
+define <vscale x 8 x i64> @vwaddu_vx_nxv8i64_nxv8i16(<vscale x 8 x i16> %va, i16 %b) {
+; CHECK-LABEL: vwaddu_vx_nxv8i64_nxv8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT:    vmv.v.x v10, a0
+; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; CHECK-NEXT:    vzext.vf4 v16, v8
+; CHECK-NEXT:    vzext.vf4 v24, v10
+; CHECK-NEXT:    vadd.vv v8, v16, v24
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 8 x i16> poison, i16 %b, i16 0
+  %splat = shufflevector <vscale x 8 x i16> %head, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
+  %vc = zext <vscale x 8 x i16> %va to <vscale x 8 x i64>
+  %vd = zext <vscale x 8 x i16> %splat to <vscale x 8 x i64>
+  %ve = add <vscale x 8 x i64> %vc, %vd
+  ret <vscale x 8 x i64> %ve
+}
+
+define <vscale x 8 x i64> @vwadd_wv_nxv8i64_nxv8i16(<vscale x 8 x i64> %va, <vscale x 8 x i16> %vb) {
+; CHECK-LABEL: vwadd_wv_nxv8i64_nxv8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
+; CHECK-NEXT:    vsext.vf4 v24, v16
+; CHECK-NEXT:    vadd.vv v8, v8, v24
+; CHECK-NEXT:    ret
+  %vc = sext <vscale x 8 x i16> %vb to <vscale x 8 x i64>
+  %vd = add <vscale x 8 x i64> %va, %vc
+  ret <vscale x 8 x i64> %vd
+}
+
+define <vscale x 8 x i64> @vwaddu_wv_nxv8i64_nxv8i16(<vscale x 8 x i64> %va, <vscale x 8 x i16> %vb) {
+; CHECK-LABEL: vwaddu_wv_nxv8i64_nxv8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
+; CHECK-NEXT:    vzext.vf4 v24, v16
+; CHECK-NEXT:    vadd.vv v8, v8, v24
+; CHECK-NEXT:    ret
+  %vc = zext <vscale x 8 x i16> %vb to <vscale x 8 x i64>
+  %vd = add <vscale x 8 x i64> %va, %vc
+  ret <vscale x 8 x i64> %vd
+}
+
+define <vscale x 8 x i64> @vwadd_wx_nxv8i64_nxv8i16(<vscale x 8 x i64> %va, i16 %b) {
+; CHECK-LABEL: vwadd_wx_nxv8i64_nxv8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT:    vmv.v.x v16, a0
+; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; CHECK-NEXT:    vsext.vf4 v24, v16
+; CHECK-NEXT:    vadd.vv v8, v8, v24
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 8 x i16> poison, i16 %b, i16 0
+  %splat = shufflevector <vscale x 8 x i16> %head, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
+  %vb = sext <vscale x 8 x i16> %splat to <vscale x 8 x i64>
+  %vc = add <vscale x 8 x i64> %va, %vb
+  ret <vscale x 8 x i64> %vc
+}
+
+define <vscale x 8 x i64> @vwaddu_wx_nxv8i64_nxv8i16(<vscale x 8 x i64> %va, i16 %b) {
+; CHECK-LABEL: vwaddu_wx_nxv8i64_nxv8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT:    vmv.v.x v16, a0
+; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; CHECK-NEXT:    vzext.vf4 v24, v16
+; CHECK-NEXT:    vadd.vv v8, v8, v24
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 8 x i16> poison, i16 %b, i16 0
+  %splat = shufflevector <vscale x 8 x i16> %head, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
+  %vb = zext <vscale x 8 x i16> %splat to <vscale x 8 x i64>
+  %vc = add <vscale x 8 x i64> %va, %vb
+  ret <vscale x 8 x i64> %vc
+}
+
+define <vscale x 1 x i64> @vwadd_vv_nxv1i64_nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb) {
+; CHECK-LABEL: vwadd_vv_nxv1i64_nxv1i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
+; CHECK-NEXT:    vsext.vf8 v10, v8
+; CHECK-NEXT:    vsext.vf8 v8, v9
+; CHECK-NEXT:    vadd.vv v8, v10, v8
+; CHECK-NEXT:    ret
+  %vc = sext <vscale x 1 x i8> %va to <vscale x 1 x i64>
+  %vd = sext <vscale x 1 x i8> %vb to <vscale x 1 x i64>
+  %ve = add <vscale x 1 x i64> %vc, %vd
+  ret <vscale x 1 x i64> %ve
+}
+
+define <vscale x 1 x i64> @vwaddu_vv_nxv1i64_nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb) {
+; CHECK-LABEL: vwaddu_vv_nxv1i64_nxv1i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
+; CHECK-NEXT:    vzext.vf8 v10, v8
+; CHECK-NEXT:    vzext.vf8 v8, v9
+; CHECK-NEXT:    vadd.vv v8, v10, v8
+; CHECK-NEXT:    ret
+  %vc = zext <vscale x 1 x i8> %va to <vscale x 1 x i64>
+  %vd = zext <vscale x 1 x i8> %vb to <vscale x 1 x i64>
+  %ve = add <vscale x 1 x i64> %vc, %vd
+  ret <vscale x 1 x i64> %ve
+}
+
+define <vscale x 1 x i64> @vwadd_vx_nxv1i64_nxv1i8(<vscale x 1 x i8> %va, i8 %b) {
+; CHECK-LABEL: vwadd_vx_nxv1i64_nxv1i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e8, mf8, ta, ma
+; CHECK-NEXT:    vmv.v.x v9, a0
+; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
+; CHECK-NEXT:    vsext.vf8 v10, v8
+; CHECK-NEXT:    vsext.vf8 v8, v9
+; CHECK-NEXT:    vadd.vv v8, v10, v8
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 1 x i8> poison, i8 %b, i8 0
+  %splat = shufflevector <vscale x 1 x i8> %head, <vscale x 1 x i8> poison, <vscale x 1 x i32> zeroinitializer
+  %vc = sext <vscale x 1 x i8> %va to <vscale x 1 x i64>
+  %vd = sext <vscale x 1 x i8> %splat to <vscale x 1 x i64>
+  %ve = add <vscale x 1 x i64> %vc, %vd
+  ret <vscale x 1 x i64> %ve
+}
+
+define <vscale x 1 x i64> @vwaddu_vx_nxv1i64_nxv1i8(<vscale x 1 x i8> %va, i8 %b) {
+; CHECK-LABEL: vwaddu_vx_nxv1i64_nxv1i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e8, mf8, ta, ma
+; CHECK-NEXT:    vmv.v.x v9, a0
+; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
+; CHECK-NEXT:    vzext.vf8 v10, v8
+; CHECK-NEXT:    vzext.vf8 v8, v9
+; CHECK-NEXT:    vadd.vv v8, v10, v8
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 1 x i8> poison, i8 %b, i8 0
+  %splat = shufflevector <vscale x 1 x i8> %head, <vscale x 1 x i8> poison, <vscale x 1 x i32> zeroinitializer
+  %vc = zext <vscale x 1 x i8> %va to <vscale x 1 x i64>
+  %vd = zext <vscale x 1 x i8> %splat to <vscale x 1 x i64>
+  %ve = add <vscale x 1 x i64> %vc, %vd
+  ret <vscale x 1 x i64> %ve
+}
+
+define <vscale x 1 x i64> @vwadd_wv_nxv1i64_nxv1i8(<vscale x 1 x i64> %va, <vscale x 1 x i8> %vb) {
+; CHECK-LABEL: vwadd_wv_nxv1i64_nxv1i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
+; CHECK-NEXT:    vsext.vf8 v10, v9
+; CHECK-NEXT:    vadd.vv v8, v8, v10
+; CHECK-NEXT:    ret
+  %vc = sext <vscale x 1 x i8> %vb to <vscale x 1 x i64>
+  %vd = add <vscale x 1 x i64> %va, %vc
+  ret <vscale x 1 x i64> %vd
+}
+
+define <vscale x 1 x i64> @vwaddu_wv_nxv1i64_nxv1i8(<vscale x 1 x i64> %va, <vscale x 1 x i8> %vb) {
+; CHECK-LABEL: vwaddu_wv_nxv1i64_nxv1i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
+; CHECK-NEXT:    vzext.vf8 v10, v9
+; CHECK-NEXT:    vadd.vv v8, v8, v10
+; CHECK-NEXT:    ret
+  %vc = zext <vscale x 1 x i8> %vb to <vscale x 1 x i64>
+  %vd = add <vscale x 1 x i64> %va, %vc
+  ret <vscale x 1 x i64> %vd
+}
+
+define <vscale x 1 x i64> @vwadd_wx_nxv1i64_nxv1i8(<vscale x 1 x i64> %va, i8 %b) {
+; CHECK-LABEL: vwadd_wx_nxv1i64_nxv1i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e8, mf8, ta, ma
+; CHECK-NEXT:    vmv.v.x v9, a0
+; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
+; CHECK-NEXT:    vsext.vf8 v10, v9
+; CHECK-NEXT:    vadd.vv v8, v8, v10
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 1 x i8> poison, i8 %b, i8 0
+  %splat = shufflevector <vscale x 1 x i8> %head, <vscale x 1 x i8> poison, <vscale x 1 x i32> zeroinitializer
+  %vb = sext <vscale x 1 x i8> %splat to <vscale x 1 x i64>
+  %vc = add <vscale x 1 x i64> %va, %vb
+  ret <vscale x 1 x i64> %vc
+}
+
+define <vscale x 1 x i64> @vwaddu_wx_nxv1i64_nxv1i8(<vscale x 1 x i64> %va, i8 %b) {
+; CHECK-LABEL: vwaddu_wx_nxv1i64_nxv1i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e8, mf8, ta, ma
+; CHECK-NEXT:    vmv.v.x v9, a0
+; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
+; CHECK-NEXT:    vzext.vf8 v10, v9
+; CHECK-NEXT:    vadd.vv v8, v8, v10
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 1 x i8> poison, i8 %b, i8 0
+  %splat = shufflevector <vscale x 1 x i8> %head, <vscale x 1 x i8> poison, <vscale x 1 x i32> zeroinitializer
+  %vb = zext <vscale x 1 x i8> %splat to <vscale x 1 x i64>
+  %vc = add <vscale x 1 x i64> %va, %vb
+  ret <vscale x 1 x i64> %vc
+}
+
+define <vscale x 2 x i64> @vwadd_vv_nxv2i64_nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %vb) {
+; CHECK-LABEL: vwadd_vv_nxv2i64_nxv2i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
+; CHECK-NEXT:    vsext.vf8 v10, v8
+; CHECK-NEXT:    vsext.vf8 v12, v9
+; CHECK-NEXT:    vadd.vv v8, v10, v12
+; CHECK-NEXT:    ret
+  %vc = sext <vscale x 2 x i8> %va to <vscale x 2 x i64>
+  %vd = sext <vscale x 2 x i8> %vb to <vscale x 2 x i64>
+  %ve = add <vscale x 2 x i64> %vc, %vd
+  ret <vscale x 2 x i64> %ve
+}
+
+define <vscale x 2 x i64> @vwaddu_vv_nxv2i64_nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %vb) {
+; CHECK-LABEL: vwaddu_vv_nxv2i64_nxv2i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
+; CHECK-NEXT:    vzext.vf8 v10, v8
+; CHECK-NEXT:    vzext.vf8 v12, v9
+; CHECK-NEXT:    vadd.vv v8, v10, v12
+; CHECK-NEXT:    ret
+  %vc = zext <vscale x 2 x i8> %va to <vscale x 2 x i64>
+  %vd = zext <vscale x 2 x i8> %vb to <vscale x 2 x i64>
+  %ve = add <vscale x 2 x i64> %vc, %vd
+  ret <vscale x 2 x i64> %ve
+}
+
+define <vscale x 2 x i64> @vwadd_vx_nxv2i64_nxv2i8(<vscale x 2 x i8> %va, i8 %b) {
+; CHECK-LABEL: vwadd_vx_nxv2i64_nxv2i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e8, mf4, ta, ma
+; CHECK-NEXT:    vmv.v.x v9, a0
+; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
+; CHECK-NEXT:    vsext.vf8 v10, v8
+; CHECK-NEXT:    vsext.vf8 v12, v9
+; CHECK-NEXT:    vadd.vv v8, v10, v12
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 2 x i8> poison, i8 %b, i8 0
+  %splat = shufflevector <vscale x 2 x i8> %head, <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer
+  %vc = sext <vscale x 2 x i8> %va to <vscale x 2 x i64>
+  %vd = sext <vscale x 2 x i8> %splat to <vscale x 2 x i64>
+  %ve = add <vscale x 2 x i64> %vc, %vd
+  ret <vscale x 2 x i64> %ve
+}
+
+define <vscale x 2 x i64> @vwaddu_vx_nxv2i64_nxv2i8(<vscale x 2 x i8> %va, i8 %b) {
+; CHECK-LABEL: vwaddu_vx_nxv2i64_nxv2i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e8, mf4, ta, ma
+; CHECK-NEXT:    vmv.v.x v9, a0
+; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
+; CHECK-NEXT:    vzext.vf8 v10, v8
+; CHECK-NEXT:    vzext.vf8 v12, v9
+; CHECK-NEXT:    vadd.vv v8, v10, v12
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 2 x i8> poison, i8 %b, i8 0
+  %splat = shufflevector <vscale x 2 x i8> %head, <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer
+  %vc = zext <vscale x 2 x i8> %va to <vscale x 2 x i64>
+  %vd = zext <vscale x 2 x i8> %splat to <vscale x 2 x i64>
+  %ve = add <vscale x 2 x i64> %vc, %vd
+  ret <vscale x 2 x i64> %ve
+}
+
+define <vscale x 2 x i64> @vwadd_wv_nxv2i64_nxv2i8(<vscale x 2 x i64> %va, <vscale x 2 x i8> %vb) {
+; CHECK-LABEL: vwadd_wv_nxv2i64_nxv2i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
+; CHECK-NEXT:    vsext.vf8 v12, v10
+; CHECK-NEXT:    vadd.vv v8, v8, v12
+; CHECK-NEXT:    ret
+  %vc = sext <vscale x 2 x i8> %vb to <vscale x 2 x i64>
+  %vd = add <vscale x 2 x i64> %va, %vc
+  ret <vscale x 2 x i64> %vd
+}
+
+define <vscale x 2 x i64> @vwaddu_wv_nxv2i64_nxv2i8(<vscale x 2 x i64> %va, <vscale x 2 x i8> %vb) {
+; CHECK-LABEL: vwaddu_wv_nxv2i64_nxv2i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
+; CHECK-NEXT:    vzext.vf8 v12, v10
+; CHECK-NEXT:    vadd.vv v8, v8, v12
+; CHECK-NEXT:    ret
+  %vc = zext <vscale x 2 x i8> %vb to <vscale x 2 x i64>
+  %vd = add <vscale x 2 x i64> %va, %vc
+  ret <vscale x 2 x i64> %vd
+}
+
+define <vscale x 2 x i64> @vwadd_wx_nxv2i64_nxv2i8(<vscale x 2 x i64> %va, i8 %b) {
+; CHECK-LABEL: vwadd_wx_nxv2i64_nxv2i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e8, mf4, ta, ma
+; CHECK-NEXT:    vmv.v.x v10, a0
+; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
+; CHECK-NEXT:    vsext.vf8 v12, v10
+; CHECK-NEXT:    vadd.vv v8, v8, v12
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 2 x i8> poison, i8 %b, i8 0
+  %splat = shufflevector <vscale x 2 x i8> %head, <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer
+  %vb = sext <vscale x 2 x i8> %splat to <vscale x 2 x i64>
+  %vc = add <vscale x 2 x i64> %va, %vb
+  ret <vscale x 2 x i64> %vc
+}
+
+define <vscale x 2 x i64> @vwaddu_wx_nxv2i64_nxv2i8(<vscale x 2 x i64> %va, i8 %b) {
+; CHECK-LABEL: vwaddu_wx_nxv2i64_nxv2i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e8, mf4, ta, ma
+; CHECK-NEXT:    vmv.v.x v10, a0
+; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
+; CHECK-NEXT:    vzext.vf8 v12, v10
+; CHECK-NEXT:    vadd.vv v8, v8, v12
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 2 x i8> poison, i8 %b, i8 0
+  %splat = shufflevector <vscale x 2 x i8> %head, <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer
+  %vb = zext <vscale x 2 x i8> %splat to <vscale x 2 x i64>
+  %vc = add <vscale x 2 x i64> %va, %vb
+  ret <vscale x 2 x i64> %vc
+}
+
+define <vscale x 4 x i64> @vwadd_vv_nxv4i64_nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %vb) {
+; CHECK-LABEL: vwadd_vv_nxv4i64_nxv4i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
+; CHECK-NEXT:    vsext.vf8 v12, v8
+; CHECK-NEXT:    vsext.vf8 v16, v9
+; CHECK-NEXT:    vadd.vv v8, v12, v16
+; CHECK-NEXT:    ret
+  %vc = sext <vscale x 4 x i8> %va to <vscale x 4 x i64>
+  %vd = sext <vscale x 4 x i8> %vb to <vscale x 4 x i64>
+  %ve = add <vscale x 4 x i64> %vc, %vd
+  ret <vscale x 4 x i64> %ve
+}
+
+define <vscale x 4 x i64> @vwaddu_vv_nxv4i64_nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %vb) {
+; CHECK-LABEL: vwaddu_vv_nxv4i64_nxv4i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
+; CHECK-NEXT:    vzext.vf8 v12, v8
+; CHECK-NEXT:    vzext.vf8 v16, v9
+; CHECK-NEXT:    vadd.vv v8, v12, v16
+; CHECK-NEXT:    ret
+  %vc = zext <vscale x 4 x i8> %va to <vscale x 4 x i64>
+  %vd = zext <vscale x 4 x i8> %vb to <vscale x 4 x i64>
+  %ve = add <vscale x 4 x i64> %vc, %vd
+  ret <vscale x 4 x i64> %ve
+}
+
+define <vscale x 4 x i64> @vwadd_vx_nxv4i64_nxv4i8(<vscale x 4 x i8> %va, i8 %b) {
+; CHECK-LABEL: vwadd_vx_nxv4i64_nxv4i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e8, mf2, ta, ma
+; CHECK-NEXT:    vmv.v.x v9, a0
+; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
+; CHECK-NEXT:    vsext.vf8 v12, v8
+; CHECK-NEXT:    vsext.vf8 v16, v9
+; CHECK-NEXT:    vadd.vv v8, v12, v16
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 4 x i8> poison, i8 %b, i8 0
+  %splat = shufflevector <vscale x 4 x i8> %head, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer
+  %vc = sext <vscale x 4 x i8> %va to <vscale x 4 x i64>
+  %vd = sext <vscale x 4 x i8> %splat to <vscale x 4 x i64>
+  %ve = add <vscale x 4 x i64> %vc, %vd
+  ret <vscale x 4 x i64> %ve
+}
+
+define <vscale x 4 x i64> @vwaddu_vx_nxv4i64_nxv4i8(<vscale x 4 x i8> %va, i8 %b) {
+; CHECK-LABEL: vwaddu_vx_nxv4i64_nxv4i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e8, mf2, ta, ma
+; CHECK-NEXT:    vmv.v.x v9, a0
+; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
+; CHECK-NEXT:    vzext.vf8 v12, v8
+; CHECK-NEXT:    vzext.vf8 v16, v9
+; CHECK-NEXT:    vadd.vv v8, v12, v16
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 4 x i8> poison, i8 %b, i8 0
+  %splat = shufflevector <vscale x 4 x i8> %head, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer
+  %vc = zext <vscale x 4 x i8> %va to <vscale x 4 x i64>
+  %vd = zext <vscale x 4 x i8> %splat to <vscale x 4 x i64>
+  %ve = add <vscale x 4 x i64> %vc, %vd
+  ret <vscale x 4 x i64> %ve
+}
+
+define <vscale x 4 x i64> @vwadd_wv_nxv4i64_nxv4i8(<vscale x 4 x i64> %va, <vscale x 4 x i8> %vb) {
+; CHECK-LABEL: vwadd_wv_nxv4i64_nxv4i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
+; CHECK-NEXT:    vsext.vf8 v16, v12
+; CHECK-NEXT:    vadd.vv v8, v8, v16
+; CHECK-NEXT:    ret
+  %vc = sext <vscale x 4 x i8> %vb to <vscale x 4 x i64>
+  %vd = add <vscale x 4 x i64> %va, %vc
+  ret <vscale x 4 x i64> %vd
+}
+
+define <vscale x 4 x i64> @vwaddu_wv_nxv4i64_nxv4i8(<vscale x 4 x i64> %va, <vscale x 4 x i8> %vb) {
+; CHECK-LABEL: vwaddu_wv_nxv4i64_nxv4i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
+; CHECK-NEXT:    vzext.vf8 v16, v12
+; CHECK-NEXT:    vadd.vv v8, v8, v16
+; CHECK-NEXT:    ret
+  %vc = zext <vscale x 4 x i8> %vb to <vscale x 4 x i64>
+  %vd = add <vscale x 4 x i64> %va, %vc
+  ret <vscale x 4 x i64> %vd
+}
+
+define <vscale x 4 x i64> @vwadd_wx_nxv4i64_nxv4i8(<vscale x 4 x i64> %va, i8 %b) {
+; CHECK-LABEL: vwadd_wx_nxv4i64_nxv4i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e8, mf2, ta, ma
+; CHECK-NEXT:    vmv.v.x v12, a0
+; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
+; CHECK-NEXT:    vsext.vf8 v16, v12
+; CHECK-NEXT:    vadd.vv v8, v8, v16
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 4 x i8> poison, i8 %b, i8 0
+  %splat = shufflevector <vscale x 4 x i8> %head, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer
+  %vb = sext <vscale x 4 x i8> %splat to <vscale x 4 x i64>
+  %vc = add <vscale x 4 x i64> %va, %vb
+  ret <vscale x 4 x i64> %vc
+}
+
+define <vscale x 4 x i64> @vwaddu_wx_nxv4i64_nxv4i8(<vscale x 4 x i64> %va, i8 %b) {
+; CHECK-LABEL: vwaddu_wx_nxv4i64_nxv4i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e8, mf2, ta, ma
+; CHECK-NEXT:    vmv.v.x v12, a0
+; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
+; CHECK-NEXT:    vzext.vf8 v16, v12
+; CHECK-NEXT:    vadd.vv v8, v8, v16
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 4 x i8> poison, i8 %b, i8 0
+  %splat = shufflevector <vscale x 4 x i8> %head, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer
+  %vb = zext <vscale x 4 x i8> %splat to <vscale x 4 x i64>
+  %vc = add <vscale x 4 x i64> %va, %vb
+  ret <vscale x 4 x i64> %vc
+}
+
+define <vscale x 8 x i64> @vwadd_vv_nxv8i64_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %vb) {
+; CHECK-LABEL: vwadd_vv_nxv8i64_nxv8i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
+; CHECK-NEXT:    vsext.vf8 v16, v8
+; CHECK-NEXT:    vsext.vf8 v24, v9
+; CHECK-NEXT:    vadd.vv v8, v16, v24
+; CHECK-NEXT:    ret
+  %vc = sext <vscale x 8 x i8> %va to <vscale x 8 x i64>
+  %vd = sext <vscale x 8 x i8> %vb to <vscale x 8 x i64>
+  %ve = add <vscale x 8 x i64> %vc, %vd
+  ret <vscale x 8 x i64> %ve
+}
+
+define <vscale x 8 x i64> @vwaddu_vv_nxv8i64_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %vb) {
+; CHECK-LABEL: vwaddu_vv_nxv8i64_nxv8i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
+; CHECK-NEXT:    vzext.vf8 v16, v8
+; CHECK-NEXT:    vzext.vf8 v24, v9
+; CHECK-NEXT:    vadd.vv v8, v16, v24
+; CHECK-NEXT:    ret
+  %vc = zext <vscale x 8 x i8> %va to <vscale x 8 x i64>
+  %vd = zext <vscale x 8 x i8> %vb to <vscale x 8 x i64>
+  %ve = add <vscale x 8 x i64> %vc, %vd
+  ret <vscale x 8 x i64> %ve
+}
+
+define <vscale x 8 x i64> @vwadd_vx_nxv8i64_nxv8i8(<vscale x 8 x i8> %va, i8 %b) {
+; CHECK-LABEL: vwadd_vx_nxv8i64_nxv8i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e8, m1, ta, ma
+; CHECK-NEXT:    vmv.v.x v9, a0
+; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; CHECK-NEXT:    vsext.vf8 v16, v8
+; CHECK-NEXT:    vsext.vf8 v24, v9
+; CHECK-NEXT:    vadd.vv v8, v16, v24
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 8 x i8> poison, i8 %b, i8 0
+  %splat = shufflevector <vscale x 8 x i8> %head, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
+  %vc = sext <vscale x 8 x i8> %va to <vscale x 8 x i64>
+  %vd = sext <vscale x 8 x i8> %splat to <vscale x 8 x i64>
+  %ve = add <vscale x 8 x i64> %vc, %vd
+  ret <vscale x 8 x i64> %ve
+}
+
+define <vscale x 8 x i64> @vwaddu_vx_nxv8i64_nxv8i8(<vscale x 8 x i8> %va, i8 %b) {
+; CHECK-LABEL: vwaddu_vx_nxv8i64_nxv8i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e8, m1, ta, ma
+; CHECK-NEXT:    vmv.v.x v9, a0
+; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; CHECK-NEXT:    vzext.vf8 v16, v8
+; CHECK-NEXT:    vzext.vf8 v24, v9
+; CHECK-NEXT:    vadd.vv v8, v16, v24
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 8 x i8> poison, i8 %b, i8 0
+  %splat = shufflevector <vscale x 8 x i8> %head, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
+  %vc = zext <vscale x 8 x i8> %va to <vscale x 8 x i64>
+  %vd = zext <vscale x 8 x i8> %splat to <vscale x 8 x i64>
+  %ve = add <vscale x 8 x i64> %vc, %vd
+  ret <vscale x 8 x i64> %ve
+}
+
+define <vscale x 8 x i64> @vwadd_wv_nxv8i64_nxv8i8(<vscale x 8 x i64> %va, <vscale x 8 x i8> %vb) {
+; CHECK-LABEL: vwadd_wv_nxv8i64_nxv8i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
+; CHECK-NEXT:    vsext.vf8 v24, v16
+; CHECK-NEXT:    vadd.vv v8, v8, v24
+; CHECK-NEXT:    ret
+  %vc = sext <vscale x 8 x i8> %vb to <vscale x 8 x i64>
+  %vd = add <vscale x 8 x i64> %va, %vc
+  ret <vscale x 8 x i64> %vd
+}
+
+define <vscale x 8 x i64> @vwaddu_wv_nxv8i64_nxv8i8(<vscale x 8 x i64> %va, <vscale x 8 x i8> %vb) {
+; CHECK-LABEL: vwaddu_wv_nxv8i64_nxv8i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
+; CHECK-NEXT:    vzext.vf8 v24, v16
+; CHECK-NEXT:    vadd.vv v8, v8, v24
+; CHECK-NEXT:    ret
+  %vc = zext <vscale x 8 x i8> %vb to <vscale x 8 x i64>
+  %vd = add <vscale x 8 x i64> %va, %vc
+  ret <vscale x 8 x i64> %vd
+}
+
+define <vscale x 8 x i64> @vwadd_wx_nxv8i64_nxv8i8(<vscale x 8 x i64> %va, i8 %b) {
+; CHECK-LABEL: vwadd_wx_nxv8i64_nxv8i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e8, m1, ta, ma
+; CHECK-NEXT:    vmv.v.x v16, a0
+; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; CHECK-NEXT:    vsext.vf8 v24, v16
+; CHECK-NEXT:    vadd.vv v8, v8, v24
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 8 x i8> poison, i8 %b, i8 0
+  %splat = shufflevector <vscale x 8 x i8> %head, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
+  %vb = sext <vscale x 8 x i8> %splat to <vscale x 8 x i64>
+  %vc = add <vscale x 8 x i64> %va, %vb
+  ret <vscale x 8 x i64> %vc
+}
+
+define <vscale x 8 x i64> @vwaddu_wx_nxv8i64_nxv8i8(<vscale x 8 x i64> %va, i8 %b) {
+; CHECK-LABEL: vwaddu_wx_nxv8i64_nxv8i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e8, m1, ta, ma
+; CHECK-NEXT:    vmv.v.x v16, a0
+; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; CHECK-NEXT:    vzext.vf8 v24, v16
+; CHECK-NEXT:    vadd.vv v8, v8, v24
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 8 x i8> poison, i8 %b, i8 0
+  %splat = shufflevector <vscale x 8 x i8> %head, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
+  %vb = zext <vscale x 8 x i8> %splat to <vscale x 8 x i64>
+  %vc = add <vscale x 8 x i64> %va, %vb
+  ret <vscale x 8 x i64> %vc
+}

diff --git a/llvm/test/CodeGen/RISCV/rvv/vwmul-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vwmul-sdnode.ll
index 98918ea2165231..3634162eefd642 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vwmul-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vwmul-sdnode.ll
@@ -2,8 +2,8 @@
 ; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
 ; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
 
-define <vscale x 1 x i64> @vwmul_vv_nxv1i64(<vscale x 1 x i32> %va, <vscale x 1 x i32> %vb) {
-; CHECK-LABEL: vwmul_vv_nxv1i64:
+define <vscale x 1 x i64> @vwmul_vv_nxv1i64_nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %vb) {
+; CHECK-LABEL: vwmul_vv_nxv1i64_nxv1i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
 ; CHECK-NEXT:    vwmul.vv v10, v8, v9
@@ -15,8 +15,8 @@ define <vscale x 1 x i64> @vwmul_vv_nxv1i64(<vscale x 1 x i32> %va, <vscale x 1
   ret <vscale x 1 x i64> %ve
 }
 
-define <vscale x 1 x i64> @vwmulu_vv_nxv1i64(<vscale x 1 x i32> %va, <vscale x 1 x i32> %vb) {
-; CHECK-LABEL: vwmulu_vv_nxv1i64:
+define <vscale x 1 x i64> @vwmulu_vv_nxv1i64_nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %vb) {
+; CHECK-LABEL: vwmulu_vv_nxv1i64_nxv1i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
 ; CHECK-NEXT:    vwmulu.vv v10, v8, v9
@@ -28,8 +28,8 @@ define <vscale x 1 x i64> @vwmulu_vv_nxv1i64(<vscale x 1 x i32> %va, <vscale x 1
   ret <vscale x 1 x i64> %ve
 }
 
-define <vscale x 1 x i64> @vwmulsu_vv_nxv1i64(<vscale x 1 x i32> %va, <vscale x 1 x i32> %vb) {
-; CHECK-LABEL: vwmulsu_vv_nxv1i64:
+define <vscale x 1 x i64> @vwmulsu_vv_nxv1i64_nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %vb) {
+; CHECK-LABEL: vwmulsu_vv_nxv1i64_nxv1i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
 ; CHECK-NEXT:    vwmulsu.vv v10, v8, v9
@@ -41,8 +41,8 @@ define <vscale x 1 x i64> @vwmulsu_vv_nxv1i64(<vscale x 1 x i32> %va, <vscale x
   ret <vscale x 1 x i64> %ve
 }
 
-define <vscale x 1 x i64> @vwmul_vx_nxv1i64(<vscale x 1 x i32> %va, i32 %b) {
-; CHECK-LABEL: vwmul_vx_nxv1i64:
+define <vscale x 1 x i64> @vwmul_vx_nxv1i64_nxv1i32(<vscale x 1 x i32> %va, i32 %b) {
+; CHECK-LABEL: vwmul_vx_nxv1i64_nxv1i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, mf2, ta, ma
 ; CHECK-NEXT:    vwmul.vx v9, v8, a0
@@ -56,8 +56,8 @@ define <vscale x 1 x i64> @vwmul_vx_nxv1i64(<vscale x 1 x i32> %va, i32 %b) {
   ret <vscale x 1 x i64> %ve
 }
 
-define <vscale x 1 x i64> @vwmulu_vx_nxv1i64(<vscale x 1 x i32> %va, i32 %b) {
-; CHECK-LABEL: vwmulu_vx_nxv1i64:
+define <vscale x 1 x i64> @vwmulu_vx_nxv1i64_nxv1i32(<vscale x 1 x i32> %va, i32 %b) {
+; CHECK-LABEL: vwmulu_vx_nxv1i64_nxv1i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, mf2, ta, ma
 ; CHECK-NEXT:    vwmulu.vx v9, v8, a0
@@ -71,8 +71,8 @@ define <vscale x 1 x i64> @vwmulu_vx_nxv1i64(<vscale x 1 x i32> %va, i32 %b) {
   ret <vscale x 1 x i64> %ve
 }
 
-define <vscale x 1 x i64> @vwmulsu_vx_nxv1i64(<vscale x 1 x i32> %va, i32 %b) {
-; CHECK-LABEL: vwmulsu_vx_nxv1i64:
+define <vscale x 1 x i64> @vwmulsu_vx_nxv1i64_nxv1i32(<vscale x 1 x i32> %va, i32 %b) {
+; CHECK-LABEL: vwmulsu_vx_nxv1i64_nxv1i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, mf2, ta, ma
 ; CHECK-NEXT:    vwmulsu.vx v9, v8, a0
@@ -86,8 +86,8 @@ define <vscale x 1 x i64> @vwmulsu_vx_nxv1i64(<vscale x 1 x i32> %va, i32 %b) {
   ret <vscale x 1 x i64> %ve
 }
 
-define <vscale x 2 x i64> @vwmul_vv_nxv2i64(<vscale x 2 x i32> %va, <vscale x 2 x i32> %vb) {
-; CHECK-LABEL: vwmul_vv_nxv2i64:
+define <vscale x 2 x i64> @vwmul_vv_nxv2i64_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %vb) {
+; CHECK-LABEL: vwmul_vv_nxv2i64_nxv2i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
 ; CHECK-NEXT:    vwmul.vv v10, v8, v9
@@ -99,8 +99,8 @@ define <vscale x 2 x i64> @vwmul_vv_nxv2i64(<vscale x 2 x i32> %va, <vscale x 2
   ret <vscale x 2 x i64> %ve
 }
 
-define <vscale x 2 x i64> @vwmulu_vv_nxv2i64(<vscale x 2 x i32> %va, <vscale x 2 x i32> %vb) {
-; CHECK-LABEL: vwmulu_vv_nxv2i64:
+define <vscale x 2 x i64> @vwmulu_vv_nxv2i64_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %vb) {
+; CHECK-LABEL: vwmulu_vv_nxv2i64_nxv2i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
 ; CHECK-NEXT:    vwmulu.vv v10, v8, v9
@@ -112,8 +112,8 @@ define <vscale x 2 x i64> @vwmulu_vv_nxv2i64(<vscale x 2 x i32> %va, <vscale x 2
   ret <vscale x 2 x i64> %ve
 }
 
-define <vscale x 2 x i64> @vwmulsu_vv_nxv2i64(<vscale x 2 x i32> %va, <vscale x 2 x i32> %vb) {
-; CHECK-LABEL: vwmulsu_vv_nxv2i64:
+define <vscale x 2 x i64> @vwmulsu_vv_nxv2i64_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %vb) {
+; CHECK-LABEL: vwmulsu_vv_nxv2i64_nxv2i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
 ; CHECK-NEXT:    vwmulsu.vv v10, v8, v9
@@ -125,8 +125,8 @@ define <vscale x 2 x i64> @vwmulsu_vv_nxv2i64(<vscale x 2 x i32> %va, <vscale x
   ret <vscale x 2 x i64> %ve
 }
 
-define <vscale x 2 x i64> @vwmul_vx_nxv2i64(<vscale x 2 x i32> %va, i32 %b) {
-; CHECK-LABEL: vwmul_vx_nxv2i64:
+define <vscale x 2 x i64> @vwmul_vx_nxv2i64_nxv2i32(<vscale x 2 x i32> %va, i32 %b) {
+; CHECK-LABEL: vwmul_vx_nxv2i64_nxv2i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
 ; CHECK-NEXT:    vwmul.vx v10, v8, a0
@@ -140,8 +140,8 @@ define <vscale x 2 x i64> @vwmul_vx_nxv2i64(<vscale x 2 x i32> %va, i32 %b) {
   ret <vscale x 2 x i64> %ve
 }
 
-define <vscale x 2 x i64> @vwmulu_vx_nxv2i64(<vscale x 2 x i32> %va, i32 %b) {
-; CHECK-LABEL: vwmulu_vx_nxv2i64:
+define <vscale x 2 x i64> @vwmulu_vx_nxv2i64_nxv2i32(<vscale x 2 x i32> %va, i32 %b) {
+; CHECK-LABEL: vwmulu_vx_nxv2i64_nxv2i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
 ; CHECK-NEXT:    vwmulu.vx v10, v8, a0
@@ -155,8 +155,8 @@ define <vscale x 2 x i64> @vwmulu_vx_nxv2i64(<vscale x 2 x i32> %va, i32 %b) {
   ret <vscale x 2 x i64> %ve
 }
 
-define <vscale x 2 x i64> @vwmulsu_vx_nxv2i64(<vscale x 2 x i32> %va, i32 %b) {
-; CHECK-LABEL: vwmulsu_vx_nxv2i64:
+define <vscale x 2 x i64> @vwmulsu_vx_nxv2i64_nxv2i32(<vscale x 2 x i32> %va, i32 %b) {
+; CHECK-LABEL: vwmulsu_vx_nxv2i64_nxv2i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
 ; CHECK-NEXT:    vwmulsu.vx v10, v8, a0
@@ -170,8 +170,8 @@ define <vscale x 2 x i64> @vwmulsu_vx_nxv2i64(<vscale x 2 x i32> %va, i32 %b) {
   ret <vscale x 2 x i64> %ve
 }
 
-define <vscale x 4 x i64> @vwmul_vv_nxv4i64(<vscale x 4 x i32> %va, <vscale x 4 x i32> %vb) {
-; CHECK-LABEL: vwmul_vv_nxv4i64:
+define <vscale x 4 x i64> @vwmul_vv_nxv4i64_nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i32> %vb) {
+; CHECK-LABEL: vwmul_vv_nxv4i64_nxv4i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vwmul.vv v12, v8, v10
@@ -183,8 +183,8 @@ define <vscale x 4 x i64> @vwmul_vv_nxv4i64(<vscale x 4 x i32> %va, <vscale x 4
   ret <vscale x 4 x i64> %ve
 }
 
-define <vscale x 4 x i64> @vwmulu_vv_nxv4i64(<vscale x 4 x i32> %va, <vscale x 4 x i32> %vb) {
-; CHECK-LABEL: vwmulu_vv_nxv4i64:
+define <vscale x 4 x i64> @vwmulu_vv_nxv4i64_nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i32> %vb) {
+; CHECK-LABEL: vwmulu_vv_nxv4i64_nxv4i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vwmulu.vv v12, v8, v10
@@ -196,8 +196,8 @@ define <vscale x 4 x i64> @vwmulu_vv_nxv4i64(<vscale x 4 x i32> %va, <vscale x 4
   ret <vscale x 4 x i64> %ve
 }
 
-define <vscale x 4 x i64> @vwmulsu_vv_nxv4i64(<vscale x 4 x i32> %va, <vscale x 4 x i32> %vb) {
-; CHECK-LABEL: vwmulsu_vv_nxv4i64:
+define <vscale x 4 x i64> @vwmulsu_vv_nxv4i64_nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i32> %vb) {
+; CHECK-LABEL: vwmulsu_vv_nxv4i64_nxv4i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vwmulsu.vv v12, v8, v10
@@ -209,8 +209,8 @@ define <vscale x 4 x i64> @vwmulsu_vv_nxv4i64(<vscale x 4 x i32> %va, <vscale x
   ret <vscale x 4 x i64> %ve
 }
 
-define <vscale x 4 x i64> @vwmul_vx_nxv4i64(<vscale x 4 x i32> %va, i32 %b) {
-; CHECK-LABEL: vwmul_vx_nxv4i64:
+define <vscale x 4 x i64> @vwmul_vx_nxv4i64_nxv4i32(<vscale x 4 x i32> %va, i32 %b) {
+; CHECK-LABEL: vwmul_vx_nxv4i64_nxv4i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vwmul.vx v12, v8, a0
@@ -224,8 +224,8 @@ define <vscale x 4 x i64> @vwmul_vx_nxv4i64(<vscale x 4 x i32> %va, i32 %b) {
   ret <vscale x 4 x i64> %ve
 }
 
-define <vscale x 4 x i64> @vwmulu_vx_nxv4i64(<vscale x 4 x i32> %va, i32 %b) {
-; CHECK-LABEL: vwmulu_vx_nxv4i64:
+define <vscale x 4 x i64> @vwmulu_vx_nxv4i64_nxv4i32(<vscale x 4 x i32> %va, i32 %b) {
+; CHECK-LABEL: vwmulu_vx_nxv4i64_nxv4i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vwmulu.vx v12, v8, a0
@@ -239,8 +239,8 @@ define <vscale x 4 x i64> @vwmulu_vx_nxv4i64(<vscale x 4 x i32> %va, i32 %b) {
   ret <vscale x 4 x i64> %ve
 }
 
-define <vscale x 4 x i64> @vwmulsu_vx_nxv4i64(<vscale x 4 x i32> %va, i32 %b) {
-; CHECK-LABEL: vwmulsu_vx_nxv4i64:
+define <vscale x 4 x i64> @vwmulsu_vx_nxv4i64_nxv4i32(<vscale x 4 x i32> %va, i32 %b) {
+; CHECK-LABEL: vwmulsu_vx_nxv4i64_nxv4i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vwmulsu.vx v12, v8, a0
@@ -254,8 +254,8 @@ define <vscale x 4 x i64> @vwmulsu_vx_nxv4i64(<vscale x 4 x i32> %va, i32 %b) {
   ret <vscale x 4 x i64> %ve
 }
 
-define <vscale x 8 x i64> @vwmul_vv_nxv8i64(<vscale x 8 x i32> %va, <vscale x 8 x i32> %vb) {
-; CHECK-LABEL: vwmul_vv_nxv8i64:
+define <vscale x 8 x i64> @vwmul_vv_nxv8i64_nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %vb) {
+; CHECK-LABEL: vwmul_vv_nxv8i64_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    vwmul.vv v16, v8, v12
@@ -267,8 +267,8 @@ define <vscale x 8 x i64> @vwmul_vv_nxv8i64(<vscale x 8 x i32> %va, <vscale x 8
   ret <vscale x 8 x i64> %ve
 }
 
-define <vscale x 8 x i64> @vwmulu_vv_nxv8i64(<vscale x 8 x i32> %va, <vscale x 8 x i32> %vb) {
-; CHECK-LABEL: vwmulu_vv_nxv8i64:
+define <vscale x 8 x i64> @vwmulu_vv_nxv8i64_nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %vb) {
+; CHECK-LABEL: vwmulu_vv_nxv8i64_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    vwmulu.vv v16, v8, v12
@@ -280,8 +280,8 @@ define <vscale x 8 x i64> @vwmulu_vv_nxv8i64(<vscale x 8 x i32> %va, <vscale x 8
   ret <vscale x 8 x i64> %ve
 }
 
-define <vscale x 8 x i64> @vwmulsu_vv_nxv8i64(<vscale x 8 x i32> %va, <vscale x 8 x i32> %vb) {
-; CHECK-LABEL: vwmulsu_vv_nxv8i64:
+define <vscale x 8 x i64> @vwmulsu_vv_nxv8i64_nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %vb) {
+; CHECK-LABEL: vwmulsu_vv_nxv8i64_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    vwmulsu.vv v16, v8, v12
@@ -293,8 +293,8 @@ define <vscale x 8 x i64> @vwmulsu_vv_nxv8i64(<vscale x 8 x i32> %va, <vscale x
   ret <vscale x 8 x i64> %ve
 }
 
-define <vscale x 8 x i64> @vwmul_vx_nxv8i64(<vscale x 8 x i32> %va, i32 %b) {
-; CHECK-LABEL: vwmul_vx_nxv8i64:
+define <vscale x 8 x i64> @vwmul_vx_nxv8i64_nxv8i32(<vscale x 8 x i32> %va, i32 %b) {
+; CHECK-LABEL: vwmul_vx_nxv8i64_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    vwmul.vx v16, v8, a0
@@ -308,8 +308,8 @@ define <vscale x 8 x i64> @vwmul_vx_nxv8i64(<vscale x 8 x i32> %va, i32 %b) {
   ret <vscale x 8 x i64> %ve
 }
 
-define <vscale x 8 x i64> @vwmulu_vx_nxv8i64(<vscale x 8 x i32> %va, i32 %b) {
-; CHECK-LABEL: vwmulu_vx_nxv8i64:
+define <vscale x 8 x i64> @vwmulu_vx_nxv8i64_nxv8i32(<vscale x 8 x i32> %va, i32 %b) {
+; CHECK-LABEL: vwmulu_vx_nxv8i64_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    vwmulu.vx v16, v8, a0
@@ -323,8 +323,8 @@ define <vscale x 8 x i64> @vwmulu_vx_nxv8i64(<vscale x 8 x i32> %va, i32 %b) {
   ret <vscale x 8 x i64> %ve
 }
 
-define <vscale x 8 x i64> @vwmulsu_vx_nxv8i64(<vscale x 8 x i32> %va, i32 %b) {
-; CHECK-LABEL: vwmulsu_vx_nxv8i64:
+define <vscale x 8 x i64> @vwmulsu_vx_nxv8i64_nxv8i32(<vscale x 8 x i32> %va, i32 %b) {
+; CHECK-LABEL: vwmulsu_vx_nxv8i64_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    vwmulsu.vx v16, v8, a0
@@ -337,3 +337,771 @@ define <vscale x 8 x i64> @vwmulsu_vx_nxv8i64(<vscale x 8 x i32> %va, i32 %b) {
   %ve = mul <vscale x 8 x i64> %vc, %vd
   ret <vscale x 8 x i64> %ve
 }
+
+define <vscale x 1 x i64> @vwmul_vv_nxv1i64_nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %vb) {
+; CHECK-LABEL: vwmul_vv_nxv1i64_nxv1i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
+; CHECK-NEXT:    vsext.vf4 v10, v8
+; CHECK-NEXT:    vsext.vf4 v8, v9
+; CHECK-NEXT:    vmul.vv v8, v10, v8
+; CHECK-NEXT:    ret
+  %vc = sext <vscale x 1 x i16> %va to <vscale x 1 x i64>
+  %vd = sext <vscale x 1 x i16> %vb to <vscale x 1 x i64>
+  %ve = mul <vscale x 1 x i64> %vc, %vd
+  ret <vscale x 1 x i64> %ve
+}
+
+define <vscale x 1 x i64> @vwmulu_vv_nxv1i64_nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %vb) {
+; CHECK-LABEL: vwmulu_vv_nxv1i64_nxv1i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
+; CHECK-NEXT:    vzext.vf4 v10, v8
+; CHECK-NEXT:    vzext.vf4 v8, v9
+; CHECK-NEXT:    vmul.vv v8, v10, v8
+; CHECK-NEXT:    ret
+  %vc = zext <vscale x 1 x i16> %va to <vscale x 1 x i64>
+  %vd = zext <vscale x 1 x i16> %vb to <vscale x 1 x i64>
+  %ve = mul <vscale x 1 x i64> %vc, %vd
+  ret <vscale x 1 x i64> %ve
+}
+
+define <vscale x 1 x i64> @vwmulsu_vv_nxv1i64_nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %vb) {
+; CHECK-LABEL: vwmulsu_vv_nxv1i64_nxv1i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
+; CHECK-NEXT:    vsext.vf4 v10, v8
+; CHECK-NEXT:    vzext.vf4 v8, v9
+; CHECK-NEXT:    vmul.vv v8, v10, v8
+; CHECK-NEXT:    ret
+  %vc = sext <vscale x 1 x i16> %va to <vscale x 1 x i64>
+  %vd = zext <vscale x 1 x i16> %vb to <vscale x 1 x i64>
+  %ve = mul <vscale x 1 x i64> %vc, %vd
+  ret <vscale x 1 x i64> %ve
+}
+
+define <vscale x 1 x i64> @vwmul_vx_nxv1i64_nxv1i16(<vscale x 1 x i16> %va, i16 %b) {
+; CHECK-LABEL: vwmul_vx_nxv1i64_nxv1i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
+; CHECK-NEXT:    vmv.v.x v9, a0
+; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
+; CHECK-NEXT:    vsext.vf4 v10, v8
+; CHECK-NEXT:    vsext.vf4 v8, v9
+; CHECK-NEXT:    vmul.vv v8, v10, v8
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 1 x i16> undef, i16 %b, i16 0
+  %splat = shufflevector <vscale x 1 x i16> %head, <vscale x 1 x i16> undef, <vscale x 1 x i32> zeroinitializer
+  %vc = sext <vscale x 1 x i16> %va to <vscale x 1 x i64>
+  %vd = sext <vscale x 1 x i16> %splat to <vscale x 1 x i64>
+  %ve = mul <vscale x 1 x i64> %vc, %vd
+  ret <vscale x 1 x i64> %ve
+}
+
+define <vscale x 1 x i64> @vwmulu_vx_nxv1i64_nxv1i16(<vscale x 1 x i16> %va, i16 %b) {
+; CHECK-LABEL: vwmulu_vx_nxv1i64_nxv1i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
+; CHECK-NEXT:    vmv.v.x v9, a0
+; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
+; CHECK-NEXT:    vzext.vf4 v10, v8
+; CHECK-NEXT:    vzext.vf4 v8, v9
+; CHECK-NEXT:    vmul.vv v8, v10, v8
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 1 x i16> undef, i16 %b, i16 0
+  %splat = shufflevector <vscale x 1 x i16> %head, <vscale x 1 x i16> undef, <vscale x 1 x i32> zeroinitializer
+  %vc = zext <vscale x 1 x i16> %va to <vscale x 1 x i64>
+  %vd = zext <vscale x 1 x i16> %splat to <vscale x 1 x i64>
+  %ve = mul <vscale x 1 x i64> %vc, %vd
+  ret <vscale x 1 x i64> %ve
+}
+
+define <vscale x 1 x i64> @vwmulsu_vx_nxv1i64_nxv1i16(<vscale x 1 x i16> %va, i16 %b) {
+; CHECK-LABEL: vwmulsu_vx_nxv1i64_nxv1i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
+; CHECK-NEXT:    vmv.v.x v9, a0
+; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
+; CHECK-NEXT:    vsext.vf4 v10, v8
+; CHECK-NEXT:    vzext.vf4 v8, v9
+; CHECK-NEXT:    vmul.vv v8, v10, v8
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 1 x i16> undef, i16 %b, i16 0
+  %splat = shufflevector <vscale x 1 x i16> %head, <vscale x 1 x i16> undef, <vscale x 1 x i32> zeroinitializer
+  %vc = sext <vscale x 1 x i16> %va to <vscale x 1 x i64>
+  %vd = zext <vscale x 1 x i16> %splat to <vscale x 1 x i64>
+  %ve = mul <vscale x 1 x i64> %vc, %vd
+  ret <vscale x 1 x i64> %ve
+}
+
+define <vscale x 2 x i64> @vwmul_vv_nxv2i64_nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %vb) {
+; CHECK-LABEL: vwmul_vv_nxv2i64_nxv2i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
+; CHECK-NEXT:    vsext.vf4 v10, v8
+; CHECK-NEXT:    vsext.vf4 v12, v9
+; CHECK-NEXT:    vmul.vv v8, v10, v12
+; CHECK-NEXT:    ret
+  %vc = sext <vscale x 2 x i16> %va to <vscale x 2 x i64>
+  %vd = sext <vscale x 2 x i16> %vb to <vscale x 2 x i64>
+  %ve = mul <vscale x 2 x i64> %vc, %vd
+  ret <vscale x 2 x i64> %ve
+}
+
+define <vscale x 2 x i64> @vwmulu_vv_nxv2i64_nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %vb) {
+; CHECK-LABEL: vwmulu_vv_nxv2i64_nxv2i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
+; CHECK-NEXT:    vzext.vf4 v10, v8
+; CHECK-NEXT:    vzext.vf4 v12, v9
+; CHECK-NEXT:    vmul.vv v8, v10, v12
+; CHECK-NEXT:    ret
+  %vc = zext <vscale x 2 x i16> %va to <vscale x 2 x i64>
+  %vd = zext <vscale x 2 x i16> %vb to <vscale x 2 x i64>
+  %ve = mul <vscale x 2 x i64> %vc, %vd
+  ret <vscale x 2 x i64> %ve
+}
+
+define <vscale x 2 x i64> @vwmulsu_vv_nxv2i64_nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %vb) {
+; CHECK-LABEL: vwmulsu_vv_nxv2i64_nxv2i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
+; CHECK-NEXT:    vsext.vf4 v10, v8
+; CHECK-NEXT:    vzext.vf4 v12, v9
+; CHECK-NEXT:    vmul.vv v8, v10, v12
+; CHECK-NEXT:    ret
+  %vc = sext <vscale x 2 x i16> %va to <vscale x 2 x i64>
+  %vd = zext <vscale x 2 x i16> %vb to <vscale x 2 x i64>
+  %ve = mul <vscale x 2 x i64> %vc, %vd
+  ret <vscale x 2 x i64> %ve
+}
+
+define <vscale x 2 x i64> @vwmul_vx_nxv2i64_nxv2i16(<vscale x 2 x i16> %va, i16 %b) {
+; CHECK-LABEL: vwmul_vx_nxv2i64_nxv2i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
+; CHECK-NEXT:    vmv.v.x v9, a0
+; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
+; CHECK-NEXT:    vsext.vf4 v10, v8
+; CHECK-NEXT:    vsext.vf4 v12, v9
+; CHECK-NEXT:    vmul.vv v8, v10, v12
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 2 x i16> undef, i16 %b, i16 0
+  %splat = shufflevector <vscale x 2 x i16> %head, <vscale x 2 x i16> undef, <vscale x 2 x i32> zeroinitializer
+  %vc = sext <vscale x 2 x i16> %va to <vscale x 2 x i64>
+  %vd = sext <vscale x 2 x i16> %splat to <vscale x 2 x i64>
+  %ve = mul <vscale x 2 x i64> %vc, %vd
+  ret <vscale x 2 x i64> %ve
+}
+
+define <vscale x 2 x i64> @vwmulu_vx_nxv2i64_nxv2i16(<vscale x 2 x i16> %va, i16 %b) {
+; CHECK-LABEL: vwmulu_vx_nxv2i64_nxv2i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
+; CHECK-NEXT:    vmv.v.x v9, a0
+; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
+; CHECK-NEXT:    vzext.vf4 v10, v8
+; CHECK-NEXT:    vzext.vf4 v12, v9
+; CHECK-NEXT:    vmul.vv v8, v10, v12
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 2 x i16> undef, i16 %b, i16 0
+  %splat = shufflevector <vscale x 2 x i16> %head, <vscale x 2 x i16> undef, <vscale x 2 x i32> zeroinitializer
+  %vc = zext <vscale x 2 x i16> %va to <vscale x 2 x i64>
+  %vd = zext <vscale x 2 x i16> %splat to <vscale x 2 x i64>
+  %ve = mul <vscale x 2 x i64> %vc, %vd
+  ret <vscale x 2 x i64> %ve
+}
+
+define <vscale x 2 x i64> @vwmulsu_vx_nxv2i64_nxv2i16(<vscale x 2 x i16> %va, i16 %b) {
+; CHECK-LABEL: vwmulsu_vx_nxv2i64_nxv2i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
+; CHECK-NEXT:    vmv.v.x v9, a0
+; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
+; CHECK-NEXT:    vsext.vf4 v10, v8
+; CHECK-NEXT:    vzext.vf4 v12, v9
+; CHECK-NEXT:    vmul.vv v8, v10, v12
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 2 x i16> undef, i16 %b, i16 0
+  %splat = shufflevector <vscale x 2 x i16> %head, <vscale x 2 x i16> undef, <vscale x 2 x i32> zeroinitializer
+  %vc = sext <vscale x 2 x i16> %va to <vscale x 2 x i64>
+  %vd = zext <vscale x 2 x i16> %splat to <vscale x 2 x i64>
+  %ve = mul <vscale x 2 x i64> %vc, %vd
+  ret <vscale x 2 x i64> %ve
+}
+
+define <vscale x 4 x i64> @vwmul_vv_nxv4i64_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %vb) {
+; CHECK-LABEL: vwmul_vv_nxv4i64_nxv4i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
+; CHECK-NEXT:    vsext.vf4 v12, v8
+; CHECK-NEXT:    vsext.vf4 v16, v9
+; CHECK-NEXT:    vmul.vv v8, v12, v16
+; CHECK-NEXT:    ret
+  %vc = sext <vscale x 4 x i16> %va to <vscale x 4 x i64>
+  %vd = sext <vscale x 4 x i16> %vb to <vscale x 4 x i64>
+  %ve = mul <vscale x 4 x i64> %vc, %vd
+  ret <vscale x 4 x i64> %ve
+}
+
+define <vscale x 4 x i64> @vwmulu_vv_nxv4i64_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %vb) {
+; CHECK-LABEL: vwmulu_vv_nxv4i64_nxv4i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
+; CHECK-NEXT:    vzext.vf4 v12, v8
+; CHECK-NEXT:    vzext.vf4 v16, v9
+; CHECK-NEXT:    vmul.vv v8, v12, v16
+; CHECK-NEXT:    ret
+  %vc = zext <vscale x 4 x i16> %va to <vscale x 4 x i64>
+  %vd = zext <vscale x 4 x i16> %vb to <vscale x 4 x i64>
+  %ve = mul <vscale x 4 x i64> %vc, %vd
+  ret <vscale x 4 x i64> %ve
+}
+
+define <vscale x 4 x i64> @vwmulsu_vv_nxv4i64_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %vb) {
+; CHECK-LABEL: vwmulsu_vv_nxv4i64_nxv4i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
+; CHECK-NEXT:    vsext.vf4 v12, v8
+; CHECK-NEXT:    vzext.vf4 v16, v9
+; CHECK-NEXT:    vmul.vv v8, v12, v16
+; CHECK-NEXT:    ret
+  %vc = sext <vscale x 4 x i16> %va to <vscale x 4 x i64>
+  %vd = zext <vscale x 4 x i16> %vb to <vscale x 4 x i64>
+  %ve = mul <vscale x 4 x i64> %vc, %vd
+  ret <vscale x 4 x i64> %ve
+}
+
+define <vscale x 4 x i64> @vwmul_vx_nxv4i64_nxv4i16(<vscale x 4 x i16> %va, i16 %b) {
+; CHECK-LABEL: vwmul_vx_nxv4i64_nxv4i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
+; CHECK-NEXT:    vmv.v.x v9, a0
+; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
+; CHECK-NEXT:    vsext.vf4 v12, v8
+; CHECK-NEXT:    vsext.vf4 v16, v9
+; CHECK-NEXT:    vmul.vv v8, v12, v16
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 4 x i16> undef, i16 %b, i16 0
+  %splat = shufflevector <vscale x 4 x i16> %head, <vscale x 4 x i16> undef, <vscale x 4 x i32> zeroinitializer
+  %vc = sext <vscale x 4 x i16> %va to <vscale x 4 x i64>
+  %vd = sext <vscale x 4 x i16> %splat to <vscale x 4 x i64>
+  %ve = mul <vscale x 4 x i64> %vc, %vd
+  ret <vscale x 4 x i64> %ve
+}
+
+define <vscale x 4 x i64> @vwmulu_vx_nxv4i64_nxv4i16(<vscale x 4 x i16> %va, i16 %b) {
+; CHECK-LABEL: vwmulu_vx_nxv4i64_nxv4i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
+; CHECK-NEXT:    vmv.v.x v9, a0
+; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
+; CHECK-NEXT:    vzext.vf4 v12, v8
+; CHECK-NEXT:    vzext.vf4 v16, v9
+; CHECK-NEXT:    vmul.vv v8, v12, v16
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 4 x i16> undef, i16 %b, i16 0
+  %splat = shufflevector <vscale x 4 x i16> %head, <vscale x 4 x i16> undef, <vscale x 4 x i32> zeroinitializer
+  %vc = zext <vscale x 4 x i16> %va to <vscale x 4 x i64>
+  %vd = zext <vscale x 4 x i16> %splat to <vscale x 4 x i64>
+  %ve = mul <vscale x 4 x i64> %vc, %vd
+  ret <vscale x 4 x i64> %ve
+}
+
+define <vscale x 4 x i64> @vwmulsu_vx_nxv4i64_nxv4i16(<vscale x 4 x i16> %va, i16 %b) {
+; CHECK-LABEL: vwmulsu_vx_nxv4i64_nxv4i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
+; CHECK-NEXT:    vmv.v.x v9, a0
+; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
+; CHECK-NEXT:    vsext.vf4 v12, v8
+; CHECK-NEXT:    vzext.vf4 v16, v9
+; CHECK-NEXT:    vmul.vv v8, v12, v16
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 4 x i16> undef, i16 %b, i16 0
+  %splat = shufflevector <vscale x 4 x i16> %head, <vscale x 4 x i16> undef, <vscale x 4 x i32> zeroinitializer
+  %vc = sext <vscale x 4 x i16> %va to <vscale x 4 x i64>
+  %vd = zext <vscale x 4 x i16> %splat to <vscale x 4 x i64>
+  %ve = mul <vscale x 4 x i64> %vc, %vd
+  ret <vscale x 4 x i64> %ve
+}
+
+define <vscale x 8 x i64> @vwmul_vv_nxv8i64_nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %vb) {
+; CHECK-LABEL: vwmul_vv_nxv8i64_nxv8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
+; CHECK-NEXT:    vsext.vf4 v16, v8
+; CHECK-NEXT:    vsext.vf4 v24, v10
+; CHECK-NEXT:    vmul.vv v8, v16, v24
+; CHECK-NEXT:    ret
+  %vc = sext <vscale x 8 x i16> %va to <vscale x 8 x i64>
+  %vd = sext <vscale x 8 x i16> %vb to <vscale x 8 x i64>
+  %ve = mul <vscale x 8 x i64> %vc, %vd
+  ret <vscale x 8 x i64> %ve
+}
+
+define <vscale x 8 x i64> @vwmulu_vv_nxv8i64_nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %vb) {
+; CHECK-LABEL: vwmulu_vv_nxv8i64_nxv8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
+; CHECK-NEXT:    vzext.vf4 v16, v8
+; CHECK-NEXT:    vzext.vf4 v24, v10
+; CHECK-NEXT:    vmul.vv v8, v16, v24
+; CHECK-NEXT:    ret
+  %vc = zext <vscale x 8 x i16> %va to <vscale x 8 x i64>
+  %vd = zext <vscale x 8 x i16> %vb to <vscale x 8 x i64>
+  %ve = mul <vscale x 8 x i64> %vc, %vd
+  ret <vscale x 8 x i64> %ve
+}
+
+define <vscale x 8 x i64> @vwmulsu_vv_nxv8i64_nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %vb) {
+; CHECK-LABEL: vwmulsu_vv_nxv8i64_nxv8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
+; CHECK-NEXT:    vsext.vf4 v16, v8
+; CHECK-NEXT:    vzext.vf4 v24, v10
+; CHECK-NEXT:    vmul.vv v8, v16, v24
+; CHECK-NEXT:    ret
+  %vc = sext <vscale x 8 x i16> %va to <vscale x 8 x i64>
+  %vd = zext <vscale x 8 x i16> %vb to <vscale x 8 x i64>
+  %ve = mul <vscale x 8 x i64> %vc, %vd
+  ret <vscale x 8 x i64> %ve
+}
+
+define <vscale x 8 x i64> @vwmul_vx_nxv8i64_nxv8i16(<vscale x 8 x i16> %va, i16 %b) {
+; CHECK-LABEL: vwmul_vx_nxv8i64_nxv8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT:    vmv.v.x v10, a0
+; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; CHECK-NEXT:    vsext.vf4 v16, v8
+; CHECK-NEXT:    vsext.vf4 v24, v10
+; CHECK-NEXT:    vmul.vv v8, v16, v24
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 8 x i16> undef, i16 %b, i16 0
+  %splat = shufflevector <vscale x 8 x i16> %head, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
+  %vc = sext <vscale x 8 x i16> %va to <vscale x 8 x i64>
+  %vd = sext <vscale x 8 x i16> %splat to <vscale x 8 x i64>
+  %ve = mul <vscale x 8 x i64> %vc, %vd
+  ret <vscale x 8 x i64> %ve
+}
+
+define <vscale x 8 x i64> @vwmulu_vx_nxv8i64_nxv8i16(<vscale x 8 x i16> %va, i16 %b) {
+; CHECK-LABEL: vwmulu_vx_nxv8i64_nxv8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT:    vmv.v.x v10, a0
+; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; CHECK-NEXT:    vzext.vf4 v16, v8
+; CHECK-NEXT:    vzext.vf4 v24, v10
+; CHECK-NEXT:    vmul.vv v8, v16, v24
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 8 x i16> undef, i16 %b, i16 0
+  %splat = shufflevector <vscale x 8 x i16> %head, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
+  %vc = zext <vscale x 8 x i16> %va to <vscale x 8 x i64>
+  %vd = zext <vscale x 8 x i16> %splat to <vscale x 8 x i64>
+  %ve = mul <vscale x 8 x i64> %vc, %vd
+  ret <vscale x 8 x i64> %ve
+}
+
+define <vscale x 8 x i64> @vwmulsu_vx_nxv8i64_nxv8i16(<vscale x 8 x i16> %va, i16 %b) {
+; CHECK-LABEL: vwmulsu_vx_nxv8i64_nxv8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT:    vmv.v.x v10, a0
+; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; CHECK-NEXT:    vsext.vf4 v16, v8
+; CHECK-NEXT:    vzext.vf4 v24, v10
+; CHECK-NEXT:    vmul.vv v8, v16, v24
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 8 x i16> undef, i16 %b, i16 0
+  %splat = shufflevector <vscale x 8 x i16> %head, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
+  %vc = sext <vscale x 8 x i16> %va to <vscale x 8 x i64>
+  %vd = zext <vscale x 8 x i16> %splat to <vscale x 8 x i64>
+  %ve = mul <vscale x 8 x i64> %vc, %vd
+  ret <vscale x 8 x i64> %ve
+}
+
+define <vscale x 1 x i64> @vwmul_vv_nxv1i64_nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb) {
+; CHECK-LABEL: vwmul_vv_nxv1i64_nxv1i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
+; CHECK-NEXT:    vsext.vf8 v10, v8
+; CHECK-NEXT:    vsext.vf8 v8, v9
+; CHECK-NEXT:    vmul.vv v8, v10, v8
+; CHECK-NEXT:    ret
+  %vc = sext <vscale x 1 x i8> %va to <vscale x 1 x i64>
+  %vd = sext <vscale x 1 x i8> %vb to <vscale x 1 x i64>
+  %ve = mul <vscale x 1 x i64> %vc, %vd
+  ret <vscale x 1 x i64> %ve
+}
+
+define <vscale x 1 x i64> @vwmulu_vv_nxv1i64_nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb) {
+; CHECK-LABEL: vwmulu_vv_nxv1i64_nxv1i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
+; CHECK-NEXT:    vzext.vf8 v10, v8
+; CHECK-NEXT:    vzext.vf8 v8, v9
+; CHECK-NEXT:    vmul.vv v8, v10, v8
+; CHECK-NEXT:    ret
+  %vc = zext <vscale x 1 x i8> %va to <vscale x 1 x i64>
+  %vd = zext <vscale x 1 x i8> %vb to <vscale x 1 x i64>
+  %ve = mul <vscale x 1 x i64> %vc, %vd
+  ret <vscale x 1 x i64> %ve
+}
+
+define <vscale x 1 x i64> @vwmulsu_vv_nxv1i64_nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb) {
+; CHECK-LABEL: vwmulsu_vv_nxv1i64_nxv1i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
+; CHECK-NEXT:    vsext.vf8 v10, v8
+; CHECK-NEXT:    vzext.vf8 v8, v9
+; CHECK-NEXT:    vmul.vv v8, v10, v8
+; CHECK-NEXT:    ret
+  %vc = sext <vscale x 1 x i8> %va to <vscale x 1 x i64>
+  %vd = zext <vscale x 1 x i8> %vb to <vscale x 1 x i64>
+  %ve = mul <vscale x 1 x i64> %vc, %vd
+  ret <vscale x 1 x i64> %ve
+}
+
+define <vscale x 1 x i64> @vwmul_vx_nxv1i64_nxv1i8(<vscale x 1 x i8> %va, i8 %b) {
+; CHECK-LABEL: vwmul_vx_nxv1i64_nxv1i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e8, mf8, ta, ma
+; CHECK-NEXT:    vmv.v.x v9, a0
+; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
+; CHECK-NEXT:    vsext.vf8 v10, v8
+; CHECK-NEXT:    vsext.vf8 v8, v9
+; CHECK-NEXT:    vmul.vv v8, v10, v8
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 1 x i8> undef, i8 %b, i8 0
+  %splat = shufflevector <vscale x 1 x i8> %head, <vscale x 1 x i8> undef, <vscale x 1 x i32> zeroinitializer
+  %vc = sext <vscale x 1 x i8> %va to <vscale x 1 x i64>
+  %vd = sext <vscale x 1 x i8> %splat to <vscale x 1 x i64>
+  %ve = mul <vscale x 1 x i64> %vc, %vd
+  ret <vscale x 1 x i64> %ve
+}
+
+define <vscale x 1 x i64> @vwmulu_vx_nxv1i64_nxv1i8(<vscale x 1 x i8> %va, i8 %b) {
+; CHECK-LABEL: vwmulu_vx_nxv1i64_nxv1i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e8, mf8, ta, ma
+; CHECK-NEXT:    vmv.v.x v9, a0
+; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
+; CHECK-NEXT:    vzext.vf8 v10, v8
+; CHECK-NEXT:    vzext.vf8 v8, v9
+; CHECK-NEXT:    vmul.vv v8, v10, v8
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 1 x i8> undef, i8 %b, i8 0
+  %splat = shufflevector <vscale x 1 x i8> %head, <vscale x 1 x i8> undef, <vscale x 1 x i32> zeroinitializer
+  %vc = zext <vscale x 1 x i8> %va to <vscale x 1 x i64>
+  %vd = zext <vscale x 1 x i8> %splat to <vscale x 1 x i64>
+  %ve = mul <vscale x 1 x i64> %vc, %vd
+  ret <vscale x 1 x i64> %ve
+}
+
+define <vscale x 1 x i64> @vwmulsu_vx_nxv1i64_nxv1i8(<vscale x 1 x i8> %va, i8 %b) {
+; CHECK-LABEL: vwmulsu_vx_nxv1i64_nxv1i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e8, mf8, ta, ma
+; CHECK-NEXT:    vmv.v.x v9, a0
+; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
+; CHECK-NEXT:    vsext.vf8 v10, v8
+; CHECK-NEXT:    vzext.vf8 v8, v9
+; CHECK-NEXT:    vmul.vv v8, v10, v8
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 1 x i8> undef, i8 %b, i8 0
+  %splat = shufflevector <vscale x 1 x i8> %head, <vscale x 1 x i8> undef, <vscale x 1 x i32> zeroinitializer
+  %vc = sext <vscale x 1 x i8> %va to <vscale x 1 x i64>
+  %vd = zext <vscale x 1 x i8> %splat to <vscale x 1 x i64>
+  %ve = mul <vscale x 1 x i64> %vc, %vd
+  ret <vscale x 1 x i64> %ve
+}
+
+define <vscale x 2 x i64> @vwmul_vv_nxv2i64_nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %vb) {
+; CHECK-LABEL: vwmul_vv_nxv2i64_nxv2i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
+; CHECK-NEXT:    vsext.vf8 v10, v8
+; CHECK-NEXT:    vsext.vf8 v12, v9
+; CHECK-NEXT:    vmul.vv v8, v10, v12
+; CHECK-NEXT:    ret
+  %vc = sext <vscale x 2 x i8> %va to <vscale x 2 x i64>
+  %vd = sext <vscale x 2 x i8> %vb to <vscale x 2 x i64>
+  %ve = mul <vscale x 2 x i64> %vc, %vd
+  ret <vscale x 2 x i64> %ve
+}
+
+define <vscale x 2 x i64> @vwmulu_vv_nxv2i64_nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %vb) {
+; CHECK-LABEL: vwmulu_vv_nxv2i64_nxv2i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
+; CHECK-NEXT:    vzext.vf8 v10, v8
+; CHECK-NEXT:    vzext.vf8 v12, v9
+; CHECK-NEXT:    vmul.vv v8, v10, v12
+; CHECK-NEXT:    ret
+  %vc = zext <vscale x 2 x i8> %va to <vscale x 2 x i64>
+  %vd = zext <vscale x 2 x i8> %vb to <vscale x 2 x i64>
+  %ve = mul <vscale x 2 x i64> %vc, %vd
+  ret <vscale x 2 x i64> %ve
+}
+
+define <vscale x 2 x i64> @vwmulsu_vv_nxv2i64_nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %vb) {
+; CHECK-LABEL: vwmulsu_vv_nxv2i64_nxv2i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
+; CHECK-NEXT:    vsext.vf8 v10, v8
+; CHECK-NEXT:    vzext.vf8 v12, v9
+; CHECK-NEXT:    vmul.vv v8, v10, v12
+; CHECK-NEXT:    ret
+  %vc = sext <vscale x 2 x i8> %va to <vscale x 2 x i64>
+  %vd = zext <vscale x 2 x i8> %vb to <vscale x 2 x i64>
+  %ve = mul <vscale x 2 x i64> %vc, %vd
+  ret <vscale x 2 x i64> %ve
+}
+
+define <vscale x 2 x i64> @vwmul_vx_nxv2i64_nxv2i8(<vscale x 2 x i8> %va, i8 %b) {
+; CHECK-LABEL: vwmul_vx_nxv2i64_nxv2i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e8, mf4, ta, ma
+; CHECK-NEXT:    vmv.v.x v9, a0
+; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
+; CHECK-NEXT:    vsext.vf8 v10, v8
+; CHECK-NEXT:    vsext.vf8 v12, v9
+; CHECK-NEXT:    vmul.vv v8, v10, v12
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 2 x i8> undef, i8 %b, i8 0
+  %splat = shufflevector <vscale x 2 x i8> %head, <vscale x 2 x i8> undef, <vscale x 2 x i32> zeroinitializer
+  %vc = sext <vscale x 2 x i8> %va to <vscale x 2 x i64>
+  %vd = sext <vscale x 2 x i8> %splat to <vscale x 2 x i64>
+  %ve = mul <vscale x 2 x i64> %vc, %vd
+  ret <vscale x 2 x i64> %ve
+}
+
+define <vscale x 2 x i64> @vwmulu_vx_nxv2i64_nxv2i8(<vscale x 2 x i8> %va, i8 %b) {
+; CHECK-LABEL: vwmulu_vx_nxv2i64_nxv2i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e8, mf4, ta, ma
+; CHECK-NEXT:    vmv.v.x v9, a0
+; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
+; CHECK-NEXT:    vzext.vf8 v10, v8
+; CHECK-NEXT:    vzext.vf8 v12, v9
+; CHECK-NEXT:    vmul.vv v8, v10, v12
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 2 x i8> undef, i8 %b, i8 0
+  %splat = shufflevector <vscale x 2 x i8> %head, <vscale x 2 x i8> undef, <vscale x 2 x i32> zeroinitializer
+  %vc = zext <vscale x 2 x i8> %va to <vscale x 2 x i64>
+  %vd = zext <vscale x 2 x i8> %splat to <vscale x 2 x i64>
+  %ve = mul <vscale x 2 x i64> %vc, %vd
+  ret <vscale x 2 x i64> %ve
+}
+
+define <vscale x 2 x i64> @vwmulsu_vx_nxv2i64_nxv2i8(<vscale x 2 x i8> %va, i8 %b) {
+; CHECK-LABEL: vwmulsu_vx_nxv2i64_nxv2i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e8, mf4, ta, ma
+; CHECK-NEXT:    vmv.v.x v9, a0
+; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
+; CHECK-NEXT:    vsext.vf8 v10, v8
+; CHECK-NEXT:    vzext.vf8 v12, v9
+; CHECK-NEXT:    vmul.vv v8, v10, v12
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 2 x i8> undef, i8 %b, i8 0
+  %splat = shufflevector <vscale x 2 x i8> %head, <vscale x 2 x i8> undef, <vscale x 2 x i32> zeroinitializer
+  %vc = sext <vscale x 2 x i8> %va to <vscale x 2 x i64>
+  %vd = zext <vscale x 2 x i8> %splat to <vscale x 2 x i64>
+  %ve = mul <vscale x 2 x i64> %vc, %vd
+  ret <vscale x 2 x i64> %ve
+}
+
+define <vscale x 4 x i64> @vwmul_vv_nxv4i64_nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %vb) {
+; CHECK-LABEL: vwmul_vv_nxv4i64_nxv4i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
+; CHECK-NEXT:    vsext.vf8 v12, v8
+; CHECK-NEXT:    vsext.vf8 v16, v9
+; CHECK-NEXT:    vmul.vv v8, v12, v16
+; CHECK-NEXT:    ret
+  %vc = sext <vscale x 4 x i8> %va to <vscale x 4 x i64>
+  %vd = sext <vscale x 4 x i8> %vb to <vscale x 4 x i64>
+  %ve = mul <vscale x 4 x i64> %vc, %vd
+  ret <vscale x 4 x i64> %ve
+}
+
+define <vscale x 4 x i64> @vwmulu_vv_nxv4i64_nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %vb) {
+; CHECK-LABEL: vwmulu_vv_nxv4i64_nxv4i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
+; CHECK-NEXT:    vzext.vf8 v12, v8
+; CHECK-NEXT:    vzext.vf8 v16, v9
+; CHECK-NEXT:    vmul.vv v8, v12, v16
+; CHECK-NEXT:    ret
+  %vc = zext <vscale x 4 x i8> %va to <vscale x 4 x i64>
+  %vd = zext <vscale x 4 x i8> %vb to <vscale x 4 x i64>
+  %ve = mul <vscale x 4 x i64> %vc, %vd
+  ret <vscale x 4 x i64> %ve
+}
+
+define <vscale x 4 x i64> @vwmulsu_vv_nxv4i64_nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %vb) {
+; CHECK-LABEL: vwmulsu_vv_nxv4i64_nxv4i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
+; CHECK-NEXT:    vsext.vf8 v12, v8
+; CHECK-NEXT:    vzext.vf8 v16, v9
+; CHECK-NEXT:    vmul.vv v8, v12, v16
+; CHECK-NEXT:    ret
+  %vc = sext <vscale x 4 x i8> %va to <vscale x 4 x i64>
+  %vd = zext <vscale x 4 x i8> %vb to <vscale x 4 x i64>
+  %ve = mul <vscale x 4 x i64> %vc, %vd
+  ret <vscale x 4 x i64> %ve
+}
+
+define <vscale x 4 x i64> @vwmul_vx_nxv4i64_nxv4i8(<vscale x 4 x i8> %va, i8 %b) {
+; CHECK-LABEL: vwmul_vx_nxv4i64_nxv4i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e8, mf2, ta, ma
+; CHECK-NEXT:    vmv.v.x v9, a0
+; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
+; CHECK-NEXT:    vsext.vf8 v12, v8
+; CHECK-NEXT:    vsext.vf8 v16, v9
+; CHECK-NEXT:    vmul.vv v8, v12, v16
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 4 x i8> undef, i8 %b, i8 0
+  %splat = shufflevector <vscale x 4 x i8> %head, <vscale x 4 x i8> undef, <vscale x 4 x i32> zeroinitializer
+  %vc = sext <vscale x 4 x i8> %va to <vscale x 4 x i64>
+  %vd = sext <vscale x 4 x i8> %splat to <vscale x 4 x i64>
+  %ve = mul <vscale x 4 x i64> %vc, %vd
+  ret <vscale x 4 x i64> %ve
+}
+
+define <vscale x 4 x i64> @vwmulu_vx_nxv4i64_nxv4i8(<vscale x 4 x i8> %va, i8 %b) {
+; CHECK-LABEL: vwmulu_vx_nxv4i64_nxv4i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e8, mf2, ta, ma
+; CHECK-NEXT:    vmv.v.x v9, a0
+; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
+; CHECK-NEXT:    vzext.vf8 v12, v8
+; CHECK-NEXT:    vzext.vf8 v16, v9
+; CHECK-NEXT:    vmul.vv v8, v12, v16
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 4 x i8> undef, i8 %b, i8 0
+  %splat = shufflevector <vscale x 4 x i8> %head, <vscale x 4 x i8> undef, <vscale x 4 x i32> zeroinitializer
+  %vc = zext <vscale x 4 x i8> %va to <vscale x 4 x i64>
+  %vd = zext <vscale x 4 x i8> %splat to <vscale x 4 x i64>
+  %ve = mul <vscale x 4 x i64> %vc, %vd
+  ret <vscale x 4 x i64> %ve
+}
+
+define <vscale x 4 x i64> @vwmulsu_vx_nxv4i64_nxv4i8(<vscale x 4 x i8> %va, i8 %b) {
+; CHECK-LABEL: vwmulsu_vx_nxv4i64_nxv4i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e8, mf2, ta, ma
+; CHECK-NEXT:    vmv.v.x v9, a0
+; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
+; CHECK-NEXT:    vsext.vf8 v12, v8
+; CHECK-NEXT:    vzext.vf8 v16, v9
+; CHECK-NEXT:    vmul.vv v8, v12, v16
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 4 x i8> undef, i8 %b, i8 0
+  %splat = shufflevector <vscale x 4 x i8> %head, <vscale x 4 x i8> undef, <vscale x 4 x i32> zeroinitializer
+  %vc = sext <vscale x 4 x i8> %va to <vscale x 4 x i64>
+  %vd = zext <vscale x 4 x i8> %splat to <vscale x 4 x i64>
+  %ve = mul <vscale x 4 x i64> %vc, %vd
+  ret <vscale x 4 x i64> %ve
+}
+
+define <vscale x 8 x i64> @vwmul_vv_nxv8i64_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %vb) {
+; CHECK-LABEL: vwmul_vv_nxv8i64_nxv8i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
+; CHECK-NEXT:    vsext.vf8 v16, v8
+; CHECK-NEXT:    vsext.vf8 v24, v9
+; CHECK-NEXT:    vmul.vv v8, v16, v24
+; CHECK-NEXT:    ret
+  %vc = sext <vscale x 8 x i8> %va to <vscale x 8 x i64>
+  %vd = sext <vscale x 8 x i8> %vb to <vscale x 8 x i64>
+  %ve = mul <vscale x 8 x i64> %vc, %vd
+  ret <vscale x 8 x i64> %ve
+}
+
+define <vscale x 8 x i64> @vwmulu_vv_nxv8i64_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %vb) {
+; CHECK-LABEL: vwmulu_vv_nxv8i64_nxv8i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
+; CHECK-NEXT:    vzext.vf8 v16, v8
+; CHECK-NEXT:    vzext.vf8 v24, v9
+; CHECK-NEXT:    vmul.vv v8, v16, v24
+; CHECK-NEXT:    ret
+  %vc = zext <vscale x 8 x i8> %va to <vscale x 8 x i64>
+  %vd = zext <vscale x 8 x i8> %vb to <vscale x 8 x i64>
+  %ve = mul <vscale x 8 x i64> %vc, %vd
+  ret <vscale x 8 x i64> %ve
+}
+
+define <vscale x 8 x i64> @vwmulsu_vv_nxv8i64_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %vb) {
+; CHECK-LABEL: vwmulsu_vv_nxv8i64_nxv8i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
+; CHECK-NEXT:    vsext.vf8 v16, v8
+; CHECK-NEXT:    vzext.vf8 v24, v9
+; CHECK-NEXT:    vmul.vv v8, v16, v24
+; CHECK-NEXT:    ret
+  %vc = sext <vscale x 8 x i8> %va to <vscale x 8 x i64>
+  %vd = zext <vscale x 8 x i8> %vb to <vscale x 8 x i64>
+  %ve = mul <vscale x 8 x i64> %vc, %vd
+  ret <vscale x 8 x i64> %ve
+}
+
+define <vscale x 8 x i64> @vwmul_vx_nxv8i64_nxv8i8(<vscale x 8 x i8> %va, i8 %b) {
+; CHECK-LABEL: vwmul_vx_nxv8i64_nxv8i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e8, m1, ta, ma
+; CHECK-NEXT:    vmv.v.x v9, a0
+; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; CHECK-NEXT:    vsext.vf8 v16, v8
+; CHECK-NEXT:    vsext.vf8 v24, v9
+; CHECK-NEXT:    vmul.vv v8, v16, v24
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 8 x i8> undef, i8 %b, i8 0
+  %splat = shufflevector <vscale x 8 x i8> %head, <vscale x 8 x i8> undef, <vscale x 8 x i32> zeroinitializer
+  %vc = sext <vscale x 8 x i8> %va to <vscale x 8 x i64>
+  %vd = sext <vscale x 8 x i8> %splat to <vscale x 8 x i64>
+  %ve = mul <vscale x 8 x i64> %vc, %vd
+  ret <vscale x 8 x i64> %ve
+}
+
+define <vscale x 8 x i64> @vwmulu_vx_nxv8i64_nxv8i8(<vscale x 8 x i8> %va, i8 %b) {
+; CHECK-LABEL: vwmulu_vx_nxv8i64_nxv8i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e8, m1, ta, ma
+; CHECK-NEXT:    vmv.v.x v9, a0
+; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; CHECK-NEXT:    vzext.vf8 v16, v8
+; CHECK-NEXT:    vzext.vf8 v24, v9
+; CHECK-NEXT:    vmul.vv v8, v16, v24
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 8 x i8> undef, i8 %b, i8 0
+  %splat = shufflevector <vscale x 8 x i8> %head, <vscale x 8 x i8> undef, <vscale x 8 x i32> zeroinitializer
+  %vc = zext <vscale x 8 x i8> %va to <vscale x 8 x i64>
+  %vd = zext <vscale x 8 x i8> %splat to <vscale x 8 x i64>
+  %ve = mul <vscale x 8 x i64> %vc, %vd
+  ret <vscale x 8 x i64> %ve
+}
+
+define <vscale x 8 x i64> @vwmulsu_vx_nxv8i64_nxv8i8(<vscale x 8 x i8> %va, i8 %b) {
+; CHECK-LABEL: vwmulsu_vx_nxv8i64_nxv8i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e8, m1, ta, ma
+; CHECK-NEXT:    vmv.v.x v9, a0
+; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; CHECK-NEXT:    vsext.vf8 v16, v8
+; CHECK-NEXT:    vzext.vf8 v24, v9
+; CHECK-NEXT:    vmul.vv v8, v16, v24
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 8 x i8> undef, i8 %b, i8 0
+  %splat = shufflevector <vscale x 8 x i8> %head, <vscale x 8 x i8> undef, <vscale x 8 x i32> zeroinitializer
+  %vc = sext <vscale x 8 x i8> %va to <vscale x 8 x i64>
+  %vd = zext <vscale x 8 x i8> %splat to <vscale x 8 x i64>
+  %ve = mul <vscale x 8 x i64> %vc, %vd
+  ret <vscale x 8 x i64> %ve
+}

diff --git a/llvm/test/CodeGen/RISCV/rvv/vwsub-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vwsub-sdnode.ll
index a6a15c16434360..123469ade0ed4d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vwsub-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vwsub-sdnode.ll
@@ -2,8 +2,8 @@
 ; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
 ; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
 
-define <vscale x 1 x i64> @vwsub_vv_nxv1i64(<vscale x 1 x i32> %va, <vscale x 1 x i32> %vb) {
-; CHECK-LABEL: vwsub_vv_nxv1i64:
+define <vscale x 1 x i64> @vwsub_vv_nxv1i64_nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %vb) {
+; CHECK-LABEL: vwsub_vv_nxv1i64_nxv1i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
 ; CHECK-NEXT:    vwsub.vv v10, v8, v9
@@ -15,8 +15,8 @@ define <vscale x 1 x i64> @vwsub_vv_nxv1i64(<vscale x 1 x i32> %va, <vscale x 1
   ret <vscale x 1 x i64> %ve
 }
 
-define <vscale x 1 x i64> @vwsubu_vv_nxv1i64(<vscale x 1 x i32> %va, <vscale x 1 x i32> %vb) {
-; CHECK-LABEL: vwsubu_vv_nxv1i64:
+define <vscale x 1 x i64> @vwsubu_vv_nxv1i64_nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %vb) {
+; CHECK-LABEL: vwsubu_vv_nxv1i64_nxv1i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
 ; CHECK-NEXT:    vwsubu.vv v10, v8, v9
@@ -28,8 +28,8 @@ define <vscale x 1 x i64> @vwsubu_vv_nxv1i64(<vscale x 1 x i32> %va, <vscale x 1
   ret <vscale x 1 x i64> %ve
 }
 
-define <vscale x 1 x i64> @vwsub_vx_nxv1i64(<vscale x 1 x i32> %va, i32 %b) {
-; CHECK-LABEL: vwsub_vx_nxv1i64:
+define <vscale x 1 x i64> @vwsub_vx_nxv1i64_nxv1i32(<vscale x 1 x i32> %va, i32 %b) {
+; CHECK-LABEL: vwsub_vx_nxv1i64_nxv1i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, mf2, ta, ma
 ; CHECK-NEXT:    vwsub.vx v9, v8, a0
@@ -43,8 +43,8 @@ define <vscale x 1 x i64> @vwsub_vx_nxv1i64(<vscale x 1 x i32> %va, i32 %b) {
   ret <vscale x 1 x i64> %ve
 }
 
-define <vscale x 1 x i64> @vwsubu_vx_nxv1i64(<vscale x 1 x i32> %va, i32 %b) {
-; CHECK-LABEL: vwsubu_vx_nxv1i64:
+define <vscale x 1 x i64> @vwsubu_vx_nxv1i64_nxv1i32(<vscale x 1 x i32> %va, i32 %b) {
+; CHECK-LABEL: vwsubu_vx_nxv1i64_nxv1i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, mf2, ta, ma
 ; CHECK-NEXT:    vwsubu.vx v9, v8, a0
@@ -58,8 +58,8 @@ define <vscale x 1 x i64> @vwsubu_vx_nxv1i64(<vscale x 1 x i32> %va, i32 %b) {
   ret <vscale x 1 x i64> %ve
 }
 
-define <vscale x 1 x i64> @vwsub_wv_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i32> %vb) {
-; CHECK-LABEL: vwsub_wv_nxv1i64:
+define <vscale x 1 x i64> @vwsub_wv_nxv1i64_nxv1i32(<vscale x 1 x i64> %va, <vscale x 1 x i32> %vb) {
+; CHECK-LABEL: vwsub_wv_nxv1i64_nxv1i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
 ; CHECK-NEXT:    vwsub.wv v8, v8, v9
@@ -69,8 +69,8 @@ define <vscale x 1 x i64> @vwsub_wv_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1
   ret <vscale x 1 x i64> %vd
 }
 
-define <vscale x 1 x i64> @vwsubu_wv_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i32> %vb) {
-; CHECK-LABEL: vwsubu_wv_nxv1i64:
+define <vscale x 1 x i64> @vwsubu_wv_nxv1i64_nxv1i32(<vscale x 1 x i64> %va, <vscale x 1 x i32> %vb) {
+; CHECK-LABEL: vwsubu_wv_nxv1i64_nxv1i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
 ; CHECK-NEXT:    vwsubu.wv v8, v8, v9
@@ -80,8 +80,8 @@ define <vscale x 1 x i64> @vwsubu_wv_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1
   ret <vscale x 1 x i64> %vd
 }
 
-define <vscale x 1 x i64> @vwsub_wx_nxv1i64(<vscale x 1 x i64> %va, i32 %b) {
-; CHECK-LABEL: vwsub_wx_nxv1i64:
+define <vscale x 1 x i64> @vwsub_wx_nxv1i64_nxv1i32(<vscale x 1 x i64> %va, i32 %b) {
+; CHECK-LABEL: vwsub_wx_nxv1i64_nxv1i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, mf2, ta, ma
 ; CHECK-NEXT:    vwsub.wx v8, v8, a0
@@ -93,8 +93,8 @@ define <vscale x 1 x i64> @vwsub_wx_nxv1i64(<vscale x 1 x i64> %va, i32 %b) {
   ret <vscale x 1 x i64> %vc
 }
 
-define <vscale x 1 x i64> @vwsubu_wx_nxv1i64(<vscale x 1 x i64> %va, i32 %b) {
-; CHECK-LABEL: vwsubu_wx_nxv1i64:
+define <vscale x 1 x i64> @vwsubu_wx_nxv1i64_nxv1i32(<vscale x 1 x i64> %va, i32 %b) {
+; CHECK-LABEL: vwsubu_wx_nxv1i64_nxv1i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, mf2, ta, ma
 ; CHECK-NEXT:    vwsubu.wx v8, v8, a0
@@ -106,8 +106,8 @@ define <vscale x 1 x i64> @vwsubu_wx_nxv1i64(<vscale x 1 x i64> %va, i32 %b) {
   ret <vscale x 1 x i64> %vc
 }
 
-define <vscale x 2 x i64> @vwsub_vv_nxv2i64(<vscale x 2 x i32> %va, <vscale x 2 x i32> %vb) {
-; CHECK-LABEL: vwsub_vv_nxv2i64:
+define <vscale x 2 x i64> @vwsub_vv_nxv2i64_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %vb) {
+; CHECK-LABEL: vwsub_vv_nxv2i64_nxv2i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
 ; CHECK-NEXT:    vwsub.vv v10, v8, v9
@@ -119,8 +119,8 @@ define <vscale x 2 x i64> @vwsub_vv_nxv2i64(<vscale x 2 x i32> %va, <vscale x 2
   ret <vscale x 2 x i64> %ve
 }
 
-define <vscale x 2 x i64> @vwsubu_vv_nxv2i64(<vscale x 2 x i32> %va, <vscale x 2 x i32> %vb) {
-; CHECK-LABEL: vwsubu_vv_nxv2i64:
+define <vscale x 2 x i64> @vwsubu_vv_nxv2i64_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %vb) {
+; CHECK-LABEL: vwsubu_vv_nxv2i64_nxv2i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
 ; CHECK-NEXT:    vwsubu.vv v10, v8, v9
@@ -132,8 +132,8 @@ define <vscale x 2 x i64> @vwsubu_vv_nxv2i64(<vscale x 2 x i32> %va, <vscale x 2
   ret <vscale x 2 x i64> %ve
 }
 
-define <vscale x 2 x i64> @vwsub_vx_nxv2i64(<vscale x 2 x i32> %va, i32 %b) {
-; CHECK-LABEL: vwsub_vx_nxv2i64:
+define <vscale x 2 x i64> @vwsub_vx_nxv2i64_nxv2i32(<vscale x 2 x i32> %va, i32 %b) {
+; CHECK-LABEL: vwsub_vx_nxv2i64_nxv2i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
 ; CHECK-NEXT:    vwsub.vx v10, v8, a0
@@ -147,8 +147,8 @@ define <vscale x 2 x i64> @vwsub_vx_nxv2i64(<vscale x 2 x i32> %va, i32 %b) {
   ret <vscale x 2 x i64> %ve
 }
 
-define <vscale x 2 x i64> @vwsubu_vx_nxv2i64(<vscale x 2 x i32> %va, i32 %b) {
-; CHECK-LABEL: vwsubu_vx_nxv2i64:
+define <vscale x 2 x i64> @vwsubu_vx_nxv2i64_nxv2i32(<vscale x 2 x i32> %va, i32 %b) {
+; CHECK-LABEL: vwsubu_vx_nxv2i64_nxv2i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
 ; CHECK-NEXT:    vwsubu.vx v10, v8, a0
@@ -162,8 +162,8 @@ define <vscale x 2 x i64> @vwsubu_vx_nxv2i64(<vscale x 2 x i32> %va, i32 %b) {
   ret <vscale x 2 x i64> %ve
 }
 
-define <vscale x 2 x i64> @vwsub_wv_nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i32> %vb) {
-; CHECK-LABEL: vwsub_wv_nxv2i64:
+define <vscale x 2 x i64> @vwsub_wv_nxv2i64_nxv2i32(<vscale x 2 x i64> %va, <vscale x 2 x i32> %vb) {
+; CHECK-LABEL: vwsub_wv_nxv2i64_nxv2i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
 ; CHECK-NEXT:    vwsub.wv v8, v8, v10
@@ -173,8 +173,8 @@ define <vscale x 2 x i64> @vwsub_wv_nxv2i64(<vscale x 2 x i64> %va, <vscale x 2
   ret <vscale x 2 x i64> %vd
 }
 
-define <vscale x 2 x i64> @vwsubu_wv_nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i32> %vb) {
-; CHECK-LABEL: vwsubu_wv_nxv2i64:
+define <vscale x 2 x i64> @vwsubu_wv_nxv2i64_nxv2i32(<vscale x 2 x i64> %va, <vscale x 2 x i32> %vb) {
+; CHECK-LABEL: vwsubu_wv_nxv2i64_nxv2i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
 ; CHECK-NEXT:    vwsubu.wv v8, v8, v10
@@ -184,8 +184,8 @@ define <vscale x 2 x i64> @vwsubu_wv_nxv2i64(<vscale x 2 x i64> %va, <vscale x 2
   ret <vscale x 2 x i64> %vd
 }
 
-define <vscale x 2 x i64> @vwsub_wx_nxv2i64(<vscale x 2 x i64> %va, i32 %b) {
-; CHECK-LABEL: vwsub_wx_nxv2i64:
+define <vscale x 2 x i64> @vwsub_wx_nxv2i64_nxv2i32(<vscale x 2 x i64> %va, i32 %b) {
+; CHECK-LABEL: vwsub_wx_nxv2i64_nxv2i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
 ; CHECK-NEXT:    vwsub.wx v8, v8, a0
@@ -197,8 +197,8 @@ define <vscale x 2 x i64> @vwsub_wx_nxv2i64(<vscale x 2 x i64> %va, i32 %b) {
   ret <vscale x 2 x i64> %vc
 }
 
-define <vscale x 2 x i64> @vwsubu_wx_nxv2i64(<vscale x 2 x i64> %va, i32 %b) {
-; CHECK-LABEL: vwsubu_wx_nxv2i64:
+define <vscale x 2 x i64> @vwsubu_wx_nxv2i64_nxv2i32(<vscale x 2 x i64> %va, i32 %b) {
+; CHECK-LABEL: vwsubu_wx_nxv2i64_nxv2i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
 ; CHECK-NEXT:    vwsubu.wx v8, v8, a0
@@ -210,8 +210,8 @@ define <vscale x 2 x i64> @vwsubu_wx_nxv2i64(<vscale x 2 x i64> %va, i32 %b) {
   ret <vscale x 2 x i64> %vc
 }
 
-define <vscale x 4 x i64> @vwsub_vv_nxv4i64(<vscale x 4 x i32> %va, <vscale x 4 x i32> %vb) {
-; CHECK-LABEL: vwsub_vv_nxv4i64:
+define <vscale x 4 x i64> @vwsub_vv_nxv4i64_nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i32> %vb) {
+; CHECK-LABEL: vwsub_vv_nxv4i64_nxv4i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vwsub.vv v12, v8, v10
@@ -223,8 +223,8 @@ define <vscale x 4 x i64> @vwsub_vv_nxv4i64(<vscale x 4 x i32> %va, <vscale x 4
   ret <vscale x 4 x i64> %ve
 }
 
-define <vscale x 4 x i64> @vwsubu_vv_nxv4i64(<vscale x 4 x i32> %va, <vscale x 4 x i32> %vb) {
-; CHECK-LABEL: vwsubu_vv_nxv4i64:
+define <vscale x 4 x i64> @vwsubu_vv_nxv4i64_nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i32> %vb) {
+; CHECK-LABEL: vwsubu_vv_nxv4i64_nxv4i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vwsubu.vv v12, v8, v10
@@ -236,8 +236,8 @@ define <vscale x 4 x i64> @vwsubu_vv_nxv4i64(<vscale x 4 x i32> %va, <vscale x 4
   ret <vscale x 4 x i64> %ve
 }
 
-define <vscale x 4 x i64> @vwsub_vx_nxv4i64(<vscale x 4 x i32> %va, i32 %b) {
-; CHECK-LABEL: vwsub_vx_nxv4i64:
+define <vscale x 4 x i64> @vwsub_vx_nxv4i64_nxv4i32(<vscale x 4 x i32> %va, i32 %b) {
+; CHECK-LABEL: vwsub_vx_nxv4i64_nxv4i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vwsub.vx v12, v8, a0
@@ -251,8 +251,8 @@ define <vscale x 4 x i64> @vwsub_vx_nxv4i64(<vscale x 4 x i32> %va, i32 %b) {
   ret <vscale x 4 x i64> %ve
 }
 
-define <vscale x 4 x i64> @vwsubu_vx_nxv4i64(<vscale x 4 x i32> %va, i32 %b) {
-; CHECK-LABEL: vwsubu_vx_nxv4i64:
+define <vscale x 4 x i64> @vwsubu_vx_nxv4i64_nxv4i32(<vscale x 4 x i32> %va, i32 %b) {
+; CHECK-LABEL: vwsubu_vx_nxv4i64_nxv4i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vwsubu.vx v12, v8, a0
@@ -266,8 +266,8 @@ define <vscale x 4 x i64> @vwsubu_vx_nxv4i64(<vscale x 4 x i32> %va, i32 %b) {
   ret <vscale x 4 x i64> %ve
 }
 
-define <vscale x 4 x i64> @vwsub_wv_nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i32> %vb) {
-; CHECK-LABEL: vwsub_wv_nxv4i64:
+define <vscale x 4 x i64> @vwsub_wv_nxv4i64_nxv4i32(<vscale x 4 x i64> %va, <vscale x 4 x i32> %vb) {
+; CHECK-LABEL: vwsub_wv_nxv4i64_nxv4i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vwsub.wv v8, v8, v12
@@ -277,8 +277,8 @@ define <vscale x 4 x i64> @vwsub_wv_nxv4i64(<vscale x 4 x i64> %va, <vscale x 4
   ret <vscale x 4 x i64> %vd
 }
 
-define <vscale x 4 x i64> @vwsubu_wv_nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i32> %vb) {
-; CHECK-LABEL: vwsubu_wv_nxv4i64:
+define <vscale x 4 x i64> @vwsubu_wv_nxv4i64_nxv4i32(<vscale x 4 x i64> %va, <vscale x 4 x i32> %vb) {
+; CHECK-LABEL: vwsubu_wv_nxv4i64_nxv4i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vwsubu.wv v8, v8, v12
@@ -288,8 +288,8 @@ define <vscale x 4 x i64> @vwsubu_wv_nxv4i64(<vscale x 4 x i64> %va, <vscale x 4
   ret <vscale x 4 x i64> %vd
 }
 
-define <vscale x 4 x i64> @vwsub_wx_nxv4i64(<vscale x 4 x i64> %va, i32 %b) {
-; CHECK-LABEL: vwsub_wx_nxv4i64:
+define <vscale x 4 x i64> @vwsub_wx_nxv4i64_nxv4i32(<vscale x 4 x i64> %va, i32 %b) {
+; CHECK-LABEL: vwsub_wx_nxv4i64_nxv4i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vwsub.wx v8, v8, a0
@@ -301,8 +301,8 @@ define <vscale x 4 x i64> @vwsub_wx_nxv4i64(<vscale x 4 x i64> %va, i32 %b) {
   ret <vscale x 4 x i64> %vc
 }
 
-define <vscale x 4 x i64> @vwsubu_wx_nxv4i64(<vscale x 4 x i64> %va, i32 %b) {
-; CHECK-LABEL: vwsubu_wx_nxv4i64:
+define <vscale x 4 x i64> @vwsubu_wx_nxv4i64_nxv4i32(<vscale x 4 x i64> %va, i32 %b) {
+; CHECK-LABEL: vwsubu_wx_nxv4i64_nxv4i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vwsubu.wx v8, v8, a0
@@ -314,8 +314,8 @@ define <vscale x 4 x i64> @vwsubu_wx_nxv4i64(<vscale x 4 x i64> %va, i32 %b) {
   ret <vscale x 4 x i64> %vc
 }
 
-define <vscale x 8 x i64> @vwsub_vv_nxv8i64(<vscale x 8 x i32> %va, <vscale x 8 x i32> %vb) {
-; CHECK-LABEL: vwsub_vv_nxv8i64:
+define <vscale x 8 x i64> @vwsub_vv_nxv8i64_nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %vb) {
+; CHECK-LABEL: vwsub_vv_nxv8i64_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    vwsub.vv v16, v8, v12
@@ -327,8 +327,8 @@ define <vscale x 8 x i64> @vwsub_vv_nxv8i64(<vscale x 8 x i32> %va, <vscale x 8
   ret <vscale x 8 x i64> %ve
 }
 
-define <vscale x 8 x i64> @vwsubu_vv_nxv8i64(<vscale x 8 x i32> %va, <vscale x 8 x i32> %vb) {
-; CHECK-LABEL: vwsubu_vv_nxv8i64:
+define <vscale x 8 x i64> @vwsubu_vv_nxv8i64_nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %vb) {
+; CHECK-LABEL: vwsubu_vv_nxv8i64_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    vwsubu.vv v16, v8, v12
@@ -340,8 +340,8 @@ define <vscale x 8 x i64> @vwsubu_vv_nxv8i64(<vscale x 8 x i32> %va, <vscale x 8
   ret <vscale x 8 x i64> %ve
 }
 
-define <vscale x 8 x i64> @vwsub_vx_nxv8i64(<vscale x 8 x i32> %va, i32 %b) {
-; CHECK-LABEL: vwsub_vx_nxv8i64:
+define <vscale x 8 x i64> @vwsub_vx_nxv8i64_nxv8i32(<vscale x 8 x i32> %va, i32 %b) {
+; CHECK-LABEL: vwsub_vx_nxv8i64_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    vwsub.vx v16, v8, a0
@@ -355,8 +355,8 @@ define <vscale x 8 x i64> @vwsub_vx_nxv8i64(<vscale x 8 x i32> %va, i32 %b) {
   ret <vscale x 8 x i64> %ve
 }
 
-define <vscale x 8 x i64> @vwsubu_vx_nxv8i64(<vscale x 8 x i32> %va, i32 %b) {
-; CHECK-LABEL: vwsubu_vx_nxv8i64:
+define <vscale x 8 x i64> @vwsubu_vx_nxv8i64_nxv8i32(<vscale x 8 x i32> %va, i32 %b) {
+; CHECK-LABEL: vwsubu_vx_nxv8i64_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    vwsubu.vx v16, v8, a0
@@ -370,8 +370,8 @@ define <vscale x 8 x i64> @vwsubu_vx_nxv8i64(<vscale x 8 x i32> %va, i32 %b) {
   ret <vscale x 8 x i64> %ve
 }
 
-define <vscale x 8 x i64> @vwsub_wv_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i32> %vb) {
-; CHECK-LABEL: vwsub_wv_nxv8i64:
+define <vscale x 8 x i64> @vwsub_wv_nxv8i64_nxv8i32(<vscale x 8 x i64> %va, <vscale x 8 x i32> %vb) {
+; CHECK-LABEL: vwsub_wv_nxv8i64_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    vwsub.wv v8, v8, v16
@@ -381,8 +381,8 @@ define <vscale x 8 x i64> @vwsub_wv_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8
   ret <vscale x 8 x i64> %vd
 }
 
-define <vscale x 8 x i64> @vwsubu_wv_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i32> %vb) {
-; CHECK-LABEL: vwsubu_wv_nxv8i64:
+define <vscale x 8 x i64> @vwsubu_wv_nxv8i64_nxv8i32(<vscale x 8 x i64> %va, <vscale x 8 x i32> %vb) {
+; CHECK-LABEL: vwsubu_wv_nxv8i64_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    vwsubu.wv v8, v8, v16
@@ -392,8 +392,8 @@ define <vscale x 8 x i64> @vwsubu_wv_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8
   ret <vscale x 8 x i64> %vd
 }
 
-define <vscale x 8 x i64> @vwsub_wx_nxv8i64(<vscale x 8 x i64> %va, i32 %b) {
-; CHECK-LABEL: vwsub_wx_nxv8i64:
+define <vscale x 8 x i64> @vwsub_wx_nxv8i64_nxv8i32(<vscale x 8 x i64> %va, i32 %b) {
+; CHECK-LABEL: vwsub_wx_nxv8i64_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    vwsub.wx v8, v8, a0
@@ -405,8 +405,8 @@ define <vscale x 8 x i64> @vwsub_wx_nxv8i64(<vscale x 8 x i64> %va, i32 %b) {
   ret <vscale x 8 x i64> %vc
 }
 
-define <vscale x 8 x i64> @vwsubu_wx_nxv8i64(<vscale x 8 x i64> %va, i32 %b) {
-; CHECK-LABEL: vwsubu_wx_nxv8i64:
+define <vscale x 8 x i64> @vwsubu_wx_nxv8i64_nxv8i32(<vscale x 8 x i64> %va, i32 %b) {
+; CHECK-LABEL: vwsubu_wx_nxv8i64_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    vwsubu.wx v8, v8, a0
@@ -417,3 +417,947 @@ define <vscale x 8 x i64> @vwsubu_wx_nxv8i64_nxv8i32(<vscale x 8 x i64> %va, i32 %b) {
   %vc = sub <vscale x 8 x i64> %va, %vb
   ret <vscale x 8 x i64> %vc
 }
+
+define <vscale x 1 x i64> @vwsub_vv_nxv1i64_nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %vb) {
+; CHECK-LABEL: vwsub_vv_nxv1i64_nxv1i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
+; CHECK-NEXT:    vsext.vf4 v10, v8
+; CHECK-NEXT:    vsext.vf4 v8, v9
+; CHECK-NEXT:    vsub.vv v8, v10, v8
+; CHECK-NEXT:    ret
+  %vc = sext <vscale x 1 x i16> %va to <vscale x 1 x i64>
+  %vd = sext <vscale x 1 x i16> %vb to <vscale x 1 x i64>
+  %ve = sub <vscale x 1 x i64> %vc, %vd
+  ret <vscale x 1 x i64> %ve
+}
+
+define <vscale x 1 x i64> @vwsubu_vv_nxv1i64_nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %vb) {
+; CHECK-LABEL: vwsubu_vv_nxv1i64_nxv1i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT:    vwsubu.vv v10, v8, v9
+; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
+; CHECK-NEXT:    vsext.vf2 v8, v10
+; CHECK-NEXT:    ret
+  %vc = zext <vscale x 1 x i16> %va to <vscale x 1 x i64>
+  %vd = zext <vscale x 1 x i16> %vb to <vscale x 1 x i64>
+  %ve = sub <vscale x 1 x i64> %vc, %vd
+  ret <vscale x 1 x i64> %ve
+}
+
+define <vscale x 1 x i64> @vwsub_vx_nxv1i64_nxv1i16(<vscale x 1 x i16> %va, i16 %b) {
+; CHECK-LABEL: vwsub_vx_nxv1i64_nxv1i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
+; CHECK-NEXT:    vmv.v.x v9, a0
+; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
+; CHECK-NEXT:    vsext.vf4 v10, v8
+; CHECK-NEXT:    vsext.vf4 v8, v9
+; CHECK-NEXT:    vsub.vv v8, v10, v8
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 1 x i16> poison, i16 %b, i16 0
+  %splat = shufflevector <vscale x 1 x i16> %head, <vscale x 1 x i16> poison, <vscale x 1 x i32> zeroinitializer
+  %vc = sext <vscale x 1 x i16> %va to <vscale x 1 x i64>
+  %vd = sext <vscale x 1 x i16> %splat to <vscale x 1 x i64>
+  %ve = sub <vscale x 1 x i64> %vc, %vd
+  ret <vscale x 1 x i64> %ve
+}
+
+define <vscale x 1 x i64> @vwsubu_vx_nxv1i64_nxv1i16(<vscale x 1 x i16> %va, i16 %b) {
+; CHECK-LABEL: vwsubu_vx_nxv1i64_nxv1i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
+; CHECK-NEXT:    vwsubu.vx v9, v8, a0
+; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
+; CHECK-NEXT:    vsext.vf2 v8, v9
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 1 x i16> poison, i16 %b, i16 0
+  %splat = shufflevector <vscale x 1 x i16> %head, <vscale x 1 x i16> poison, <vscale x 1 x i32> zeroinitializer
+  %vc = zext <vscale x 1 x i16> %va to <vscale x 1 x i64>
+  %vd = zext <vscale x 1 x i16> %splat to <vscale x 1 x i64>
+  %ve = sub <vscale x 1 x i64> %vc, %vd
+  ret <vscale x 1 x i64> %ve
+}
+
+define <vscale x 1 x i64> @vwsub_wv_nxv1i64_nxv1i16(<vscale x 1 x i64> %va, <vscale x 1 x i16> %vb) {
+; CHECK-LABEL: vwsub_wv_nxv1i64_nxv1i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
+; CHECK-NEXT:    vsext.vf4 v10, v9
+; CHECK-NEXT:    vsub.vv v8, v8, v10
+; CHECK-NEXT:    ret
+  %vc = sext <vscale x 1 x i16> %vb to <vscale x 1 x i64>
+  %vd = sub <vscale x 1 x i64> %va, %vc
+  ret <vscale x 1 x i64> %vd
+}
+
+define <vscale x 1 x i64> @vwsubu_wv_nxv1i64_nxv1i16(<vscale x 1 x i64> %va, <vscale x 1 x i16> %vb) {
+; CHECK-LABEL: vwsubu_wv_nxv1i64_nxv1i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
+; CHECK-NEXT:    vzext.vf4 v10, v9
+; CHECK-NEXT:    vsub.vv v8, v8, v10
+; CHECK-NEXT:    ret
+  %vc = zext <vscale x 1 x i16> %vb to <vscale x 1 x i64>
+  %vd = sub <vscale x 1 x i64> %va, %vc
+  ret <vscale x 1 x i64> %vd
+}
+
+define <vscale x 1 x i64> @vwsub_wx_nxv1i64_nxv1i16(<vscale x 1 x i64> %va, i16 %b) {
+; CHECK-LABEL: vwsub_wx_nxv1i64_nxv1i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
+; CHECK-NEXT:    vmv.v.x v9, a0
+; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
+; CHECK-NEXT:    vsext.vf4 v10, v9
+; CHECK-NEXT:    vsub.vv v8, v8, v10
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 1 x i16> poison, i16 %b, i16 0
+  %splat = shufflevector <vscale x 1 x i16> %head, <vscale x 1 x i16> poison, <vscale x 1 x i32> zeroinitializer
+  %vb = sext <vscale x 1 x i16> %splat to <vscale x 1 x i64>
+  %vc = sub <vscale x 1 x i64> %va, %vb
+  ret <vscale x 1 x i64> %vc
+}
+
+define <vscale x 1 x i64> @vwsubu_wx_nxv1i64_nxv1i16(<vscale x 1 x i64> %va, i16 %b) {
+; CHECK-LABEL: vwsubu_wx_nxv1i64_nxv1i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
+; CHECK-NEXT:    vmv.v.x v9, a0
+; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
+; CHECK-NEXT:    vzext.vf4 v10, v9
+; CHECK-NEXT:    vsub.vv v8, v8, v10
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 1 x i16> poison, i16 %b, i16 0
+  %splat = shufflevector <vscale x 1 x i16> %head, <vscale x 1 x i16> poison, <vscale x 1 x i32> zeroinitializer
+  %vb = zext <vscale x 1 x i16> %splat to <vscale x 1 x i64>
+  %vc = sub <vscale x 1 x i64> %va, %vb
+  ret <vscale x 1 x i64> %vc
+}
+
+define <vscale x 2 x i64> @vwsub_vv_nxv2i64_nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %vb) {
+; CHECK-LABEL: vwsub_vv_nxv2i64_nxv2i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
+; CHECK-NEXT:    vsext.vf4 v10, v8
+; CHECK-NEXT:    vsext.vf4 v12, v9
+; CHECK-NEXT:    vsub.vv v8, v10, v12
+; CHECK-NEXT:    ret
+  %vc = sext <vscale x 2 x i16> %va to <vscale x 2 x i64>
+  %vd = sext <vscale x 2 x i16> %vb to <vscale x 2 x i64>
+  %ve = sub <vscale x 2 x i64> %vc, %vd
+  ret <vscale x 2 x i64> %ve
+}
+
+define <vscale x 2 x i64> @vwsubu_vv_nxv2i64_nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %vb) {
+; CHECK-LABEL: vwsubu_vv_nxv2i64_nxv2i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT:    vwsubu.vv v10, v8, v9
+; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
+; CHECK-NEXT:    vsext.vf2 v8, v10
+; CHECK-NEXT:    ret
+  %vc = zext <vscale x 2 x i16> %va to <vscale x 2 x i64>
+  %vd = zext <vscale x 2 x i16> %vb to <vscale x 2 x i64>
+  %ve = sub <vscale x 2 x i64> %vc, %vd
+  ret <vscale x 2 x i64> %ve
+}
+
+define <vscale x 2 x i64> @vwsub_vx_nxv2i64_nxv2i16(<vscale x 2 x i16> %va, i16 %b) {
+; CHECK-LABEL: vwsub_vx_nxv2i64_nxv2i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
+; CHECK-NEXT:    vmv.v.x v9, a0
+; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
+; CHECK-NEXT:    vsext.vf4 v10, v8
+; CHECK-NEXT:    vsext.vf4 v12, v9
+; CHECK-NEXT:    vsub.vv v8, v10, v12
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 2 x i16> poison, i16 %b, i16 0
+  %splat = shufflevector <vscale x 2 x i16> %head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
+  %vc = sext <vscale x 2 x i16> %va to <vscale x 2 x i64>
+  %vd = sext <vscale x 2 x i16> %splat to <vscale x 2 x i64>
+  %ve = sub <vscale x 2 x i64> %vc, %vd
+  ret <vscale x 2 x i64> %ve
+}
+
+define <vscale x 2 x i64> @vwsubu_vx_nxv2i64_nxv2i16(<vscale x 2 x i16> %va, i16 %b) {
+; CHECK-LABEL: vwsubu_vx_nxv2i64_nxv2i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
+; CHECK-NEXT:    vwsubu.vx v10, v8, a0
+; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
+; CHECK-NEXT:    vsext.vf2 v8, v10
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 2 x i16> poison, i16 %b, i16 0
+  %splat = shufflevector <vscale x 2 x i16> %head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
+  %vc = zext <vscale x 2 x i16> %va to <vscale x 2 x i64>
+  %vd = zext <vscale x 2 x i16> %splat to <vscale x 2 x i64>
+  %ve = sub <vscale x 2 x i64> %vc, %vd
+  ret <vscale x 2 x i64> %ve
+}
+
+define <vscale x 2 x i64> @vwsub_wv_nxv2i64_nxv2i16(<vscale x 2 x i64> %va, <vscale x 2 x i16> %vb) {
+; CHECK-LABEL: vwsub_wv_nxv2i64_nxv2i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
+; CHECK-NEXT:    vsext.vf4 v12, v10
+; CHECK-NEXT:    vsub.vv v8, v8, v12
+; CHECK-NEXT:    ret
+  %vc = sext <vscale x 2 x i16> %vb to <vscale x 2 x i64>
+  %vd = sub <vscale x 2 x i64> %va, %vc
+  ret <vscale x 2 x i64> %vd
+}
+
+define <vscale x 2 x i64> @vwsubu_wv_nxv2i64_nxv2i16(<vscale x 2 x i64> %va, <vscale x 2 x i16> %vb) {
+; CHECK-LABEL: vwsubu_wv_nxv2i64_nxv2i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
+; CHECK-NEXT:    vzext.vf4 v12, v10
+; CHECK-NEXT:    vsub.vv v8, v8, v12
+; CHECK-NEXT:    ret
+  %vc = zext <vscale x 2 x i16> %vb to <vscale x 2 x i64>
+  %vd = sub <vscale x 2 x i64> %va, %vc
+  ret <vscale x 2 x i64> %vd
+}
+
+define <vscale x 2 x i64> @vwsub_wx_nxv2i64_nxv2i16(<vscale x 2 x i64> %va, i16 %b) {
+; CHECK-LABEL: vwsub_wx_nxv2i64_nxv2i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
+; CHECK-NEXT:    vmv.v.x v10, a0
+; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
+; CHECK-NEXT:    vsext.vf4 v12, v10
+; CHECK-NEXT:    vsub.vv v8, v8, v12
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 2 x i16> poison, i16 %b, i16 0
+  %splat = shufflevector <vscale x 2 x i16> %head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
+  %vb = sext <vscale x 2 x i16> %splat to <vscale x 2 x i64>
+  %vc = sub <vscale x 2 x i64> %va, %vb
+  ret <vscale x 2 x i64> %vc
+}
+
+define <vscale x 2 x i64> @vwsubu_wx_nxv2i64_nxv2i16(<vscale x 2 x i64> %va, i16 %b) {
+; CHECK-LABEL: vwsubu_wx_nxv2i64_nxv2i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
+; CHECK-NEXT:    vmv.v.x v10, a0
+; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
+; CHECK-NEXT:    vzext.vf4 v12, v10
+; CHECK-NEXT:    vsub.vv v8, v8, v12
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 2 x i16> poison, i16 %b, i16 0
+  %splat = shufflevector <vscale x 2 x i16> %head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
+  %vb = zext <vscale x 2 x i16> %splat to <vscale x 2 x i64>
+  %vc = sub <vscale x 2 x i64> %va, %vb
+  ret <vscale x 2 x i64> %vc
+}
+
+define <vscale x 4 x i64> @vwsub_vv_nxv4i64_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %vb) {
+; CHECK-LABEL: vwsub_vv_nxv4i64_nxv4i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
+; CHECK-NEXT:    vsext.vf4 v12, v8
+; CHECK-NEXT:    vsext.vf4 v16, v9
+; CHECK-NEXT:    vsub.vv v8, v12, v16
+; CHECK-NEXT:    ret
+  %vc = sext <vscale x 4 x i16> %va to <vscale x 4 x i64>
+  %vd = sext <vscale x 4 x i16> %vb to <vscale x 4 x i64>
+  %ve = sub <vscale x 4 x i64> %vc, %vd
+  ret <vscale x 4 x i64> %ve
+}
+
+define <vscale x 4 x i64> @vwsubu_vv_nxv4i64_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %vb) {
+; CHECK-LABEL: vwsubu_vv_nxv4i64_nxv4i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT:    vwsubu.vv v12, v8, v9
+; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
+; CHECK-NEXT:    vsext.vf2 v8, v12
+; CHECK-NEXT:    ret
+  %vc = zext <vscale x 4 x i16> %va to <vscale x 4 x i64>
+  %vd = zext <vscale x 4 x i16> %vb to <vscale x 4 x i64>
+  %ve = sub <vscale x 4 x i64> %vc, %vd
+  ret <vscale x 4 x i64> %ve
+}
+
+define <vscale x 4 x i64> @vwsub_vx_nxv4i64_nxv4i16(<vscale x 4 x i16> %va, i16 %b) {
+; CHECK-LABEL: vwsub_vx_nxv4i64_nxv4i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
+; CHECK-NEXT:    vmv.v.x v9, a0
+; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
+; CHECK-NEXT:    vsext.vf4 v12, v8
+; CHECK-NEXT:    vsext.vf4 v16, v9
+; CHECK-NEXT:    vsub.vv v8, v12, v16
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 4 x i16> poison, i16 %b, i16 0
+  %splat = shufflevector <vscale x 4 x i16> %head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
+  %vc = sext <vscale x 4 x i16> %va to <vscale x 4 x i64>
+  %vd = sext <vscale x 4 x i16> %splat to <vscale x 4 x i64>
+  %ve = sub <vscale x 4 x i64> %vc, %vd
+  ret <vscale x 4 x i64> %ve
+}
+
+define <vscale x 4 x i64> @vwsubu_vx_nxv4i64_nxv4i16(<vscale x 4 x i16> %va, i16 %b) {
+; CHECK-LABEL: vwsubu_vx_nxv4i64_nxv4i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
+; CHECK-NEXT:    vwsubu.vx v12, v8, a0
+; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
+; CHECK-NEXT:    vsext.vf2 v8, v12
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 4 x i16> poison, i16 %b, i16 0
+  %splat = shufflevector <vscale x 4 x i16> %head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
+  %vc = zext <vscale x 4 x i16> %va to <vscale x 4 x i64>
+  %vd = zext <vscale x 4 x i16> %splat to <vscale x 4 x i64>
+  %ve = sub <vscale x 4 x i64> %vc, %vd
+  ret <vscale x 4 x i64> %ve
+}
+
+define <vscale x 4 x i64> @vwsub_wv_nxv4i64_nxv4i16(<vscale x 4 x i64> %va, <vscale x 4 x i16> %vb) {
+; CHECK-LABEL: vwsub_wv_nxv4i64_nxv4i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
+; CHECK-NEXT:    vsext.vf4 v16, v12
+; CHECK-NEXT:    vsub.vv v8, v8, v16
+; CHECK-NEXT:    ret
+  %vc = sext <vscale x 4 x i16> %vb to <vscale x 4 x i64>
+  %vd = sub <vscale x 4 x i64> %va, %vc
+  ret <vscale x 4 x i64> %vd
+}
+
+define <vscale x 4 x i64> @vwsubu_wv_nxv4i64_nxv4i16(<vscale x 4 x i64> %va, <vscale x 4 x i16> %vb) {
+; CHECK-LABEL: vwsubu_wv_nxv4i64_nxv4i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
+; CHECK-NEXT:    vzext.vf4 v16, v12
+; CHECK-NEXT:    vsub.vv v8, v8, v16
+; CHECK-NEXT:    ret
+  %vc = zext <vscale x 4 x i16> %vb to <vscale x 4 x i64>
+  %vd = sub <vscale x 4 x i64> %va, %vc
+  ret <vscale x 4 x i64> %vd
+}
+
+define <vscale x 4 x i64> @vwsub_wx_nxv4i64_nxv4i16(<vscale x 4 x i64> %va, i16 %b) {
+; CHECK-LABEL: vwsub_wx_nxv4i64_nxv4i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
+; CHECK-NEXT:    vmv.v.x v12, a0
+; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
+; CHECK-NEXT:    vsext.vf4 v16, v12
+; CHECK-NEXT:    vsub.vv v8, v8, v16
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 4 x i16> poison, i16 %b, i16 0
+  %splat = shufflevector <vscale x 4 x i16> %head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
+  %vb = sext <vscale x 4 x i16> %splat to <vscale x 4 x i64>
+  %vc = sub <vscale x 4 x i64> %va, %vb
+  ret <vscale x 4 x i64> %vc
+}
+
+define <vscale x 4 x i64> @vwsubu_wx_nxv4i64_nxv4i16(<vscale x 4 x i64> %va, i16 %b) {
+; CHECK-LABEL: vwsubu_wx_nxv4i64_nxv4i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
+; CHECK-NEXT:    vmv.v.x v12, a0
+; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
+; CHECK-NEXT:    vzext.vf4 v16, v12
+; CHECK-NEXT:    vsub.vv v8, v8, v16
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 4 x i16> poison, i16 %b, i16 0
+  %splat = shufflevector <vscale x 4 x i16> %head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
+  %vb = zext <vscale x 4 x i16> %splat to <vscale x 4 x i64>
+  %vc = sub <vscale x 4 x i64> %va, %vb
+  ret <vscale x 4 x i64> %vc
+}
+
+define <vscale x 8 x i64> @vwsub_vv_nxv8i64_nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %vb) {
+; CHECK-LABEL: vwsub_vv_nxv8i64_nxv8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
+; CHECK-NEXT:    vsext.vf4 v16, v8
+; CHECK-NEXT:    vsext.vf4 v24, v10
+; CHECK-NEXT:    vsub.vv v8, v16, v24
+; CHECK-NEXT:    ret
+  %vc = sext <vscale x 8 x i16> %va to <vscale x 8 x i64>
+  %vd = sext <vscale x 8 x i16> %vb to <vscale x 8 x i64>
+  %ve = sub <vscale x 8 x i64> %vc, %vd
+  ret <vscale x 8 x i64> %ve
+}
+
+define <vscale x 8 x i64> @vwsubu_vv_nxv8i64_nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %vb) {
+; CHECK-LABEL: vwsubu_vv_nxv8i64_nxv8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT:    vwsubu.vv v16, v8, v10
+; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; CHECK-NEXT:    vsext.vf2 v8, v16
+; CHECK-NEXT:    ret
+  %vc = zext <vscale x 8 x i16> %va to <vscale x 8 x i64>
+  %vd = zext <vscale x 8 x i16> %vb to <vscale x 8 x i64>
+  %ve = sub <vscale x 8 x i64> %vc, %vd
+  ret <vscale x 8 x i64> %ve
+}
+
+define <vscale x 8 x i64> @vwsub_vx_nxv8i64_nxv8i16(<vscale x 8 x i16> %va, i16 %b) {
+; CHECK-LABEL: vwsub_vx_nxv8i64_nxv8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT:    vmv.v.x v10, a0
+; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; CHECK-NEXT:    vsext.vf4 v16, v8
+; CHECK-NEXT:    vsext.vf4 v24, v10
+; CHECK-NEXT:    vsub.vv v8, v16, v24
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 8 x i16> poison, i16 %b, i16 0
+  %splat = shufflevector <vscale x 8 x i16> %head, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
+  %vc = sext <vscale x 8 x i16> %va to <vscale x 8 x i64>
+  %vd = sext <vscale x 8 x i16> %splat to <vscale x 8 x i64>
+  %ve = sub <vscale x 8 x i64> %vc, %vd
+  ret <vscale x 8 x i64> %ve
+}
+
+define <vscale x 8 x i64> @vwsubu_vx_nxv8i64_nxv8i16(<vscale x 8 x i16> %va, i16 %b) {
+; CHECK-LABEL: vwsubu_vx_nxv8i64_nxv8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT:    vwsubu.vx v16, v8, a0
+; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; CHECK-NEXT:    vsext.vf2 v8, v16
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 8 x i16> poison, i16 %b, i16 0
+  %splat = shufflevector <vscale x 8 x i16> %head, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
+  %vc = zext <vscale x 8 x i16> %va to <vscale x 8 x i64>
+  %vd = zext <vscale x 8 x i16> %splat to <vscale x 8 x i64>
+  %ve = sub <vscale x 8 x i64> %vc, %vd
+  ret <vscale x 8 x i64> %ve
+}
+
+define <vscale x 8 x i64> @vwsub_wv_nxv8i64_nxv8i16(<vscale x 8 x i64> %va, <vscale x 8 x i16> %vb) {
+; CHECK-LABEL: vwsub_wv_nxv8i64_nxv8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
+; CHECK-NEXT:    vsext.vf4 v24, v16
+; CHECK-NEXT:    vsub.vv v8, v8, v24
+; CHECK-NEXT:    ret
+  %vc = sext <vscale x 8 x i16> %vb to <vscale x 8 x i64>
+  %vd = sub <vscale x 8 x i64> %va, %vc
+  ret <vscale x 8 x i64> %vd
+}
+
+define <vscale x 8 x i64> @vwsubu_wv_nxv8i64_nxv8i16(<vscale x 8 x i64> %va, <vscale x 8 x i16> %vb) {
+; CHECK-LABEL: vwsubu_wv_nxv8i64_nxv8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
+; CHECK-NEXT:    vzext.vf4 v24, v16
+; CHECK-NEXT:    vsub.vv v8, v8, v24
+; CHECK-NEXT:    ret
+  %vc = zext <vscale x 8 x i16> %vb to <vscale x 8 x i64>
+  %vd = sub <vscale x 8 x i64> %va, %vc
+  ret <vscale x 8 x i64> %vd
+}
+
+define <vscale x 8 x i64> @vwsub_wx_nxv8i64_nxv8i16(<vscale x 8 x i64> %va, i16 %b) {
+; CHECK-LABEL: vwsub_wx_nxv8i64_nxv8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT:    vmv.v.x v16, a0
+; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; CHECK-NEXT:    vsext.vf4 v24, v16
+; CHECK-NEXT:    vsub.vv v8, v8, v24
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 8 x i16> poison, i16 %b, i16 0
+  %splat = shufflevector <vscale x 8 x i16> %head, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
+  %vb = sext <vscale x 8 x i16> %splat to <vscale x 8 x i64>
+  %vc = sub <vscale x 8 x i64> %va, %vb
+  ret <vscale x 8 x i64> %vc
+}
+
+define <vscale x 8 x i64> @vwsubu_wx_nxv8i64_nxv8i16(<vscale x 8 x i64> %va, i16 %b) {
+; CHECK-LABEL: vwsubu_wx_nxv8i64_nxv8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT:    vmv.v.x v16, a0
+; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; CHECK-NEXT:    vzext.vf4 v24, v16
+; CHECK-NEXT:    vsub.vv v8, v8, v24
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 8 x i16> poison, i16 %b, i16 0
+  %splat = shufflevector <vscale x 8 x i16> %head, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
+  %vb = zext <vscale x 8 x i16> %splat to <vscale x 8 x i64>
+  %vc = sub <vscale x 8 x i64> %va, %vb
+  ret <vscale x 8 x i64> %vc
+}
+
+define <vscale x 1 x i64> @vwsub_vv_nxv1i64_nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb) {
+; CHECK-LABEL: vwsub_vv_nxv1i64_nxv1i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
+; CHECK-NEXT:    vsext.vf8 v10, v8
+; CHECK-NEXT:    vsext.vf8 v8, v9
+; CHECK-NEXT:    vsub.vv v8, v10, v8
+; CHECK-NEXT:    ret
+  %vc = sext <vscale x 1 x i8> %va to <vscale x 1 x i64>
+  %vd = sext <vscale x 1 x i8> %vb to <vscale x 1 x i64>
+  %ve = sub <vscale x 1 x i64> %vc, %vd
+  ret <vscale x 1 x i64> %ve
+}
+
+define <vscale x 1 x i64> @vwsubu_vv_nxv1i64_nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb) {
+; CHECK-LABEL: vwsubu_vv_nxv1i64_nxv1i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e8, mf8, ta, ma
+; CHECK-NEXT:    vwsubu.vv v10, v8, v9
+; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
+; CHECK-NEXT:    vsext.vf4 v8, v10
+; CHECK-NEXT:    ret
+  %vc = zext <vscale x 1 x i8> %va to <vscale x 1 x i64>
+  %vd = zext <vscale x 1 x i8> %vb to <vscale x 1 x i64>
+  %ve = sub <vscale x 1 x i64> %vc, %vd
+  ret <vscale x 1 x i64> %ve
+}
+
+define <vscale x 1 x i64> @vwsub_vx_nxv1i64_nxv1i8(<vscale x 1 x i8> %va, i8 %b) {
+; CHECK-LABEL: vwsub_vx_nxv1i64_nxv1i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e8, mf8, ta, ma
+; CHECK-NEXT:    vmv.v.x v9, a0
+; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
+; CHECK-NEXT:    vsext.vf8 v10, v8
+; CHECK-NEXT:    vsext.vf8 v8, v9
+; CHECK-NEXT:    vsub.vv v8, v10, v8
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 1 x i8> poison, i8 %b, i8 0
+  %splat = shufflevector <vscale x 1 x i8> %head, <vscale x 1 x i8> poison, <vscale x 1 x i32> zeroinitializer
+  %vc = sext <vscale x 1 x i8> %va to <vscale x 1 x i64>
+  %vd = sext <vscale x 1 x i8> %splat to <vscale x 1 x i64>
+  %ve = sub <vscale x 1 x i64> %vc, %vd
+  ret <vscale x 1 x i64> %ve
+}
+
+define <vscale x 1 x i64> @vwsubu_vx_nxv1i64_nxv1i8(<vscale x 1 x i8> %va, i8 %b) {
+; CHECK-LABEL: vwsubu_vx_nxv1i64_nxv1i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e8, mf8, ta, ma
+; CHECK-NEXT:    vwsubu.vx v9, v8, a0
+; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
+; CHECK-NEXT:    vsext.vf4 v8, v9
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 1 x i8> poison, i8 %b, i8 0
+  %splat = shufflevector <vscale x 1 x i8> %head, <vscale x 1 x i8> poison, <vscale x 1 x i32> zeroinitializer
+  %vc = zext <vscale x 1 x i8> %va to <vscale x 1 x i64>
+  %vd = zext <vscale x 1 x i8> %splat to <vscale x 1 x i64>
+  %ve = sub <vscale x 1 x i64> %vc, %vd
+  ret <vscale x 1 x i64> %ve
+}
+
+define <vscale x 1 x i64> @vwsub_wv_nxv1i64_nxv1i8(<vscale x 1 x i64> %va, <vscale x 1 x i8> %vb) {
+; CHECK-LABEL: vwsub_wv_nxv1i64_nxv1i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
+; CHECK-NEXT:    vsext.vf8 v10, v9
+; CHECK-NEXT:    vsub.vv v8, v8, v10
+; CHECK-NEXT:    ret
+  %vc = sext <vscale x 1 x i8> %vb to <vscale x 1 x i64>
+  %vd = sub <vscale x 1 x i64> %va, %vc
+  ret <vscale x 1 x i64> %vd
+}
+
+define <vscale x 1 x i64> @vwsubu_wv_nxv1i64_nxv1i8(<vscale x 1 x i64> %va, <vscale x 1 x i8> %vb) {
+; CHECK-LABEL: vwsubu_wv_nxv1i64_nxv1i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
+; CHECK-NEXT:    vzext.vf8 v10, v9
+; CHECK-NEXT:    vsub.vv v8, v8, v10
+; CHECK-NEXT:    ret
+  %vc = zext <vscale x 1 x i8> %vb to <vscale x 1 x i64>
+  %vd = sub <vscale x 1 x i64> %va, %vc
+  ret <vscale x 1 x i64> %vd
+}
+
+define <vscale x 1 x i64> @vwsub_wx_nxv1i64_nxv1i8(<vscale x 1 x i64> %va, i8 %b) {
+; CHECK-LABEL: vwsub_wx_nxv1i64_nxv1i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e8, mf8, ta, ma
+; CHECK-NEXT:    vmv.v.x v9, a0
+; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
+; CHECK-NEXT:    vsext.vf8 v10, v9
+; CHECK-NEXT:    vsub.vv v8, v8, v10
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 1 x i8> poison, i8 %b, i8 0
+  %splat = shufflevector <vscale x 1 x i8> %head, <vscale x 1 x i8> poison, <vscale x 1 x i32> zeroinitializer
+  %vb = sext <vscale x 1 x i8> %splat to <vscale x 1 x i64>
+  %vc = sub <vscale x 1 x i64> %va, %vb
+  ret <vscale x 1 x i64> %vc
+}
+
+define <vscale x 1 x i64> @vwsubu_wx_nxv1i64_nxv1i8(<vscale x 1 x i64> %va, i8 %b) {
+; CHECK-LABEL: vwsubu_wx_nxv1i64_nxv1i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e8, mf8, ta, ma
+; CHECK-NEXT:    vmv.v.x v9, a0
+; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
+; CHECK-NEXT:    vzext.vf8 v10, v9
+; CHECK-NEXT:    vsub.vv v8, v8, v10
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 1 x i8> poison, i8 %b, i8 0
+  %splat = shufflevector <vscale x 1 x i8> %head, <vscale x 1 x i8> poison, <vscale x 1 x i32> zeroinitializer
+  %vb = zext <vscale x 1 x i8> %splat to <vscale x 1 x i64>
+  %vc = sub <vscale x 1 x i64> %va, %vb
+  ret <vscale x 1 x i64> %vc
+}
+
+define <vscale x 2 x i64> @vwsub_vv_nxv2i64_nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %vb) {
+; CHECK-LABEL: vwsub_vv_nxv2i64_nxv2i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
+; CHECK-NEXT:    vsext.vf8 v10, v8
+; CHECK-NEXT:    vsext.vf8 v12, v9
+; CHECK-NEXT:    vsub.vv v8, v10, v12
+; CHECK-NEXT:    ret
+  %vc = sext <vscale x 2 x i8> %va to <vscale x 2 x i64>
+  %vd = sext <vscale x 2 x i8> %vb to <vscale x 2 x i64>
+  %ve = sub <vscale x 2 x i64> %vc, %vd
+  ret <vscale x 2 x i64> %ve
+}
+
+define <vscale x 2 x i64> @vwsubu_vv_nxv2i64_nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %vb) {
+; CHECK-LABEL: vwsubu_vv_nxv2i64_nxv2i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e8, mf4, ta, ma
+; CHECK-NEXT:    vwsubu.vv v10, v8, v9
+; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
+; CHECK-NEXT:    vsext.vf4 v8, v10
+; CHECK-NEXT:    ret
+  %vc = zext <vscale x 2 x i8> %va to <vscale x 2 x i64>
+  %vd = zext <vscale x 2 x i8> %vb to <vscale x 2 x i64>
+  %ve = sub <vscale x 2 x i64> %vc, %vd
+  ret <vscale x 2 x i64> %ve
+}
+
+define <vscale x 2 x i64> @vwsub_vx_nxv2i64_nxv2i8(<vscale x 2 x i8> %va, i8 %b) {
+; CHECK-LABEL: vwsub_vx_nxv2i64_nxv2i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e8, mf4, ta, ma
+; CHECK-NEXT:    vmv.v.x v9, a0
+; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
+; CHECK-NEXT:    vsext.vf8 v10, v8
+; CHECK-NEXT:    vsext.vf8 v12, v9
+; CHECK-NEXT:    vsub.vv v8, v10, v12
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 2 x i8> poison, i8 %b, i8 0
+  %splat = shufflevector <vscale x 2 x i8> %head, <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer
+  %vc = sext <vscale x 2 x i8> %va to <vscale x 2 x i64>
+  %vd = sext <vscale x 2 x i8> %splat to <vscale x 2 x i64>
+  %ve = sub <vscale x 2 x i64> %vc, %vd
+  ret <vscale x 2 x i64> %ve
+}
+
+define <vscale x 2 x i64> @vwsubu_vx_nxv2i64_nxv2i8(<vscale x 2 x i8> %va, i8 %b) {
+; CHECK-LABEL: vwsubu_vx_nxv2i64_nxv2i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e8, mf4, ta, ma
+; CHECK-NEXT:    vwsubu.vx v10, v8, a0
+; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
+; CHECK-NEXT:    vsext.vf4 v8, v10
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 2 x i8> poison, i8 %b, i8 0
+  %splat = shufflevector <vscale x 2 x i8> %head, <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer
+  %vc = zext <vscale x 2 x i8> %va to <vscale x 2 x i64>
+  %vd = zext <vscale x 2 x i8> %splat to <vscale x 2 x i64>
+  %ve = sub <vscale x 2 x i64> %vc, %vd
+  ret <vscale x 2 x i64> %ve
+}
+
+define <vscale x 2 x i64> @vwsub_wv_nxv2i64_nxv2i8(<vscale x 2 x i64> %va, <vscale x 2 x i8> %vb) {
+; CHECK-LABEL: vwsub_wv_nxv2i64_nxv2i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
+; CHECK-NEXT:    vsext.vf8 v12, v10
+; CHECK-NEXT:    vsub.vv v8, v8, v12
+; CHECK-NEXT:    ret
+  %vc = sext <vscale x 2 x i8> %vb to <vscale x 2 x i64>
+  %vd = sub <vscale x 2 x i64> %va, %vc
+  ret <vscale x 2 x i64> %vd
+}
+
+define <vscale x 2 x i64> @vwsubu_wv_nxv2i64_nxv2i8(<vscale x 2 x i64> %va, <vscale x 2 x i8> %vb) {
+; CHECK-LABEL: vwsubu_wv_nxv2i64_nxv2i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
+; CHECK-NEXT:    vzext.vf8 v12, v10
+; CHECK-NEXT:    vsub.vv v8, v8, v12
+; CHECK-NEXT:    ret
+  %vc = zext <vscale x 2 x i8> %vb to <vscale x 2 x i64>
+  %vd = sub <vscale x 2 x i64> %va, %vc
+  ret <vscale x 2 x i64> %vd
+}
+
+define <vscale x 2 x i64> @vwsub_wx_nxv2i64_nxv2i8(<vscale x 2 x i64> %va, i8 %b) {
+; CHECK-LABEL: vwsub_wx_nxv2i64_nxv2i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e8, mf4, ta, ma
+; CHECK-NEXT:    vmv.v.x v10, a0
+; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
+; CHECK-NEXT:    vsext.vf8 v12, v10
+; CHECK-NEXT:    vsub.vv v8, v8, v12
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 2 x i8> poison, i8 %b, i8 0
+  %splat = shufflevector <vscale x 2 x i8> %head, <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer
+  %vb = sext <vscale x 2 x i8> %splat to <vscale x 2 x i64>
+  %vc = sub <vscale x 2 x i64> %va, %vb
+  ret <vscale x 2 x i64> %vc
+}
+
+define <vscale x 2 x i64> @vwsubu_wx_nxv2i64_nxv2i8(<vscale x 2 x i64> %va, i8 %b) {
+; CHECK-LABEL: vwsubu_wx_nxv2i64_nxv2i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e8, mf4, ta, ma
+; CHECK-NEXT:    vmv.v.x v10, a0
+; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
+; CHECK-NEXT:    vzext.vf8 v12, v10
+; CHECK-NEXT:    vsub.vv v8, v8, v12
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 2 x i8> poison, i8 %b, i8 0
+  %splat = shufflevector <vscale x 2 x i8> %head, <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer
+  %vb = zext <vscale x 2 x i8> %splat to <vscale x 2 x i64>
+  %vc = sub <vscale x 2 x i64> %va, %vb
+  ret <vscale x 2 x i64> %vc
+}
+
+define <vscale x 4 x i64> @vwsub_vv_nxv4i64_nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %vb) {
+; CHECK-LABEL: vwsub_vv_nxv4i64_nxv4i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
+; CHECK-NEXT:    vsext.vf8 v12, v8
+; CHECK-NEXT:    vsext.vf8 v16, v9
+; CHECK-NEXT:    vsub.vv v8, v12, v16
+; CHECK-NEXT:    ret
+  %vc = sext <vscale x 4 x i8> %va to <vscale x 4 x i64>
+  %vd = sext <vscale x 4 x i8> %vb to <vscale x 4 x i64>
+  %ve = sub <vscale x 4 x i64> %vc, %vd
+  ret <vscale x 4 x i64> %ve
+}
+
+define <vscale x 4 x i64> @vwsubu_vv_nxv4i64_nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %vb) {
+; CHECK-LABEL: vwsubu_vv_nxv4i64_nxv4i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e8, mf2, ta, ma
+; CHECK-NEXT:    vwsubu.vv v12, v8, v9
+; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
+; CHECK-NEXT:    vsext.vf4 v8, v12
+; CHECK-NEXT:    ret
+  %vc = zext <vscale x 4 x i8> %va to <vscale x 4 x i64>
+  %vd = zext <vscale x 4 x i8> %vb to <vscale x 4 x i64>
+  %ve = sub <vscale x 4 x i64> %vc, %vd
+  ret <vscale x 4 x i64> %ve
+}
+
+define <vscale x 4 x i64> @vwsub_vx_nxv4i64_nxv4i8(<vscale x 4 x i8> %va, i8 %b) {
+; CHECK-LABEL: vwsub_vx_nxv4i64_nxv4i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e8, mf2, ta, ma
+; CHECK-NEXT:    vmv.v.x v9, a0
+; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
+; CHECK-NEXT:    vsext.vf8 v12, v8
+; CHECK-NEXT:    vsext.vf8 v16, v9
+; CHECK-NEXT:    vsub.vv v8, v12, v16
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 4 x i8> poison, i8 %b, i8 0
+  %splat = shufflevector <vscale x 4 x i8> %head, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer
+  %vc = sext <vscale x 4 x i8> %va to <vscale x 4 x i64>
+  %vd = sext <vscale x 4 x i8> %splat to <vscale x 4 x i64>
+  %ve = sub <vscale x 4 x i64> %vc, %vd
+  ret <vscale x 4 x i64> %ve
+}
+
+define <vscale x 4 x i64> @vwsubu_vx_nxv4i64_nxv4i8(<vscale x 4 x i8> %va, i8 %b) {
+; CHECK-LABEL: vwsubu_vx_nxv4i64_nxv4i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e8, mf2, ta, ma
+; CHECK-NEXT:    vwsubu.vx v12, v8, a0
+; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
+; CHECK-NEXT:    vsext.vf4 v8, v12
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 4 x i8> poison, i8 %b, i8 0
+  %splat = shufflevector <vscale x 4 x i8> %head, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer
+  %vc = zext <vscale x 4 x i8> %va to <vscale x 4 x i64>
+  %vd = zext <vscale x 4 x i8> %splat to <vscale x 4 x i64>
+  %ve = sub <vscale x 4 x i64> %vc, %vd
+  ret <vscale x 4 x i64> %ve
+}
+
+define <vscale x 4 x i64> @vwsub_wv_nxv4i64_nxv4i8(<vscale x 4 x i64> %va, <vscale x 4 x i8> %vb) {
+; CHECK-LABEL: vwsub_wv_nxv4i64_nxv4i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
+; CHECK-NEXT:    vsext.vf8 v16, v12
+; CHECK-NEXT:    vsub.vv v8, v8, v16
+; CHECK-NEXT:    ret
+  %vc = sext <vscale x 4 x i8> %vb to <vscale x 4 x i64>
+  %vd = sub <vscale x 4 x i64> %va, %vc
+  ret <vscale x 4 x i64> %vd
+}
+
+define <vscale x 4 x i64> @vwsubu_wv_nxv4i64_nxv4i8(<vscale x 4 x i64> %va, <vscale x 4 x i8> %vb) {
+; CHECK-LABEL: vwsubu_wv_nxv4i64_nxv4i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
+; CHECK-NEXT:    vzext.vf8 v16, v12
+; CHECK-NEXT:    vsub.vv v8, v8, v16
+; CHECK-NEXT:    ret
+  %vc = zext <vscale x 4 x i8> %vb to <vscale x 4 x i64>
+  %vd = sub <vscale x 4 x i64> %va, %vc
+  ret <vscale x 4 x i64> %vd
+}
+
+define <vscale x 4 x i64> @vwsub_wx_nxv4i64_nxv4i8(<vscale x 4 x i64> %va, i8 %b) {
+; CHECK-LABEL: vwsub_wx_nxv4i64_nxv4i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e8, mf2, ta, ma
+; CHECK-NEXT:    vmv.v.x v12, a0
+; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
+; CHECK-NEXT:    vsext.vf8 v16, v12
+; CHECK-NEXT:    vsub.vv v8, v8, v16
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 4 x i8> poison, i8 %b, i8 0
+  %splat = shufflevector <vscale x 4 x i8> %head, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer
+  %vb = sext <vscale x 4 x i8> %splat to <vscale x 4 x i64>
+  %vc = sub <vscale x 4 x i64> %va, %vb
+  ret <vscale x 4 x i64> %vc
+}
+
+define <vscale x 4 x i64> @vwsubu_wx_nxv4i64_nxv4i8(<vscale x 4 x i64> %va, i8 %b) {
+; CHECK-LABEL: vwsubu_wx_nxv4i64_nxv4i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e8, mf2, ta, ma
+; CHECK-NEXT:    vmv.v.x v12, a0
+; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
+; CHECK-NEXT:    vzext.vf8 v16, v12
+; CHECK-NEXT:    vsub.vv v8, v8, v16
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 4 x i8> poison, i8 %b, i8 0
+  %splat = shufflevector <vscale x 4 x i8> %head, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer
+  %vb = zext <vscale x 4 x i8> %splat to <vscale x 4 x i64>
+  %vc = sub <vscale x 4 x i64> %va, %vb
+  ret <vscale x 4 x i64> %vc
+}
+
+define <vscale x 8 x i64> @vwsub_vv_nxv8i64_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %vb) {
+; CHECK-LABEL: vwsub_vv_nxv8i64_nxv8i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
+; CHECK-NEXT:    vsext.vf8 v16, v8
+; CHECK-NEXT:    vsext.vf8 v24, v9
+; CHECK-NEXT:    vsub.vv v8, v16, v24
+; CHECK-NEXT:    ret
+  %vc = sext <vscale x 8 x i8> %va to <vscale x 8 x i64>
+  %vd = sext <vscale x 8 x i8> %vb to <vscale x 8 x i64>
+  %ve = sub <vscale x 8 x i64> %vc, %vd
+  ret <vscale x 8 x i64> %ve
+}
+
+define <vscale x 8 x i64> @vwsubu_vv_nxv8i64_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %vb) {
+; CHECK-LABEL: vwsubu_vv_nxv8i64_nxv8i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e8, m1, ta, ma
+; CHECK-NEXT:    vwsubu.vv v16, v8, v9
+; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; CHECK-NEXT:    vsext.vf4 v8, v16
+; CHECK-NEXT:    ret
+  %vc = zext <vscale x 8 x i8> %va to <vscale x 8 x i64>
+  %vd = zext <vscale x 8 x i8> %vb to <vscale x 8 x i64>
+  %ve = sub <vscale x 8 x i64> %vc, %vd
+  ret <vscale x 8 x i64> %ve
+}
+
+define <vscale x 8 x i64> @vwsub_vx_nxv8i64_nxv8i8(<vscale x 8 x i8> %va, i8 %b) {
+; CHECK-LABEL: vwsub_vx_nxv8i64_nxv8i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e8, m1, ta, ma
+; CHECK-NEXT:    vmv.v.x v9, a0
+; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; CHECK-NEXT:    vsext.vf8 v16, v8
+; CHECK-NEXT:    vsext.vf8 v24, v9
+; CHECK-NEXT:    vsub.vv v8, v16, v24
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 8 x i8> poison, i8 %b, i8 0
+  %splat = shufflevector <vscale x 8 x i8> %head, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
+  %vc = sext <vscale x 8 x i8> %va to <vscale x 8 x i64>
+  %vd = sext <vscale x 8 x i8> %splat to <vscale x 8 x i64>
+  %ve = sub <vscale x 8 x i64> %vc, %vd
+  ret <vscale x 8 x i64> %ve
+}
+
+define <vscale x 8 x i64> @vwsubu_vx_nxv8i64_nxv8i8(<vscale x 8 x i8> %va, i8 %b) {
+; CHECK-LABEL: vwsubu_vx_nxv8i64_nxv8i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e8, m1, ta, ma
+; CHECK-NEXT:    vwsubu.vx v16, v8, a0
+; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; CHECK-NEXT:    vsext.vf4 v8, v16
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 8 x i8> poison, i8 %b, i8 0
+  %splat = shufflevector <vscale x 8 x i8> %head, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
+  %vc = zext <vscale x 8 x i8> %va to <vscale x 8 x i64>
+  %vd = zext <vscale x 8 x i8> %splat to <vscale x 8 x i64>
+  %ve = sub <vscale x 8 x i64> %vc, %vd
+  ret <vscale x 8 x i64> %ve
+}
+
+define <vscale x 8 x i64> @vwsub_wv_nxv8i64_nxv8i8(<vscale x 8 x i64> %va, <vscale x 8 x i8> %vb) {
+; CHECK-LABEL: vwsub_wv_nxv8i64_nxv8i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
+; CHECK-NEXT:    vsext.vf8 v24, v16
+; CHECK-NEXT:    vsub.vv v8, v8, v24
+; CHECK-NEXT:    ret
+  %vc = sext <vscale x 8 x i8> %vb to <vscale x 8 x i64>
+  %vd = sub <vscale x 8 x i64> %va, %vc
+  ret <vscale x 8 x i64> %vd
+}
+
+define <vscale x 8 x i64> @vwsubu_wv_nxv8i64_nxv8i8(<vscale x 8 x i64> %va, <vscale x 8 x i8> %vb) {
+; CHECK-LABEL: vwsubu_wv_nxv8i64_nxv8i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
+; CHECK-NEXT:    vzext.vf8 v24, v16
+; CHECK-NEXT:    vsub.vv v8, v8, v24
+; CHECK-NEXT:    ret
+  %vc = zext <vscale x 8 x i8> %vb to <vscale x 8 x i64>
+  %vd = sub <vscale x 8 x i64> %va, %vc
+  ret <vscale x 8 x i64> %vd
+}
+
+define <vscale x 8 x i64> @vwsub_wx_nxv8i64_nxv8i8(<vscale x 8 x i64> %va, i8 %b) {
+; CHECK-LABEL: vwsub_wx_nxv8i64_nxv8i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e8, m1, ta, ma
+; CHECK-NEXT:    vmv.v.x v16, a0
+; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; CHECK-NEXT:    vsext.vf8 v24, v16
+; CHECK-NEXT:    vsub.vv v8, v8, v24
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 8 x i8> poison, i8 %b, i8 0
+  %splat = shufflevector <vscale x 8 x i8> %head, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
+  %vb = sext <vscale x 8 x i8> %splat to <vscale x 8 x i64>
+  %vc = sub <vscale x 8 x i64> %va, %vb
+  ret <vscale x 8 x i64> %vc
+}
+
+define <vscale x 8 x i64> @vwsubu_wx_nxv8i64_nxv8i8(<vscale x 8 x i64> %va, i8 %b) {
+; CHECK-LABEL: vwsubu_wx_nxv8i64_nxv8i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e8, m1, ta, ma
+; CHECK-NEXT:    vmv.v.x v16, a0
+; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; CHECK-NEXT:    vzext.vf8 v24, v16
+; CHECK-NEXT:    vsub.vv v8, v8, v24
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 8 x i8> poison, i8 %b, i8 0
+  %splat = shufflevector <vscale x 8 x i8> %head, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
+  %vb = zext <vscale x 8 x i8> %splat to <vscale x 8 x i64>
+  %vc = sub <vscale x 8 x i64> %va, %vb
+  ret <vscale x 8 x i64> %vc
+}