[llvm] 850dde0 - [RISCV][VP] Introduce vp saturating addition/subtraction and RISC-V support. (#82370)

via llvm-commits llvm-commits at lists.llvm.org
Thu Feb 22 22:17:21 PST 2024


Author: Yeting Kuo
Date: 2024-02-23T14:17:15+08:00
New Revision: 850dde063b7f70bb592723064385e9f9ad39c96e

URL: https://github.com/llvm/llvm-project/commit/850dde063b7f70bb592723064385e9f9ad39c96e
DIFF: https://github.com/llvm/llvm-project/commit/850dde063b7f70bb592723064385e9f9ad39c96e.diff

LOG: [RISCV][VP] Introduce vp saturating addition/subtraction and RISC-V support. (#82370)

This patch also pick the MatchContext framework from DAGCombiner to an
indiviual header file to make the framework be used from other files in
llvm/lib/CodeGen/SelectionDAG/.

Added: 
    llvm/lib/CodeGen/SelectionDAG/MatchContext.h
    llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsadd-vp.ll
    llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsaddu-vp.ll
    llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssub-vp.ll
    llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssubu-vp.ll
    llvm/test/CodeGen/RISCV/rvv/vsadd-vp.ll
    llvm/test/CodeGen/RISCV/rvv/vsaddu-vp.ll
    llvm/test/CodeGen/RISCV/rvv/vssub-vp.ll
    llvm/test/CodeGen/RISCV/rvv/vssubu-vp.ll

Modified: 
    llvm/docs/LangRef.rst
    llvm/include/llvm/IR/Intrinsics.td
    llvm/include/llvm/IR/VPIntrinsics.def
    llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
    llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
    llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
    llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
    llvm/lib/Target/RISCV/RISCVISelLowering.cpp
    llvm/unittests/IR/VPIntrinsicTest.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index 8f4495e25d0fa2..19ca9f6ae3fe3b 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -16749,6 +16749,7 @@ an operation is greater than the maximum value, the result is set (or
 "clamped") to this maximum. If it is below the minimum, it is clamped to this
 minimum.
 
+.. _int_sadd_sat:
 
 '``llvm.sadd.sat.*``' Intrinsics
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -16798,6 +16799,8 @@ Examples
       %res = call i4 @llvm.sadd.sat.i4(i4 -4, i4 -5)  ; %res = -8
 
 
+.. _int_uadd_sat:
+
 '``llvm.uadd.sat.*``' Intrinsics
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
@@ -16845,6 +16848,8 @@ Examples
       %res = call i4 @llvm.uadd.sat.i4(i4 8, i4 8)  ; %res = 15
 
 
+.. _int_ssub_sat:
+
 '``llvm.ssub.sat.*``' Intrinsics
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
@@ -16893,6 +16898,8 @@ Examples
       %res = call i4 @llvm.ssub.sat.i4(i4 4, i4 -5)  ; %res = 7
 
 
+.. _int_usub_sat:
+
 '``llvm.usub.sat.*``' Intrinsics
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
@@ -23610,6 +23617,202 @@ Examples:
       %also.r = select <4 x i1> %mask, <4 x i32> %t, <4 x i32> poison
 
 
+.. _int_vp_sadd_sat:
+
+'``llvm.vp.sadd.sat.*``' Intrinsics
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+This is an overloaded intrinsic.
+
+::
+
+      declare <16 x i32>  @llvm.vp.sadd.sat.v16i32 (<16 x i32> <left_op> <16 x i32> <right_op>, <16 x i1> <mask>, i32 <vector_length>)
+      declare <vscale x 4 x i32>  @llvm.vp.sadd.sat.nxv4i32 (<vscale x 4 x i32> <left_op>, <vscale x 4 x i32> <right_op>, <vscale x 4 x i1> <mask>, i32 <vector_length>)
+      declare <256 x i64>  @llvm.vp.sadd.sat.v256i64 (<256 x i64> <left_op>, <256 x i64> <right_op>, <256 x i1> <mask>, i32 <vector_length>)
+
+Overview:
+"""""""""
+
+Predicated signed saturating addition of two vectors of integers.
+
+
+Arguments:
+""""""""""
+
+The first two operands and the result have the same vector of integer type. The
+third operand is the vector mask and has the same number of elements as the
+result vector type. The fourth operand is the explicit vector length of the
+operation.
+
+Semantics:
+""""""""""
+
+The '``llvm.vp.sadd.sat``' intrinsic performs sadd.sat (:ref:`sadd.sat <int_sadd_sat>`)
+of the first and second vector operands on each enabled lane. The result on
+disabled lanes is a :ref:`poison value <poisonvalues>`.
+
+
+Examples:
+"""""""""
+
+.. code-block:: llvm
+
+      %r = call <4 x i32> @llvm.vp.sadd.sat.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i1> %mask, i32 %evl)
+      ;; For all lanes below %evl, %r is lane-wise equivalent to %also.r
+
+      %t = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %a, <4 x i32> %b)
+      %also.r = select <4 x i1> %mask, <4 x i32> %t, <4 x i32> poison
+
+
+.. _int_vp_uadd_sat:
+
+'``llvm.vp.uadd.sat.*``' Intrinsics
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+This is an overloaded intrinsic.
+
+::
+
+      declare <16 x i32>  @llvm.vp.uadd.sat.v16i32 (<16 x i32> <left_op> <16 x i32> <right_op>, <16 x i1> <mask>, i32 <vector_length>)
+      declare <vscale x 4 x i32>  @llvm.vp.uadd.sat.nxv4i32 (<vscale x 4 x i32> <left_op>, <vscale x 4 x i32> <right_op>, <vscale x 4 x i1> <mask>, i32 <vector_length>)
+      declare <256 x i64>  @llvm.vp.uadd.sat.v256i64 (<256 x i64> <left_op>, <256 x i64> <right_op>, <256 x i1> <mask>, i32 <vector_length>)
+
+Overview:
+"""""""""
+
+Predicated unsigned saturating addition of two vectors of integers.
+
+
+Arguments:
+""""""""""
+
+The first two operands and the result have the same vector of integer type. The
+third operand is the vector mask and has the same number of elements as the
+result vector type. The fourth operand is the explicit vector length of the
+operation.
+
+Semantics:
+""""""""""
+
+The '``llvm.vp.uadd.sat``' intrinsic performs uadd.sat (:ref:`uadd.sat <int_uadd_sat>`)
+of the first and second vector operands on each enabled lane. The result on
+disabled lanes is a :ref:`poison value <poisonvalues>`.
+
+
+Examples:
+"""""""""
+
+.. code-block:: llvm
+
+      %r = call <4 x i32> @llvm.vp.uadd.sat.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i1> %mask, i32 %evl)
+      ;; For all lanes below %evl, %r is lane-wise equivalent to %also.r
+
+      %t = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> %a, <4 x i32> %b)
+      %also.r = select <4 x i1> %mask, <4 x i32> %t, <4 x i32> poison
+
+
+.. _int_vp_ssub_sat:
+
+'``llvm.vp.ssub.sat.*``' Intrinsics
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+This is an overloaded intrinsic.
+
+::
+
+      declare <16 x i32>  @llvm.vp.ssub.sat.v16i32 (<16 x i32> <left_op> <16 x i32> <right_op>, <16 x i1> <mask>, i32 <vector_length>)
+      declare <vscale x 4 x i32>  @llvm.vp.ssub.sat.nxv4i32 (<vscale x 4 x i32> <left_op>, <vscale x 4 x i32> <right_op>, <vscale x 4 x i1> <mask>, i32 <vector_length>)
+      declare <256 x i64>  @llvm.vp.ssub.sat.v256i64 (<256 x i64> <left_op>, <256 x i64> <right_op>, <256 x i1> <mask>, i32 <vector_length>)
+
+Overview:
+"""""""""
+
+Predicated signed saturating subtraction of two vectors of integers.
+
+
+Arguments:
+""""""""""
+
+The first two operands and the result have the same vector of integer type. The
+third operand is the vector mask and has the same number of elements as the
+result vector type. The fourth operand is the explicit vector length of the
+operation.
+
+Semantics:
+""""""""""
+
+The '``llvm.vp.ssub.sat``' intrinsic performs ssub.sat (:ref:`ssub.sat <int_ssub_sat>`)
+of the first and second vector operands on each enabled lane. The result on
+disabled lanes is a :ref:`poison value <poisonvalues>`.
+
+
+Examples:
+"""""""""
+
+.. code-block:: llvm
+
+      %r = call <4 x i32> @llvm.vp.ssub.sat.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i1> %mask, i32 %evl)
+      ;; For all lanes below %evl, %r is lane-wise equivalent to %also.r
+
+      %t = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %a, <4 x i32> %b)
+      %also.r = select <4 x i1> %mask, <4 x i32> %t, <4 x i32> poison
+
+
+.. _int_vp_usub_sat:
+
+'``llvm.vp.usub.sat.*``' Intrinsics
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+This is an overloaded intrinsic.
+
+::
+
+      declare <16 x i32>  @llvm.vp.usub.sat.v16i32 (<16 x i32> <left_op> <16 x i32> <right_op>, <16 x i1> <mask>, i32 <vector_length>)
+      declare <vscale x 4 x i32>  @llvm.vp.usub.sat.nxv4i32 (<vscale x 4 x i32> <left_op>, <vscale x 4 x i32> <right_op>, <vscale x 4 x i1> <mask>, i32 <vector_length>)
+      declare <256 x i64>  @llvm.vp.usub.sat.v256i64 (<256 x i64> <left_op>, <256 x i64> <right_op>, <256 x i1> <mask>, i32 <vector_length>)
+
+Overview:
+"""""""""
+
+Predicated unsigned saturating subtraction of two vectors of integers.
+
+
+Arguments:
+""""""""""
+
+The first two operands and the result have the same vector of integer type. The
+third operand is the vector mask and has the same number of elements as the
+result vector type. The fourth operand is the explicit vector length of the
+operation.
+
+Semantics:
+""""""""""
+
+The '``llvm.vp.usub.sat``' intrinsic performs usub.sat (:ref:`usub.sat <int_usub_sat>`)
+of the first and second vector operands on each enabled lane. The result on
+disabled lanes is a :ref:`poison value <poisonvalues>`.
+
+
+Examples:
+"""""""""
+
+.. code-block:: llvm
+
+      %r = call <4 x i32> @llvm.vp.usub.sat.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i1> %mask, i32 %evl)
+      ;; For all lanes below %evl, %r is lane-wise equivalent to %also.r
+
+      %t = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> %a, <4 x i32> %b)
+      %also.r = select <4 x i1> %mask, <4 x i32> %t, <4 x i32> poison
+
+
 .. _int_vp_fshl:
 
 '``llvm.vp.fshl.*``' Intrinsics

diff  --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index 8c0d4d5db32d88..d7c1ce153a6c80 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -1933,6 +1933,26 @@ let IntrProperties = [IntrNoMem, IntrNoSync, IntrWillReturn] in {
                                LLVMMatchType<0>,
                                LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
                                llvm_i32_ty]>;
+  def int_vp_sadd_sat : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
+                             [ LLVMMatchType<0>,
+                               LLVMMatchType<0>,
+                               LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+                               llvm_i32_ty]>;
+  def int_vp_uadd_sat : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
+                             [ LLVMMatchType<0>,
+                               LLVMMatchType<0>,
+                               LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+                               llvm_i32_ty]>;
+  def int_vp_ssub_sat : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
+                             [ LLVMMatchType<0>,
+                               LLVMMatchType<0>,
+                               LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+                               llvm_i32_ty]>;
+  def int_vp_usub_sat : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
+                             [ LLVMMatchType<0>,
+                               LLVMMatchType<0>,
+                               LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+                               llvm_i32_ty]>;
 
   // Floating-point arithmetic
   def int_vp_fadd : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],

diff  --git a/llvm/include/llvm/IR/VPIntrinsics.def b/llvm/include/llvm/IR/VPIntrinsics.def
index 3b32b60609f536..4089acf9ec3f05 100644
--- a/llvm/include/llvm/IR/VPIntrinsics.def
+++ b/llvm/include/llvm/IR/VPIntrinsics.def
@@ -293,6 +293,30 @@ BEGIN_REGISTER_VP(vp_fshr, 3, 4, VP_FSHR, -1)
 VP_PROPERTY_FUNCTIONAL_INTRINSIC(fshr)
 VP_PROPERTY_FUNCTIONAL_SDOPC(FSHR)
 END_REGISTER_VP(vp_fshr, VP_FSHR)
+
+// llvm.vp.sadd.sat(x,y,mask,vlen)
+BEGIN_REGISTER_VP(vp_sadd_sat, 2, 3, VP_SADDSAT, -1)
+VP_PROPERTY_FUNCTIONAL_INTRINSIC(sadd_sat)
+VP_PROPERTY_FUNCTIONAL_SDOPC(SADDSAT)
+END_REGISTER_VP(vp_sadd_sat, VP_SADDSAT)
+
+// llvm.vp.uadd.sat(x,y,mask,vlen)
+BEGIN_REGISTER_VP(vp_uadd_sat, 2, 3, VP_UADDSAT, -1)
+VP_PROPERTY_FUNCTIONAL_INTRINSIC(uadd_sat)
+VP_PROPERTY_FUNCTIONAL_SDOPC(UADDSAT)
+END_REGISTER_VP(vp_uadd_sat, VP_UADDSAT)
+
+// llvm.vp.ssub.sat(x,y,mask,vlen)
+BEGIN_REGISTER_VP(vp_ssub_sat, 2, 3, VP_SSUBSAT, -1)
+VP_PROPERTY_FUNCTIONAL_INTRINSIC(ssub_sat)
+VP_PROPERTY_FUNCTIONAL_SDOPC(SSUBSAT)
+END_REGISTER_VP(vp_ssub_sat, VP_SSUBSAT)
+
+// llvm.vp.usub.sat(x,y,mask,vlen)
+BEGIN_REGISTER_VP(vp_usub_sat, 2, 3, VP_USUBSAT, -1)
+VP_PROPERTY_FUNCTIONAL_INTRINSIC(usub_sat)
+VP_PROPERTY_FUNCTIONAL_SDOPC(USUBSAT)
+END_REGISTER_VP(vp_usub_sat, VP_USUBSAT)
 ///// } Integer Arithmetic
 
 ///// Floating-Point Arithmetic {

diff  --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index ed43dd7f528821..6a28bc8da223b5 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -76,6 +76,8 @@
 #include <utility>
 #include <variant>
 
+#include "MatchContext.h"
+
 using namespace llvm;
 
 #define DEBUG_TYPE "dagcombine"
@@ -888,141 +890,6 @@ class WorklistInserter : public SelectionDAG::DAGUpdateListener {
   void NodeInserted(SDNode *N) override { DC.ConsiderForPruning(N); }
 };
 
-class EmptyMatchContext {
-  SelectionDAG &DAG;
-  const TargetLowering &TLI;
-
-public:
-  EmptyMatchContext(SelectionDAG &DAG, const TargetLowering &TLI, SDNode *Root)
-      : DAG(DAG), TLI(TLI) {}
-
-  bool match(SDValue OpN, unsigned Opcode) const {
-    return Opcode == OpN->getOpcode();
-  }
-
-  // Same as SelectionDAG::getNode().
-  template <typename... ArgT> SDValue getNode(ArgT &&...Args) {
-    return DAG.getNode(std::forward<ArgT>(Args)...);
-  }
-
-  bool isOperationLegalOrCustom(unsigned Op, EVT VT,
-                                bool LegalOnly = false) const {
-    return TLI.isOperationLegalOrCustom(Op, VT, LegalOnly);
-  }
-};
-
-class VPMatchContext {
-  SelectionDAG &DAG;
-  const TargetLowering &TLI;
-  SDValue RootMaskOp;
-  SDValue RootVectorLenOp;
-
-public:
-  VPMatchContext(SelectionDAG &DAG, const TargetLowering &TLI, SDNode *Root)
-      : DAG(DAG), TLI(TLI), RootMaskOp(), RootVectorLenOp() {
-    assert(Root->isVPOpcode());
-    if (auto RootMaskPos = ISD::getVPMaskIdx(Root->getOpcode()))
-      RootMaskOp = Root->getOperand(*RootMaskPos);
-    else if (Root->getOpcode() == ISD::VP_SELECT)
-      RootMaskOp = DAG.getAllOnesConstant(SDLoc(Root),
-                                          Root->getOperand(0).getValueType());
-
-    if (auto RootVLenPos =
-            ISD::getVPExplicitVectorLengthIdx(Root->getOpcode()))
-      RootVectorLenOp = Root->getOperand(*RootVLenPos);
-  }
-
-  /// whether \p OpVal is a node that is functionally compatible with the
-  /// NodeType \p Opc
-  bool match(SDValue OpVal, unsigned Opc) const {
-    if (!OpVal->isVPOpcode())
-      return OpVal->getOpcode() == Opc;
-
-    auto BaseOpc = ISD::getBaseOpcodeForVP(OpVal->getOpcode(),
-                                           !OpVal->getFlags().hasNoFPExcept());
-    if (BaseOpc != Opc)
-      return false;
-
-    // Make sure the mask of OpVal is true mask or is same as Root's.
-    unsigned VPOpcode = OpVal->getOpcode();
-    if (auto MaskPos = ISD::getVPMaskIdx(VPOpcode)) {
-      SDValue MaskOp = OpVal.getOperand(*MaskPos);
-      if (RootMaskOp != MaskOp &&
-          !ISD::isConstantSplatVectorAllOnes(MaskOp.getNode()))
-        return false;
-    }
-
-    // Make sure the EVL of OpVal is same as Root's.
-    if (auto VLenPos = ISD::getVPExplicitVectorLengthIdx(VPOpcode))
-      if (RootVectorLenOp != OpVal.getOperand(*VLenPos))
-        return false;
-    return true;
-  }
-
-  // Specialize based on number of operands.
-  // TODO emit VP intrinsics where MaskOp/VectorLenOp != null
-  // SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT) { return
-  // DAG.getNode(Opcode, DL, VT); }
-  SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue Operand) {
-    unsigned VPOpcode = ISD::getVPForBaseOpcode(Opcode);
-    assert(ISD::getVPMaskIdx(VPOpcode) == 1 &&
-           ISD::getVPExplicitVectorLengthIdx(VPOpcode) == 2);
-    return DAG.getNode(VPOpcode, DL, VT,
-                       {Operand, RootMaskOp, RootVectorLenOp});
-  }
-
-  SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1,
-                  SDValue N2) {
-    unsigned VPOpcode = ISD::getVPForBaseOpcode(Opcode);
-    assert(ISD::getVPMaskIdx(VPOpcode) == 2 &&
-           ISD::getVPExplicitVectorLengthIdx(VPOpcode) == 3);
-    return DAG.getNode(VPOpcode, DL, VT,
-                       {N1, N2, RootMaskOp, RootVectorLenOp});
-  }
-
-  SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1,
-                  SDValue N2, SDValue N3) {
-    unsigned VPOpcode = ISD::getVPForBaseOpcode(Opcode);
-    assert(ISD::getVPMaskIdx(VPOpcode) == 3 &&
-           ISD::getVPExplicitVectorLengthIdx(VPOpcode) == 4);
-    return DAG.getNode(VPOpcode, DL, VT,
-                       {N1, N2, N3, RootMaskOp, RootVectorLenOp});
-  }
-
-  SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue Operand,
-                  SDNodeFlags Flags) {
-    unsigned VPOpcode = ISD::getVPForBaseOpcode(Opcode);
-    assert(ISD::getVPMaskIdx(VPOpcode) == 1 &&
-           ISD::getVPExplicitVectorLengthIdx(VPOpcode) == 2);
-    return DAG.getNode(VPOpcode, DL, VT, {Operand, RootMaskOp, RootVectorLenOp},
-                       Flags);
-  }
-
-  SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1,
-                  SDValue N2, SDNodeFlags Flags) {
-    unsigned VPOpcode = ISD::getVPForBaseOpcode(Opcode);
-    assert(ISD::getVPMaskIdx(VPOpcode) == 2 &&
-           ISD::getVPExplicitVectorLengthIdx(VPOpcode) == 3);
-    return DAG.getNode(VPOpcode, DL, VT, {N1, N2, RootMaskOp, RootVectorLenOp},
-                       Flags);
-  }
-
-  SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1,
-                  SDValue N2, SDValue N3, SDNodeFlags Flags) {
-    unsigned VPOpcode = ISD::getVPForBaseOpcode(Opcode);
-    assert(ISD::getVPMaskIdx(VPOpcode) == 3 &&
-           ISD::getVPExplicitVectorLengthIdx(VPOpcode) == 4);
-    return DAG.getNode(VPOpcode, DL, VT,
-                       {N1, N2, N3, RootMaskOp, RootVectorLenOp}, Flags);
-  }
-
-  bool isOperationLegalOrCustom(unsigned Op, EVT VT,
-                                bool LegalOnly = false) const {
-    unsigned VPOp = ISD::getVPForBaseOpcode(Op);
-    return TLI.isOperationLegalOrCustom(VPOp, VT, LegalOnly);
-  }
-};
-
 } // end anonymous namespace
 
 //===----------------------------------------------------------------------===//

diff  --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index a4ba261686c688..df17d6530b0de3 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -217,7 +217,15 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
   case ISD::SSUBSAT:
   case ISD::USUBSAT:
   case ISD::SSHLSAT:
-  case ISD::USHLSAT:     Res = PromoteIntRes_ADDSUBSHLSAT(N); break;
+  case ISD::USHLSAT:
+    Res = PromoteIntRes_ADDSUBSHLSAT<EmptyMatchContext>(N);
+    break;
+  case ISD::VP_SADDSAT:
+  case ISD::VP_UADDSAT:
+  case ISD::VP_SSUBSAT:
+  case ISD::VP_USUBSAT:
+    Res = PromoteIntRes_ADDSUBSHLSAT<VPMatchContext>(N);
+    break;
 
   case ISD::SMULFIX:
   case ISD::SMULFIXSAT:
@@ -934,6 +942,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Overflow(SDNode *N) {
   return DAG.getBoolExtOrTrunc(Res.getValue(1), dl, NVT, VT);
 }
 
+template <class MatchContextClass>
 SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBSHLSAT(SDNode *N) {
   // If the promoted type is legal, we can convert this to:
   //   1. ANY_EXTEND iN to iM
@@ -945,11 +954,13 @@ SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBSHLSAT(SDNode *N) {
   SDLoc dl(N);
   SDValue Op1 = N->getOperand(0);
   SDValue Op2 = N->getOperand(1);
+  MatchContextClass matcher(DAG, TLI, N);
   unsigned OldBits = Op1.getScalarValueSizeInBits();
 
-  unsigned Opcode = N->getOpcode();
+  unsigned Opcode = matcher.getRootBaseOpcode();
   bool IsShift = Opcode == ISD::USHLSAT || Opcode == ISD::SSHLSAT;
 
+  // FIXME: We need vp-aware PromotedInteger functions.
   SDValue Op1Promoted, Op2Promoted;
   if (IsShift) {
     Op1Promoted = GetPromotedInteger(Op1);
@@ -968,18 +979,18 @@ SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBSHLSAT(SDNode *N) {
     APInt MaxVal = APInt::getAllOnes(OldBits).zext(NewBits);
     SDValue SatMax = DAG.getConstant(MaxVal, dl, PromotedType);
     SDValue Add =
-        DAG.getNode(ISD::ADD, dl, PromotedType, Op1Promoted, Op2Promoted);
-    return DAG.getNode(ISD::UMIN, dl, PromotedType, Add, SatMax);
+        matcher.getNode(ISD::ADD, dl, PromotedType, Op1Promoted, Op2Promoted);
+    return matcher.getNode(ISD::UMIN, dl, PromotedType, Add, SatMax);
   }
 
   // USUBSAT can always be promoted as long as we have zero-extended the args.
   if (Opcode == ISD::USUBSAT)
-    return DAG.getNode(ISD::USUBSAT, dl, PromotedType, Op1Promoted,
-                       Op2Promoted);
+    return matcher.getNode(ISD::USUBSAT, dl, PromotedType, Op1Promoted,
+                           Op2Promoted);
 
   // Shift cannot use a min/max expansion, we can't detect overflow if all of
   // the bits have been shifted out.
-  if (IsShift || TLI.isOperationLegal(Opcode, PromotedType)) {
+  if (IsShift || matcher.isOperationLegal(Opcode, PromotedType)) {
     unsigned ShiftOp;
     switch (Opcode) {
     case ISD::SADDSAT:
@@ -1002,11 +1013,11 @@ SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBSHLSAT(SDNode *N) {
         DAG.getNode(ISD::SHL, dl, PromotedType, Op1Promoted, ShiftAmount);
     if (!IsShift)
       Op2Promoted =
-          DAG.getNode(ISD::SHL, dl, PromotedType, Op2Promoted, ShiftAmount);
+          matcher.getNode(ISD::SHL, dl, PromotedType, Op2Promoted, ShiftAmount);
 
     SDValue Result =
-        DAG.getNode(Opcode, dl, PromotedType, Op1Promoted, Op2Promoted);
-    return DAG.getNode(ShiftOp, dl, PromotedType, Result, ShiftAmount);
+        matcher.getNode(Opcode, dl, PromotedType, Op1Promoted, Op2Promoted);
+    return matcher.getNode(ShiftOp, dl, PromotedType, Result, ShiftAmount);
   }
 
   unsigned AddOp = Opcode == ISD::SADDSAT ? ISD::ADD : ISD::SUB;
@@ -1015,9 +1026,9 @@ SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBSHLSAT(SDNode *N) {
   SDValue SatMin = DAG.getConstant(MinVal, dl, PromotedType);
   SDValue SatMax = DAG.getConstant(MaxVal, dl, PromotedType);
   SDValue Result =
-      DAG.getNode(AddOp, dl, PromotedType, Op1Promoted, Op2Promoted);
-  Result = DAG.getNode(ISD::SMIN, dl, PromotedType, Result, SatMax);
-  Result = DAG.getNode(ISD::SMAX, dl, PromotedType, Result, SatMin);
+      matcher.getNode(AddOp, dl, PromotedType, Op1Promoted, Op2Promoted);
+  Result = matcher.getNode(ISD::SMIN, dl, PromotedType, Result, SatMax);
+  Result = matcher.getNode(ISD::SMAX, dl, PromotedType, Result, SatMin);
   return Result;
 }
 

diff  --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 91149871628574..3c84f67653eca8 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -15,6 +15,7 @@
 #ifndef LLVM_LIB_CODEGEN_SELECTIONDAG_LEGALIZETYPES_H
 #define LLVM_LIB_CODEGEN_SELECTIONDAG_LEGALIZETYPES_H
 
+#include "MatchContext.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/CodeGen/SelectionDAG.h"
 #include "llvm/CodeGen/TargetLowering.h"
@@ -355,6 +356,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
   SDValue PromoteIntRes_VAARG(SDNode *N);
   SDValue PromoteIntRes_VSCALE(SDNode *N);
   SDValue PromoteIntRes_XMULO(SDNode *N, unsigned ResNo);
+  template <class MatchContextClass>
   SDValue PromoteIntRes_ADDSUBSHLSAT(SDNode *N);
   SDValue PromoteIntRes_MULFIX(SDNode *N);
   SDValue PromoteIntRes_DIVFIX(SDNode *N);

diff  --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 7fc252600534f0..90cda2a1155b61 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -1163,10 +1163,10 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
   case ISD::SMAX: case ISD::VP_SMAX:
   case ISD::UMIN: case ISD::VP_UMIN:
   case ISD::UMAX: case ISD::VP_UMAX:
-  case ISD::SADDSAT:
-  case ISD::UADDSAT:
-  case ISD::SSUBSAT:
-  case ISD::USUBSAT:
+  case ISD::SADDSAT: case ISD::VP_SADDSAT:
+  case ISD::UADDSAT: case ISD::VP_UADDSAT:
+  case ISD::SSUBSAT: case ISD::VP_SSUBSAT:
+  case ISD::USUBSAT: case ISD::VP_USUBSAT:
   case ISD::SSHLSAT:
   case ISD::USHLSAT:
   case ISD::ROTL:
@@ -4140,10 +4140,10 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
   case ISD::SMAX: case ISD::VP_SMAX:
   case ISD::UMIN: case ISD::VP_UMIN:
   case ISD::UMAX: case ISD::VP_UMAX:
-  case ISD::UADDSAT:
-  case ISD::SADDSAT:
-  case ISD::USUBSAT:
-  case ISD::SSUBSAT:
+  case ISD::UADDSAT: case ISD::VP_UADDSAT:
+  case ISD::SADDSAT: case ISD::VP_SADDSAT:
+  case ISD::USUBSAT: case ISD::VP_USUBSAT:
+  case ISD::SSUBSAT: case ISD::VP_SSUBSAT:
   case ISD::SSHLSAT:
   case ISD::USHLSAT:
   case ISD::ROTL:

diff  --git a/llvm/lib/CodeGen/SelectionDAG/MatchContext.h b/llvm/lib/CodeGen/SelectionDAG/MatchContext.h
new file mode 100644
index 00000000000000..f965cb952f97a2
--- /dev/null
+++ b/llvm/lib/CodeGen/SelectionDAG/MatchContext.h
@@ -0,0 +1,175 @@
+//===---------------- llvm/CodeGen/MatchContext.h  --------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the EmptyMatchContext class and VPMatchContext class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_SELECTIONDAG_MATCHCONTEXT_H
+#define LLVM_LIB_CODEGEN_SELECTIONDAG_MATCHCONTEXT_H
+
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/TargetLowering.h"
+
+using namespace llvm;
+
+namespace {
+class EmptyMatchContext {
+  SelectionDAG &DAG;
+  const TargetLowering &TLI;
+  SDNode *Root;
+
+public:
+  EmptyMatchContext(SelectionDAG &DAG, const TargetLowering &TLI, SDNode *Root)
+      : DAG(DAG), TLI(TLI), Root(Root) {}
+
+  unsigned getRootBaseOpcode() { return Root->getOpcode(); }
+  bool match(SDValue OpN, unsigned Opcode) const {
+    return Opcode == OpN->getOpcode();
+  }
+
+  // Same as SelectionDAG::getNode().
+  template <typename... ArgT> SDValue getNode(ArgT &&...Args) {
+    return DAG.getNode(std::forward<ArgT>(Args)...);
+  }
+
+  bool isOperationLegal(unsigned Op, EVT VT) const {
+    return TLI.isOperationLegal(Op, VT);
+  }
+
+  bool isOperationLegalOrCustom(unsigned Op, EVT VT,
+                                bool LegalOnly = false) const {
+    return TLI.isOperationLegalOrCustom(Op, VT, LegalOnly);
+  }
+};
+
+class VPMatchContext {
+  SelectionDAG &DAG;
+  const TargetLowering &TLI;
+  SDValue RootMaskOp;
+  SDValue RootVectorLenOp;
+  SDNode *Root;
+
+public:
+  VPMatchContext(SelectionDAG &DAG, const TargetLowering &TLI, SDNode *_Root)
+      : DAG(DAG), TLI(TLI), RootMaskOp(), RootVectorLenOp() {
+    Root = _Root;
+    assert(Root->isVPOpcode());
+    if (auto RootMaskPos = ISD::getVPMaskIdx(Root->getOpcode()))
+      RootMaskOp = Root->getOperand(*RootMaskPos);
+    else if (Root->getOpcode() == ISD::VP_SELECT)
+      RootMaskOp = DAG.getAllOnesConstant(SDLoc(Root),
+                                          Root->getOperand(0).getValueType());
+
+    if (auto RootVLenPos = ISD::getVPExplicitVectorLengthIdx(Root->getOpcode()))
+      RootVectorLenOp = Root->getOperand(*RootVLenPos);
+  }
+
+  unsigned getRootBaseOpcode() {
+    std::optional<unsigned> Opcode = ISD::getBaseOpcodeForVP(
+        Root->getOpcode(), !Root->getFlags().hasNoFPExcept());
+    assert(Opcode.has_value());
+    return *Opcode;
+  }
+
+  /// whether \p OpVal is a node that is functionally compatible with the
+  /// NodeType \p Opc
+  bool match(SDValue OpVal, unsigned Opc) const {
+    if (!OpVal->isVPOpcode())
+      return OpVal->getOpcode() == Opc;
+
+    auto BaseOpc = ISD::getBaseOpcodeForVP(OpVal->getOpcode(),
+                                           !OpVal->getFlags().hasNoFPExcept());
+    if (BaseOpc != Opc)
+      return false;
+
+    // Make sure the mask of OpVal is true mask or is same as Root's.
+    unsigned VPOpcode = OpVal->getOpcode();
+    if (auto MaskPos = ISD::getVPMaskIdx(VPOpcode)) {
+      SDValue MaskOp = OpVal.getOperand(*MaskPos);
+      if (RootMaskOp != MaskOp &&
+          !ISD::isConstantSplatVectorAllOnes(MaskOp.getNode()))
+        return false;
+    }
+
+    // Make sure the EVL of OpVal is same as Root's.
+    if (auto VLenPos = ISD::getVPExplicitVectorLengthIdx(VPOpcode))
+      if (RootVectorLenOp != OpVal.getOperand(*VLenPos))
+        return false;
+    return true;
+  }
+
+  // Specialize based on number of operands.
+  // TODO emit VP intrinsics where MaskOp/VectorLenOp != null
+  // SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT) { return
+  // DAG.getNode(Opcode, DL, VT); }
+  SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue Operand) {
+    unsigned VPOpcode = ISD::getVPForBaseOpcode(Opcode);
+    assert(ISD::getVPMaskIdx(VPOpcode) == 1 &&
+           ISD::getVPExplicitVectorLengthIdx(VPOpcode) == 2);
+    return DAG.getNode(VPOpcode, DL, VT,
+                       {Operand, RootMaskOp, RootVectorLenOp});
+  }
+
+  SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1,
+                  SDValue N2) {
+    unsigned VPOpcode = ISD::getVPForBaseOpcode(Opcode);
+    assert(ISD::getVPMaskIdx(VPOpcode) == 2 &&
+           ISD::getVPExplicitVectorLengthIdx(VPOpcode) == 3);
+    return DAG.getNode(VPOpcode, DL, VT, {N1, N2, RootMaskOp, RootVectorLenOp});
+  }
+
+  SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1,
+                  SDValue N2, SDValue N3) {
+    unsigned VPOpcode = ISD::getVPForBaseOpcode(Opcode);
+    assert(ISD::getVPMaskIdx(VPOpcode) == 3 &&
+           ISD::getVPExplicitVectorLengthIdx(VPOpcode) == 4);
+    return DAG.getNode(VPOpcode, DL, VT,
+                       {N1, N2, N3, RootMaskOp, RootVectorLenOp});
+  }
+
+  SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue Operand,
+                  SDNodeFlags Flags) {
+    unsigned VPOpcode = ISD::getVPForBaseOpcode(Opcode);
+    assert(ISD::getVPMaskIdx(VPOpcode) == 1 &&
+           ISD::getVPExplicitVectorLengthIdx(VPOpcode) == 2);
+    return DAG.getNode(VPOpcode, DL, VT, {Operand, RootMaskOp, RootVectorLenOp},
+                       Flags);
+  }
+
+  SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1,
+                  SDValue N2, SDNodeFlags Flags) {
+    unsigned VPOpcode = ISD::getVPForBaseOpcode(Opcode);
+    assert(ISD::getVPMaskIdx(VPOpcode) == 2 &&
+           ISD::getVPExplicitVectorLengthIdx(VPOpcode) == 3);
+    return DAG.getNode(VPOpcode, DL, VT, {N1, N2, RootMaskOp, RootVectorLenOp},
+                       Flags);
+  }
+
+  SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1,
+                  SDValue N2, SDValue N3, SDNodeFlags Flags) {
+    unsigned VPOpcode = ISD::getVPForBaseOpcode(Opcode);
+    assert(ISD::getVPMaskIdx(VPOpcode) == 3 &&
+           ISD::getVPExplicitVectorLengthIdx(VPOpcode) == 4);
+    return DAG.getNode(VPOpcode, DL, VT,
+                       {N1, N2, N3, RootMaskOp, RootVectorLenOp}, Flags);
+  }
+
+  bool isOperationLegal(unsigned Op, EVT VT) const {
+    unsigned VPOp = ISD::getVPForBaseOpcode(Op);
+    return TLI.isOperationLegal(VPOp, VT);
+  }
+
+  bool isOperationLegalOrCustom(unsigned Op, EVT VT,
+                                bool LegalOnly = false) const {
+    unsigned VPOp = ISD::getVPForBaseOpcode(Op);
+    return TLI.isOperationLegalOrCustom(VPOp, VT, LegalOnly);
+  }
+};
+} // end anonymous namespace
+#endif

diff  --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 7540b22d13b7fc..540c2e7476dc18 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -691,7 +691,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
         ISD::VP_FP_TO_UINT,  ISD::VP_SETCC,       ISD::VP_SIGN_EXTEND,
         ISD::VP_ZERO_EXTEND, ISD::VP_TRUNCATE,    ISD::VP_SMIN,
         ISD::VP_SMAX,        ISD::VP_UMIN,        ISD::VP_UMAX,
-        ISD::VP_ABS, ISD::EXPERIMENTAL_VP_REVERSE, ISD::EXPERIMENTAL_VP_SPLICE};
+        ISD::VP_ABS, ISD::EXPERIMENTAL_VP_REVERSE, ISD::EXPERIMENTAL_VP_SPLICE,
+        ISD::VP_SADDSAT,     ISD::VP_UADDSAT,     ISD::VP_SSUBSAT,
+        ISD::VP_USUBSAT};
 
     static const unsigned FloatingPointVPOps[] = {
         ISD::VP_FADD,        ISD::VP_FSUB,        ISD::VP_FMUL,
@@ -5752,6 +5754,10 @@ static unsigned getRISCVVLOp(SDValue Op) {
   VP_CASE(SINT_TO_FP) // VP_SINT_TO_FP
   VP_CASE(UINT_TO_FP) // VP_UINT_TO_FP
   VP_CASE(BITREVERSE) // VP_BITREVERSE
+  VP_CASE(SADDSAT)    // VP_SADDSAT
+  VP_CASE(UADDSAT)    // VP_UADDSAT
+  VP_CASE(SSUBSAT)    // VP_SSUBSAT
+  VP_CASE(USUBSAT)    // VP_USUBSAT
   VP_CASE(BSWAP)      // VP_BSWAP
   VP_CASE(CTLZ)       // VP_CTLZ
   VP_CASE(CTTZ)       // VP_CTTZ
@@ -6791,6 +6797,10 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
   case ISD::VP_UDIV:
   case ISD::VP_SREM:
   case ISD::VP_UREM:
+  case ISD::VP_UADDSAT:
+  case ISD::VP_USUBSAT:
+  case ISD::VP_SADDSAT:
+  case ISD::VP_SSUBSAT:
     return lowerVPOp(Op, DAG);
   case ISD::VP_AND:
   case ISD::VP_OR:

diff  --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsadd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsadd-vp.ll
new file mode 100644
index 00000000000000..6c5dd0403dff17
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsadd-vp.ll
@@ -0,0 +1,1701 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s --check-prefixes=CHECK,RV64
+
+declare <8 x i7> @llvm.vp.sadd.sat.v8i7(<8 x i7>, <8 x i7>, <8 x i1>, i32)
+
+define <8 x i7> @vsadd_vv_v8i7(<8 x i7> %va, <8 x i7> %b, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_v8i7:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT:    vadd.vv v9, v9, v9
+; CHECK-NEXT:    vsra.vi v9, v9, 1
+; CHECK-NEXT:    vadd.vv v8, v8, v8
+; CHECK-NEXT:    vsra.vi v8, v8, 1
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    li a0, 63
+; CHECK-NEXT:    vmin.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    li a0, 192
+; CHECK-NEXT:    vmax.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %v = call <8 x i7> @llvm.vp.sadd.sat.v8i7(<8 x i7> %va, <8 x i7> %b, <8 x i1> %m, i32 %evl)
+  ret <8 x i7> %v
+}
+
+declare <2 x i8> @llvm.vp.sadd.sat.v2i8(<2 x i8>, <2 x i8>, <2 x i1>, i32)
+
+define <2 x i8> @vsadd_vv_v2i8(<2 x i8> %va, <2 x i8> %b, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_v2i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-NEXT:    vsadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    ret
+  %v = call <2 x i8> @llvm.vp.sadd.sat.v2i8(<2 x i8> %va, <2 x i8> %b, <2 x i1> %m, i32 %evl)
+  ret <2 x i8> %v
+}
+
+define <2 x i8> @vsadd_vv_v2i8_unmasked(<2 x i8> %va, <2 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_v2i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-NEXT:    vsadd.vv v8, v8, v9
+; CHECK-NEXT:    ret
+  %head = insertelement <2 x i1> poison, i1 true, i32 0
+  %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
+  %v = call <2 x i8> @llvm.vp.sadd.sat.v2i8(<2 x i8> %va, <2 x i8> %b, <2 x i1> %m, i32 %evl)
+  ret <2 x i8> %v
+}
+
+define <2 x i8> @vsadd_vx_v2i8(<2 x i8> %va, i8 %b, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_v2i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT:    vsadd.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <2 x i8> poison, i8 %b, i32 0
+  %vb = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer
+  %v = call <2 x i8> @llvm.vp.sadd.sat.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 %evl)
+  ret <2 x i8> %v
+}
+
+define <2 x i8> @vsadd_vx_v2i8_unmasked(<2 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_v2i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT:    vsadd.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <2 x i8> poison, i8 %b, i32 0
+  %vb = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer
+  %head = insertelement <2 x i1> poison, i1 true, i32 0
+  %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
+  %v = call <2 x i8> @llvm.vp.sadd.sat.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 %evl)
+  ret <2 x i8> %v
+}
+
+define <2 x i8> @vsadd_vi_v2i8(<2 x i8> %va, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_v2i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-NEXT:    vsadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <2 x i8> poison, i8 -1, i32 0
+  %vb = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer
+  %v = call <2 x i8> @llvm.vp.sadd.sat.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 %evl)
+  ret <2 x i8> %v
+}
+
+define <2 x i8> @vsadd_vi_v2i8_unmasked(<2 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_v2i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-NEXT:    vsadd.vi v8, v8, -1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <2 x i8> poison, i8 -1, i32 0
+  %vb = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer
+  %head = insertelement <2 x i1> poison, i1 true, i32 0
+  %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
+  %v = call <2 x i8> @llvm.vp.sadd.sat.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 %evl)
+  ret <2 x i8> %v
+}
+
+declare <4 x i8> @llvm.vp.sadd.sat.v4i8(<4 x i8>, <4 x i8>, <4 x i1>, i32)
+
+define <4 x i8> @vsadd_vv_v4i8(<4 x i8> %va, <4 x i8> %b, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_v4i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-NEXT:    vsadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    ret
+  %v = call <4 x i8> @llvm.vp.sadd.sat.v4i8(<4 x i8> %va, <4 x i8> %b, <4 x i1> %m, i32 %evl)
+  ret <4 x i8> %v
+}
+
+define <4 x i8> @vsadd_vv_v4i8_unmasked(<4 x i8> %va, <4 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_v4i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-NEXT:    vsadd.vv v8, v8, v9
+; CHECK-NEXT:    ret
+  %head = insertelement <4 x i1> poison, i1 true, i32 0
+  %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
+  %v = call <4 x i8> @llvm.vp.sadd.sat.v4i8(<4 x i8> %va, <4 x i8> %b, <4 x i1> %m, i32 %evl)
+  ret <4 x i8> %v
+}
+
+define <4 x i8> @vsadd_vx_v4i8(<4 x i8> %va, i8 %b, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_v4i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, ta, ma
+; CHECK-NEXT:    vsadd.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <4 x i8> poison, i8 %b, i32 0
+  %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer
+  %v = call <4 x i8> @llvm.vp.sadd.sat.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> %m, i32 %evl)
+  ret <4 x i8> %v
+}
+
+define <4 x i8> @vsadd_vx_v4i8_commute(<4 x i8> %va, i8 %b, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_v4i8_commute:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, ta, ma
+; CHECK-NEXT:    vsadd.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <4 x i8> poison, i8 %b, i32 0
+  %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer
+  %v = call <4 x i8> @llvm.vp.sadd.sat.v4i8(<4 x i8> %vb, <4 x i8> %va, <4 x i1> %m, i32 %evl)
+  ret <4 x i8> %v
+}
+
+define <4 x i8> @vsadd_vx_v4i8_unmasked(<4 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_v4i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, ta, ma
+; CHECK-NEXT:    vsadd.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <4 x i8> poison, i8 %b, i32 0
+  %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer
+  %head = insertelement <4 x i1> poison, i1 true, i32 0
+  %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
+  %v = call <4 x i8> @llvm.vp.sadd.sat.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> %m, i32 %evl)
+  ret <4 x i8> %v
+}
+
+define <4 x i8> @vsadd_vi_v4i8(<4 x i8> %va, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_v4i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-NEXT:    vsadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <4 x i8> poison, i8 -1, i32 0
+  %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer
+  %v = call <4 x i8> @llvm.vp.sadd.sat.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> %m, i32 %evl)
+  ret <4 x i8> %v
+}
+
+define <4 x i8> @vsadd_vi_v4i8_unmasked(<4 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_v4i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-NEXT:    vsadd.vi v8, v8, -1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <4 x i8> poison, i8 -1, i32 0
+  %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer
+  %head = insertelement <4 x i1> poison, i1 true, i32 0
+  %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
+  %v = call <4 x i8> @llvm.vp.sadd.sat.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> %m, i32 %evl)
+  ret <4 x i8> %v
+}
+
+declare <5 x i8> @llvm.vp.sadd.sat.v5i8(<5 x i8>, <5 x i8>, <5 x i1>, i32)
+
+define <5 x i8> @vsadd_vv_v5i8(<5 x i8> %va, <5 x i8> %b, <5 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_v5i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT:    vsadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    ret
+  %v = call <5 x i8> @llvm.vp.sadd.sat.v5i8(<5 x i8> %va, <5 x i8> %b, <5 x i1> %m, i32 %evl)
+  ret <5 x i8> %v
+}
+
+define <5 x i8> @vsadd_vv_v5i8_unmasked(<5 x i8> %va, <5 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_v5i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT:    vsadd.vv v8, v8, v9
+; CHECK-NEXT:    ret
+  %head = insertelement <5 x i1> poison, i1 true, i32 0
+  %m = shufflevector <5 x i1> %head, <5 x i1> poison, <5 x i32> zeroinitializer
+  %v = call <5 x i8> @llvm.vp.sadd.sat.v5i8(<5 x i8> %va, <5 x i8> %b, <5 x i1> %m, i32 %evl)
+  ret <5 x i8> %v
+}
+
+define <5 x i8> @vsadd_vx_v5i8(<5 x i8> %va, i8 %b, <5 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_v5i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-NEXT:    vsadd.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <5 x i8> poison, i8 %b, i32 0
+  %vb = shufflevector <5 x i8> %elt.head, <5 x i8> poison, <5 x i32> zeroinitializer
+  %v = call <5 x i8> @llvm.vp.sadd.sat.v5i8(<5 x i8> %va, <5 x i8> %vb, <5 x i1> %m, i32 %evl)
+  ret <5 x i8> %v
+}
+
+define <5 x i8> @vsadd_vx_v5i8_unmasked(<5 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_v5i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-NEXT:    vsadd.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <5 x i8> poison, i8 %b, i32 0
+  %vb = shufflevector <5 x i8> %elt.head, <5 x i8> poison, <5 x i32> zeroinitializer
+  %head = insertelement <5 x i1> poison, i1 true, i32 0
+  %m = shufflevector <5 x i1> %head, <5 x i1> poison, <5 x i32> zeroinitializer
+  %v = call <5 x i8> @llvm.vp.sadd.sat.v5i8(<5 x i8> %va, <5 x i8> %vb, <5 x i1> %m, i32 %evl)
+  ret <5 x i8> %v
+}
+
+define <5 x i8> @vsadd_vi_v5i8(<5 x i8> %va, <5 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_v5i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT:    vsadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <5 x i8> poison, i8 -1, i32 0
+  %vb = shufflevector <5 x i8> %elt.head, <5 x i8> poison, <5 x i32> zeroinitializer
+  %v = call <5 x i8> @llvm.vp.sadd.sat.v5i8(<5 x i8> %va, <5 x i8> %vb, <5 x i1> %m, i32 %evl)
+  ret <5 x i8> %v
+}
+
+define <5 x i8> @vsadd_vi_v5i8_unmasked(<5 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_v5i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT:    vsadd.vi v8, v8, -1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <5 x i8> poison, i8 -1, i32 0
+  %vb = shufflevector <5 x i8> %elt.head, <5 x i8> poison, <5 x i32> zeroinitializer
+  %head = insertelement <5 x i1> poison, i1 true, i32 0
+  %m = shufflevector <5 x i1> %head, <5 x i1> poison, <5 x i32> zeroinitializer
+  %v = call <5 x i8> @llvm.vp.sadd.sat.v5i8(<5 x i8> %va, <5 x i8> %vb, <5 x i1> %m, i32 %evl)
+  ret <5 x i8> %v
+}
+
+declare <8 x i8> @llvm.vp.sadd.sat.v8i8(<8 x i8>, <8 x i8>, <8 x i1>, i32)
+
+define <8 x i8> @vsadd_vv_v8i8(<8 x i8> %va, <8 x i8> %b, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_v8i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT:    vsadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    ret
+  %v = call <8 x i8> @llvm.vp.sadd.sat.v8i8(<8 x i8> %va, <8 x i8> %b, <8 x i1> %m, i32 %evl)
+  ret <8 x i8> %v
+}
+
+define <8 x i8> @vsadd_vv_v8i8_unmasked(<8 x i8> %va, <8 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_v8i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT:    vsadd.vv v8, v8, v9
+; CHECK-NEXT:    ret
+  %head = insertelement <8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+  %v = call <8 x i8> @llvm.vp.sadd.sat.v8i8(<8 x i8> %va, <8 x i8> %b, <8 x i1> %m, i32 %evl)
+  ret <8 x i8> %v
+}
+
+define <8 x i8> @vsadd_vx_v8i8(<8 x i8> %va, i8 %b, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_v8i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-NEXT:    vsadd.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <8 x i8> poison, i8 %b, i32 0
+  %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer
+  %v = call <8 x i8> @llvm.vp.sadd.sat.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 %evl)
+  ret <8 x i8> %v
+}
+
+define <8 x i8> @vsadd_vx_v8i8_unmasked(<8 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_v8i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-NEXT:    vsadd.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <8 x i8> poison, i8 %b, i32 0
+  %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer
+  %head = insertelement <8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+  %v = call <8 x i8> @llvm.vp.sadd.sat.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 %evl)
+  ret <8 x i8> %v
+}
+
+define <8 x i8> @vsadd_vi_v8i8(<8 x i8> %va, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_v8i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT:    vsadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <8 x i8> poison, i8 -1, i32 0
+  %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer
+  %v = call <8 x i8> @llvm.vp.sadd.sat.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 %evl)
+  ret <8 x i8> %v
+}
+
+define <8 x i8> @vsadd_vi_v8i8_unmasked(<8 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_v8i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT:    vsadd.vi v8, v8, -1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <8 x i8> poison, i8 -1, i32 0
+  %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer
+  %head = insertelement <8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+  %v = call <8 x i8> @llvm.vp.sadd.sat.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 %evl)
+  ret <8 x i8> %v
+}
+
+declare <16 x i8> @llvm.vp.sadd.sat.v16i8(<16 x i8>, <16 x i8>, <16 x i1>, i32)
+
+define <16 x i8> @vsadd_vv_v16i8(<16 x i8> %va, <16 x i8> %b, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_v16i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-NEXT:    vsadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    ret
+  %v = call <16 x i8> @llvm.vp.sadd.sat.v16i8(<16 x i8> %va, <16 x i8> %b, <16 x i1> %m, i32 %evl)
+  ret <16 x i8> %v
+}
+
+define <16 x i8> @vsadd_vv_v16i8_unmasked(<16 x i8> %va, <16 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_v16i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-NEXT:    vsadd.vv v8, v8, v9
+; CHECK-NEXT:    ret
+  %head = insertelement <16 x i1> poison, i1 true, i32 0
+  %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
+  %v = call <16 x i8> @llvm.vp.sadd.sat.v16i8(<16 x i8> %va, <16 x i8> %b, <16 x i1> %m, i32 %evl)
+  ret <16 x i8> %v
+}
+
+define <16 x i8> @vsadd_vx_v16i8(<16 x i8> %va, i8 %b, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_v16i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-NEXT:    vsadd.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <16 x i8> poison, i8 %b, i32 0
+  %vb = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer
+  %v = call <16 x i8> @llvm.vp.sadd.sat.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %m, i32 %evl)
+  ret <16 x i8> %v
+}
+
+define <16 x i8> @vsadd_vx_v16i8_unmasked(<16 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_v16i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-NEXT:    vsadd.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <16 x i8> poison, i8 %b, i32 0
+  %vb = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer
+  %head = insertelement <16 x i1> poison, i1 true, i32 0
+  %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
+  %v = call <16 x i8> @llvm.vp.sadd.sat.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %m, i32 %evl)
+  ret <16 x i8> %v
+}
+
+define <16 x i8> @vsadd_vi_v16i8(<16 x i8> %va, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_v16i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-NEXT:    vsadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <16 x i8> poison, i8 -1, i32 0
+  %vb = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer
+  %v = call <16 x i8> @llvm.vp.sadd.sat.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %m, i32 %evl)
+  ret <16 x i8> %v
+}
+
+define <16 x i8> @vsadd_vi_v16i8_unmasked(<16 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_v16i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-NEXT:    vsadd.vi v8, v8, -1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <16 x i8> poison, i8 -1, i32 0
+  %vb = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer
+  %head = insertelement <16 x i1> poison, i1 true, i32 0
+  %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
+  %v = call <16 x i8> @llvm.vp.sadd.sat.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %m, i32 %evl)
+  ret <16 x i8> %v
+}
+
+declare <256 x i8> @llvm.vp.sadd.sat.v258i8(<256 x i8>, <256 x i8>, <256 x i1>, i32)
+
+define <256 x i8> @vsadd_vi_v258i8(<256 x i8> %va, <256 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_v258i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vmv1r.v v24, v0
+; CHECK-NEXT:    li a2, 128
+; CHECK-NEXT:    vsetvli zero, a2, e8, m8, ta, ma
+; CHECK-NEXT:    vlm.v v0, (a0)
+; CHECK-NEXT:    addi a0, a1, -128
+; CHECK-NEXT:    sltu a3, a1, a0
+; CHECK-NEXT:    addi a3, a3, -1
+; CHECK-NEXT:    and a0, a3, a0
+; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-NEXT:    vsadd.vi v16, v16, -1, v0.t
+; CHECK-NEXT:    bltu a1, a2, .LBB32_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    li a1, 128
+; CHECK-NEXT:  .LBB32_2:
+; CHECK-NEXT:    vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-NEXT:    vmv1r.v v0, v24
+; CHECK-NEXT:    vsadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <256 x i8> poison, i8 -1, i32 0
+  %vb = shufflevector <256 x i8> %elt.head, <256 x i8> poison, <256 x i32> zeroinitializer
+  %v = call <256 x i8> @llvm.vp.sadd.sat.v258i8(<256 x i8> %va, <256 x i8> %vb, <256 x i1> %m, i32 %evl)
+  ret <256 x i8> %v
+}
+
+define <256 x i8> @vsadd_vi_v258i8_unmasked(<256 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_v258i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a2, 128
+; CHECK-NEXT:    mv a1, a0
+; CHECK-NEXT:    bltu a0, a2, .LBB33_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    li a1, 128
+; CHECK-NEXT:  .LBB33_2:
+; CHECK-NEXT:    vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-NEXT:    vsadd.vi v8, v8, -1
+; CHECK-NEXT:    addi a1, a0, -128
+; CHECK-NEXT:    sltu a0, a0, a1
+; CHECK-NEXT:    addi a0, a0, -1
+; CHECK-NEXT:    and a0, a0, a1
+; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-NEXT:    vsadd.vi v16, v16, -1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <256 x i8> poison, i8 -1, i32 0
+  %vb = shufflevector <256 x i8> %elt.head, <256 x i8> poison, <256 x i32> zeroinitializer
+  %head = insertelement <256 x i1> poison, i1 true, i32 0
+  %m = shufflevector <256 x i1> %head, <256 x i1> poison, <256 x i32> zeroinitializer
+  %v = call <256 x i8> @llvm.vp.sadd.sat.v258i8(<256 x i8> %va, <256 x i8> %vb, <256 x i1> %m, i32 %evl)
+  ret <256 x i8> %v
+}
+
+; Test splitting when the %evl is a known constant.
+
+define <256 x i8> @vsadd_vi_v258i8_evl129(<256 x i8> %va, <256 x i1> %m) {
+; CHECK-LABEL: vsadd_vi_v258i8_evl129:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 128
+; CHECK-NEXT:    vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-NEXT:    vlm.v v24, (a0)
+; CHECK-NEXT:    vsadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT:    vsetivli zero, 1, e8, m8, ta, ma
+; CHECK-NEXT:    vmv1r.v v0, v24
+; CHECK-NEXT:    vsadd.vi v16, v16, -1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <256 x i8> poison, i8 -1, i32 0
+  %vb = shufflevector <256 x i8> %elt.head, <256 x i8> poison, <256 x i32> zeroinitializer
+  %v = call <256 x i8> @llvm.vp.sadd.sat.v258i8(<256 x i8> %va, <256 x i8> %vb, <256 x i1> %m, i32 129)
+  ret <256 x i8> %v
+}
+
+; FIXME: The upper half is doing nothing.
+
+define <256 x i8> @vsadd_vi_v258i8_evl128(<256 x i8> %va, <256 x i1> %m) {
+; CHECK-LABEL: vsadd_vi_v258i8_evl128:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 128
+; CHECK-NEXT:    vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-NEXT:    vlm.v v24, (a0)
+; CHECK-NEXT:    vsadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT:    vsetivli zero, 0, e8, m8, ta, ma
+; CHECK-NEXT:    vmv1r.v v0, v24
+; CHECK-NEXT:    vsadd.vi v16, v16, -1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <256 x i8> poison, i8 -1, i32 0
+  %vb = shufflevector <256 x i8> %elt.head, <256 x i8> poison, <256 x i32> zeroinitializer
+  %v = call <256 x i8> @llvm.vp.sadd.sat.v258i8(<256 x i8> %va, <256 x i8> %vb, <256 x i1> %m, i32 128)
+  ret <256 x i8> %v
+}
+
+declare <2 x i16> @llvm.vp.sadd.sat.v2i16(<2 x i16>, <2 x i16>, <2 x i1>, i32)
+
+define <2 x i16> @vsadd_vv_v2i16(<2 x i16> %va, <2 x i16> %b, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_v2i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT:    vsadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    ret
+  %v = call <2 x i16> @llvm.vp.sadd.sat.v2i16(<2 x i16> %va, <2 x i16> %b, <2 x i1> %m, i32 %evl)
+  ret <2 x i16> %v
+}
+
+define <2 x i16> @vsadd_vv_v2i16_unmasked(<2 x i16> %va, <2 x i16> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_v2i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT:    vsadd.vv v8, v8, v9
+; CHECK-NEXT:    ret
+  %head = insertelement <2 x i1> poison, i1 true, i32 0
+  %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
+  %v = call <2 x i16> @llvm.vp.sadd.sat.v2i16(<2 x i16> %va, <2 x i16> %b, <2 x i1> %m, i32 %evl)
+  ret <2 x i16> %v
+}
+
+define <2 x i16> @vsadd_vx_v2i16(<2 x i16> %va, i16 %b, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_v2i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-NEXT:    vsadd.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <2 x i16> poison, i16 %b, i32 0
+  %vb = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer
+  %v = call <2 x i16> @llvm.vp.sadd.sat.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> %m, i32 %evl)
+  ret <2 x i16> %v
+}
+
+define <2 x i16> @vsadd_vx_v2i16_unmasked(<2 x i16> %va, i16 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_v2i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-NEXT:    vsadd.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <2 x i16> poison, i16 %b, i32 0
+  %vb = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer
+  %head = insertelement <2 x i1> poison, i1 true, i32 0
+  %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
+  %v = call <2 x i16> @llvm.vp.sadd.sat.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> %m, i32 %evl)
+  ret <2 x i16> %v
+}
+
+define <2 x i16> @vsadd_vi_v2i16(<2 x i16> %va, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_v2i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT:    vsadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <2 x i16> poison, i16 -1, i32 0
+  %vb = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer
+  %v = call <2 x i16> @llvm.vp.sadd.sat.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> %m, i32 %evl)
+  ret <2 x i16> %v
+}
+
+define <2 x i16> @vsadd_vi_v2i16_unmasked(<2 x i16> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_v2i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT:    vsadd.vi v8, v8, -1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <2 x i16> poison, i16 -1, i32 0
+  %vb = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer
+  %head = insertelement <2 x i1> poison, i1 true, i32 0
+  %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
+  %v = call <2 x i16> @llvm.vp.sadd.sat.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> %m, i32 %evl)
+  ret <2 x i16> %v
+}
+
+declare <4 x i16> @llvm.vp.sadd.sat.v4i16(<4 x i16>, <4 x i16>, <4 x i1>, i32)
+
+define <4 x i16> @vsadd_vv_v4i16(<4 x i16> %va, <4 x i16> %b, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_v4i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT:    vsadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    ret
+  %v = call <4 x i16> @llvm.vp.sadd.sat.v4i16(<4 x i16> %va, <4 x i16> %b, <4 x i1> %m, i32 %evl)
+  ret <4 x i16> %v
+}
+
+define <4 x i16> @vsadd_vv_v4i16_unmasked(<4 x i16> %va, <4 x i16> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_v4i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT:    vsadd.vv v8, v8, v9
+; CHECK-NEXT:    ret
+  %head = insertelement <4 x i1> poison, i1 true, i32 0
+  %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
+  %v = call <4 x i16> @llvm.vp.sadd.sat.v4i16(<4 x i16> %va, <4 x i16> %b, <4 x i1> %m, i32 %evl)
+  ret <4 x i16> %v
+}
+
+define <4 x i16> @vsadd_vx_v4i16(<4 x i16> %va, i16 %b, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_v4i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-NEXT:    vsadd.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <4 x i16> poison, i16 %b, i32 0
+  %vb = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer
+  %v = call <4 x i16> @llvm.vp.sadd.sat.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 %evl)
+  ret <4 x i16> %v
+}
+
+define <4 x i16> @vsadd_vx_v4i16_unmasked(<4 x i16> %va, i16 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_v4i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-NEXT:    vsadd.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <4 x i16> poison, i16 %b, i32 0
+  %vb = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer
+  %head = insertelement <4 x i1> poison, i1 true, i32 0
+  %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
+  %v = call <4 x i16> @llvm.vp.sadd.sat.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 %evl)
+  ret <4 x i16> %v
+}
+
+define <4 x i16> @vsadd_vi_v4i16(<4 x i16> %va, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_v4i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT:    vsadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <4 x i16> poison, i16 -1, i32 0
+  %vb = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer
+  %v = call <4 x i16> @llvm.vp.sadd.sat.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 %evl)
+  ret <4 x i16> %v
+}
+
+define <4 x i16> @vsadd_vi_v4i16_unmasked(<4 x i16> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_v4i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT:    vsadd.vi v8, v8, -1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <4 x i16> poison, i16 -1, i32 0
+  %vb = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer
+  %head = insertelement <4 x i1> poison, i1 true, i32 0
+  %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
+  %v = call <4 x i16> @llvm.vp.sadd.sat.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 %evl)
+  ret <4 x i16> %v
+}
+
+declare <8 x i16> @llvm.vp.sadd.sat.v8i16(<8 x i16>, <8 x i16>, <8 x i1>, i32)
+
+define <8 x i16> @vsadd_vv_v8i16(<8 x i16> %va, <8 x i16> %b, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_v8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT:    vsadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    ret
+  %v = call <8 x i16> @llvm.vp.sadd.sat.v8i16(<8 x i16> %va, <8 x i16> %b, <8 x i1> %m, i32 %evl)
+  ret <8 x i16> %v
+}
+
+define <8 x i16> @vsadd_vv_v8i16_unmasked(<8 x i16> %va, <8 x i16> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_v8i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT:    vsadd.vv v8, v8, v9
+; CHECK-NEXT:    ret
+  %head = insertelement <8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+  %v = call <8 x i16> @llvm.vp.sadd.sat.v8i16(<8 x i16> %va, <8 x i16> %b, <8 x i1> %m, i32 %evl)
+  ret <8 x i16> %v
+}
+
+define <8 x i16> @vsadd_vx_v8i16(<8 x i16> %va, i16 %b, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_v8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT:    vsadd.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <8 x i16> poison, i16 %b, i32 0
+  %vb = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer
+  %v = call <8 x i16> @llvm.vp.sadd.sat.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %m, i32 %evl)
+  ret <8 x i16> %v
+}
+
+define <8 x i16> @vsadd_vx_v8i16_unmasked(<8 x i16> %va, i16 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_v8i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT:    vsadd.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <8 x i16> poison, i16 %b, i32 0
+  %vb = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer
+  %head = insertelement <8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+  %v = call <8 x i16> @llvm.vp.sadd.sat.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %m, i32 %evl)
+  ret <8 x i16> %v
+}
+
+define <8 x i16> @vsadd_vi_v8i16(<8 x i16> %va, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_v8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT:    vsadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <8 x i16> poison, i16 -1, i32 0
+  %vb = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer
+  %v = call <8 x i16> @llvm.vp.sadd.sat.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %m, i32 %evl)
+  ret <8 x i16> %v
+}
+
+define <8 x i16> @vsadd_vi_v8i16_unmasked(<8 x i16> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_v8i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT:    vsadd.vi v8, v8, -1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <8 x i16> poison, i16 -1, i32 0
+  %vb = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer
+  %head = insertelement <8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+  %v = call <8 x i16> @llvm.vp.sadd.sat.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %m, i32 %evl)
+  ret <8 x i16> %v
+}
+
+declare <16 x i16> @llvm.vp.sadd.sat.v16i16(<16 x i16>, <16 x i16>, <16 x i1>, i32)
+
+define <16 x i16> @vsadd_vv_v16i16(<16 x i16> %va, <16 x i16> %b, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_v16i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT:    vsadd.vv v8, v8, v10, v0.t
+; CHECK-NEXT:    ret
+  %v = call <16 x i16> @llvm.vp.sadd.sat.v16i16(<16 x i16> %va, <16 x i16> %b, <16 x i1> %m, i32 %evl)
+  ret <16 x i16> %v
+}
+
+define <16 x i16> @vsadd_vv_v16i16_unmasked(<16 x i16> %va, <16 x i16> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_v16i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT:    vsadd.vv v8, v8, v10
+; CHECK-NEXT:    ret
+  %head = insertelement <16 x i1> poison, i1 true, i32 0
+  %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
+  %v = call <16 x i16> @llvm.vp.sadd.sat.v16i16(<16 x i16> %va, <16 x i16> %b, <16 x i1> %m, i32 %evl)
+  ret <16 x i16> %v
+}
+
+define <16 x i16> @vsadd_vx_v16i16(<16 x i16> %va, i16 %b, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_v16i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
+; CHECK-NEXT:    vsadd.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <16 x i16> poison, i16 %b, i32 0
+  %vb = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer
+  %v = call <16 x i16> @llvm.vp.sadd.sat.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 %evl)
+  ret <16 x i16> %v
+}
+
+define <16 x i16> @vsadd_vx_v16i16_unmasked(<16 x i16> %va, i16 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_v16i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
+; CHECK-NEXT:    vsadd.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <16 x i16> poison, i16 %b, i32 0
+  %vb = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer
+  %head = insertelement <16 x i1> poison, i1 true, i32 0
+  %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
+  %v = call <16 x i16> @llvm.vp.sadd.sat.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 %evl)
+  ret <16 x i16> %v
+}
+
+define <16 x i16> @vsadd_vi_v16i16(<16 x i16> %va, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_v16i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT:    vsadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <16 x i16> poison, i16 -1, i32 0
+  %vb = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer
+  %v = call <16 x i16> @llvm.vp.sadd.sat.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 %evl)
+  ret <16 x i16> %v
+}
+
+define <16 x i16> @vsadd_vi_v16i16_unmasked(<16 x i16> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_v16i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT:    vsadd.vi v8, v8, -1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <16 x i16> poison, i16 -1, i32 0
+  %vb = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer
+  %head = insertelement <16 x i1> poison, i1 true, i32 0
+  %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
+  %v = call <16 x i16> @llvm.vp.sadd.sat.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 %evl)
+  ret <16 x i16> %v
+}
+
+declare <2 x i32> @llvm.vp.sadd.sat.v2i32(<2 x i32>, <2 x i32>, <2 x i1>, i32)
+
+define <2 x i32> @vsadd_vv_v2i32(<2 x i32> %va, <2 x i32> %b, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_v2i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT:    vsadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    ret
+  %v = call <2 x i32> @llvm.vp.sadd.sat.v2i32(<2 x i32> %va, <2 x i32> %b, <2 x i1> %m, i32 %evl)
+  ret <2 x i32> %v
+}
+
+define <2 x i32> @vsadd_vv_v2i32_unmasked(<2 x i32> %va, <2 x i32> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_v2i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT:    vsadd.vv v8, v8, v9
+; CHECK-NEXT:    ret
+  %head = insertelement <2 x i1> poison, i1 true, i32 0
+  %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
+  %v = call <2 x i32> @llvm.vp.sadd.sat.v2i32(<2 x i32> %va, <2 x i32> %b, <2 x i1> %m, i32 %evl)
+  ret <2 x i32> %v
+}
+
+define <2 x i32> @vsadd_vx_v2i32(<2 x i32> %va, i32 %b, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_v2i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-NEXT:    vsadd.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <2 x i32> poison, i32 %b, i32 0
+  %vb = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer
+  %v = call <2 x i32> @llvm.vp.sadd.sat.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> %m, i32 %evl)
+  ret <2 x i32> %v
+}
+
+define <2 x i32> @vsadd_vx_v2i32_unmasked(<2 x i32> %va, i32 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_v2i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-NEXT:    vsadd.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <2 x i32> poison, i32 %b, i32 0
+  %vb = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer
+  %head = insertelement <2 x i1> poison, i1 true, i32 0
+  %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
+  %v = call <2 x i32> @llvm.vp.sadd.sat.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> %m, i32 %evl)
+  ret <2 x i32> %v
+}
+
+define <2 x i32> @vsadd_vi_v2i32(<2 x i32> %va, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_v2i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT:    vsadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <2 x i32> poison, i32 -1, i32 0
+  %vb = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer
+  %v = call <2 x i32> @llvm.vp.sadd.sat.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> %m, i32 %evl)
+  ret <2 x i32> %v
+}
+
+define <2 x i32> @vsadd_vi_v2i32_unmasked(<2 x i32> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_v2i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT:    vsadd.vi v8, v8, -1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <2 x i32> poison, i32 -1, i32 0
+  %vb = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer
+  %head = insertelement <2 x i1> poison, i1 true, i32 0
+  %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
+  %v = call <2 x i32> @llvm.vp.sadd.sat.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> %m, i32 %evl)
+  ret <2 x i32> %v
+}
+
+declare <4 x i32> @llvm.vp.sadd.sat.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32)
+
+define <4 x i32> @vsadd_vv_v4i32(<4 x i32> %va, <4 x i32> %b, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_v4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT:    vsadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    ret
+  %v = call <4 x i32> @llvm.vp.sadd.sat.v4i32(<4 x i32> %va, <4 x i32> %b, <4 x i1> %m, i32 %evl)
+  ret <4 x i32> %v
+}
+
+define <4 x i32> @vsadd_vv_v4i32_unmasked(<4 x i32> %va, <4 x i32> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_v4i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT:    vsadd.vv v8, v8, v9
+; CHECK-NEXT:    ret
+  %head = insertelement <4 x i1> poison, i1 true, i32 0
+  %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
+  %v = call <4 x i32> @llvm.vp.sadd.sat.v4i32(<4 x i32> %va, <4 x i32> %b, <4 x i1> %m, i32 %evl)
+  ret <4 x i32> %v
+}
+
+define <4 x i32> @vsadd_vx_v4i32(<4 x i32> %va, i32 %b, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_v4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-NEXT:    vsadd.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <4 x i32> poison, i32 %b, i32 0
+  %vb = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer
+  %v = call <4 x i32> @llvm.vp.sadd.sat.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %m, i32 %evl)
+  ret <4 x i32> %v
+}
+
+define <4 x i32> @vsadd_vx_v4i32_unmasked(<4 x i32> %va, i32 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_v4i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-NEXT:    vsadd.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <4 x i32> poison, i32 %b, i32 0
+  %vb = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer
+  %head = insertelement <4 x i1> poison, i1 true, i32 0
+  %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
+  %v = call <4 x i32> @llvm.vp.sadd.sat.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %m, i32 %evl)
+  ret <4 x i32> %v
+}
+
+define <4 x i32> @vsadd_vi_v4i32(<4 x i32> %va, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_v4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT:    vsadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <4 x i32> poison, i32 -1, i32 0
+  %vb = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer
+  %v = call <4 x i32> @llvm.vp.sadd.sat.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %m, i32 %evl)
+  ret <4 x i32> %v
+}
+
+define <4 x i32> @vsadd_vi_v4i32_unmasked(<4 x i32> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_v4i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT:    vsadd.vi v8, v8, -1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <4 x i32> poison, i32 -1, i32 0
+  %vb = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer
+  %head = insertelement <4 x i1> poison, i1 true, i32 0
+  %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
+  %v = call <4 x i32> @llvm.vp.sadd.sat.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %m, i32 %evl)
+  ret <4 x i32> %v
+}
+
+declare <8 x i32> @llvm.vp.sadd.sat.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
+
+define <8 x i32> @vsadd_vv_v8i32(<8 x i32> %va, <8 x i32> %b, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_v8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT:    vsadd.vv v8, v8, v10, v0.t
+; CHECK-NEXT:    ret
+  %v = call <8 x i32> @llvm.vp.sadd.sat.v8i32(<8 x i32> %va, <8 x i32> %b, <8 x i1> %m, i32 %evl)
+  ret <8 x i32> %v
+}
+
+define <8 x i32> @vsadd_vv_v8i32_unmasked(<8 x i32> %va, <8 x i32> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_v8i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT:    vsadd.vv v8, v8, v10
+; CHECK-NEXT:    ret
+  %head = insertelement <8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+  %v = call <8 x i32> @llvm.vp.sadd.sat.v8i32(<8 x i32> %va, <8 x i32> %b, <8 x i1> %m, i32 %evl)
+  ret <8 x i32> %v
+}
+
+define <8 x i32> @vsadd_vx_v8i32(<8 x i32> %va, i32 %b, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_v8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT:    vsadd.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0
+  %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer
+  %v = call <8 x i32> @llvm.vp.sadd.sat.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 %evl)
+  ret <8 x i32> %v
+}
+
+define <8 x i32> @vsadd_vx_v8i32_unmasked(<8 x i32> %va, i32 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_v8i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT:    vsadd.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0
+  %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer
+  %head = insertelement <8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+  %v = call <8 x i32> @llvm.vp.sadd.sat.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 %evl)
+  ret <8 x i32> %v
+}
+
+define <8 x i32> @vsadd_vi_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_v8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT:    vsadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <8 x i32> poison, i32 -1, i32 0
+  %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer
+  %v = call <8 x i32> @llvm.vp.sadd.sat.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 %evl)
+  ret <8 x i32> %v
+}
+
+define <8 x i32> @vsadd_vi_v8i32_unmasked(<8 x i32> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_v8i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT:    vsadd.vi v8, v8, -1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <8 x i32> poison, i32 -1, i32 0
+  %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer
+  %head = insertelement <8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+  %v = call <8 x i32> @llvm.vp.sadd.sat.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 %evl)
+  ret <8 x i32> %v
+}
+
+declare <16 x i32> @llvm.vp.sadd.sat.v16i32(<16 x i32>, <16 x i32>, <16 x i1>, i32)
+
+define <16 x i32> @vsadd_vv_v16i32(<16 x i32> %va, <16 x i32> %b, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_v16i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT:    vsadd.vv v8, v8, v12, v0.t
+; CHECK-NEXT:    ret
+  %v = call <16 x i32> @llvm.vp.sadd.sat.v16i32(<16 x i32> %va, <16 x i32> %b, <16 x i1> %m, i32 %evl)
+  ret <16 x i32> %v
+}
+
+define <16 x i32> @vsadd_vv_v16i32_unmasked(<16 x i32> %va, <16 x i32> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_v16i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT:    vsadd.vv v8, v8, v12
+; CHECK-NEXT:    ret
+  %head = insertelement <16 x i1> poison, i1 true, i32 0
+  %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
+  %v = call <16 x i32> @llvm.vp.sadd.sat.v16i32(<16 x i32> %va, <16 x i32> %b, <16 x i1> %m, i32 %evl)
+  ret <16 x i32> %v
+}
+
+define <16 x i32> @vsadd_vx_v16i32(<16 x i32> %va, i32 %b, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_v16i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-NEXT:    vsadd.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <16 x i32> poison, i32 %b, i32 0
+  %vb = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer
+  %v = call <16 x i32> @llvm.vp.sadd.sat.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> %m, i32 %evl)
+  ret <16 x i32> %v
+}
+
+define <16 x i32> @vsadd_vx_v16i32_unmasked(<16 x i32> %va, i32 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_v16i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-NEXT:    vsadd.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <16 x i32> poison, i32 %b, i32 0
+  %vb = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer
+  %head = insertelement <16 x i1> poison, i1 true, i32 0
+  %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
+  %v = call <16 x i32> @llvm.vp.sadd.sat.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> %m, i32 %evl)
+  ret <16 x i32> %v
+}
+
+define <16 x i32> @vsadd_vi_v16i32(<16 x i32> %va, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_v16i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT:    vsadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <16 x i32> poison, i32 -1, i32 0
+  %vb = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer
+  %v = call <16 x i32> @llvm.vp.sadd.sat.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> %m, i32 %evl)
+  ret <16 x i32> %v
+}
+
+define <16 x i32> @vsadd_vi_v16i32_unmasked(<16 x i32> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_v16i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT:    vsadd.vi v8, v8, -1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <16 x i32> poison, i32 -1, i32 0
+  %vb = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer
+  %head = insertelement <16 x i1> poison, i1 true, i32 0
+  %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
+  %v = call <16 x i32> @llvm.vp.sadd.sat.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> %m, i32 %evl)
+  ret <16 x i32> %v
+}
+
+declare <2 x i64> @llvm.vp.sadd.sat.v2i64(<2 x i64>, <2 x i64>, <2 x i1>, i32)
+
+define <2 x i64> @vsadd_vv_v2i64(<2 x i64> %va, <2 x i64> %b, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_v2i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT:    vsadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    ret
+  %v = call <2 x i64> @llvm.vp.sadd.sat.v2i64(<2 x i64> %va, <2 x i64> %b, <2 x i1> %m, i32 %evl)
+  ret <2 x i64> %v
+}
+
+define <2 x i64> @vsadd_vv_v2i64_unmasked(<2 x i64> %va, <2 x i64> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_v2i64_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT:    vsadd.vv v8, v8, v9
+; CHECK-NEXT:    ret
+  %head = insertelement <2 x i1> poison, i1 true, i32 0
+  %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
+  %v = call <2 x i64> @llvm.vp.sadd.sat.v2i64(<2 x i64> %va, <2 x i64> %b, <2 x i1> %m, i32 %evl)
+  ret <2 x i64> %v
+}
+
+define <2 x i64> @vsadd_vx_v2i64(<2 x i64> %va, i64 %b, <2 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vsadd_vx_v2i64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    sw a1, 12(sp)
+; RV32-NEXT:    sw a0, 8(sp)
+; RV32-NEXT:    addi a0, sp, 8
+; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; RV32-NEXT:    vlse64.v v9, (a0), zero
+; RV32-NEXT:    vsetvli zero, a2, e64, m1, ta, ma
+; RV32-NEXT:    vsadd.vv v8, v8, v9, v0.t
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vsadd_vx_v2i64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli zero, a1, e64, m1, ta, ma
+; RV64-NEXT:    vsadd.vx v8, v8, a0, v0.t
+; RV64-NEXT:    ret
+  %elt.head = insertelement <2 x i64> poison, i64 %b, i32 0
+  %vb = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer
+  %v = call <2 x i64> @llvm.vp.sadd.sat.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %m, i32 %evl)
+  ret <2 x i64> %v
+}
+
+define <2 x i64> @vsadd_vx_v2i64_unmasked(<2 x i64> %va, i64 %b, i32 zeroext %evl) {
+; RV32-LABEL: vsadd_vx_v2i64_unmasked:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    sw a1, 12(sp)
+; RV32-NEXT:    sw a0, 8(sp)
+; RV32-NEXT:    addi a0, sp, 8
+; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; RV32-NEXT:    vlse64.v v9, (a0), zero
+; RV32-NEXT:    vsetvli zero, a2, e64, m1, ta, ma
+; RV32-NEXT:    vsadd.vv v8, v8, v9
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vsadd_vx_v2i64_unmasked:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli zero, a1, e64, m1, ta, ma
+; RV64-NEXT:    vsadd.vx v8, v8, a0
+; RV64-NEXT:    ret
+  %elt.head = insertelement <2 x i64> poison, i64 %b, i32 0
+  %vb = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer
+  %head = insertelement <2 x i1> poison, i1 true, i32 0
+  %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
+  %v = call <2 x i64> @llvm.vp.sadd.sat.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %m, i32 %evl)
+  ret <2 x i64> %v
+}
+
+define <2 x i64> @vsadd_vi_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_v2i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT:    vsadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <2 x i64> poison, i64 -1, i32 0
+  %vb = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer
+  %v = call <2 x i64> @llvm.vp.sadd.sat.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %m, i32 %evl)
+  ret <2 x i64> %v
+}
+
+define <2 x i64> @vsadd_vi_v2i64_unmasked(<2 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_v2i64_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT:    vsadd.vi v8, v8, -1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <2 x i64> poison, i64 -1, i32 0
+  %vb = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer
+  %head = insertelement <2 x i1> poison, i1 true, i32 0
+  %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
+  %v = call <2 x i64> @llvm.vp.sadd.sat.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %m, i32 %evl)
+  ret <2 x i64> %v
+}
+
+declare <4 x i64> @llvm.vp.sadd.sat.v4i64(<4 x i64>, <4 x i64>, <4 x i1>, i32)
+
+define <4 x i64> @vsadd_vv_v4i64(<4 x i64> %va, <4 x i64> %b, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_v4i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-NEXT:    vsadd.vv v8, v8, v10, v0.t
+; CHECK-NEXT:    ret
+  %v = call <4 x i64> @llvm.vp.sadd.sat.v4i64(<4 x i64> %va, <4 x i64> %b, <4 x i1> %m, i32 %evl)
+  ret <4 x i64> %v
+}
+
+define <4 x i64> @vsadd_vv_v4i64_unmasked(<4 x i64> %va, <4 x i64> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_v4i64_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-NEXT:    vsadd.vv v8, v8, v10
+; CHECK-NEXT:    ret
+  %head = insertelement <4 x i1> poison, i1 true, i32 0
+  %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
+  %v = call <4 x i64> @llvm.vp.sadd.sat.v4i64(<4 x i64> %va, <4 x i64> %b, <4 x i1> %m, i32 %evl)
+  ret <4 x i64> %v
+}
+
+define <4 x i64> @vsadd_vx_v4i64(<4 x i64> %va, i64 %b, <4 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vsadd_vx_v4i64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    sw a1, 12(sp)
+; RV32-NEXT:    sw a0, 8(sp)
+; RV32-NEXT:    addi a0, sp, 8
+; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV32-NEXT:    vlse64.v v10, (a0), zero
+; RV32-NEXT:    vsetvli zero, a2, e64, m2, ta, ma
+; RV32-NEXT:    vsadd.vv v8, v8, v10, v0.t
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vsadd_vx_v4i64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli zero, a1, e64, m2, ta, ma
+; RV64-NEXT:    vsadd.vx v8, v8, a0, v0.t
+; RV64-NEXT:    ret
+  %elt.head = insertelement <4 x i64> poison, i64 %b, i32 0
+  %vb = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer
+  %v = call <4 x i64> @llvm.vp.sadd.sat.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> %m, i32 %evl)
+  ret <4 x i64> %v
+}
+
+define <4 x i64> @vsadd_vx_v4i64_unmasked(<4 x i64> %va, i64 %b, i32 zeroext %evl) {
+; RV32-LABEL: vsadd_vx_v4i64_unmasked:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    sw a1, 12(sp)
+; RV32-NEXT:    sw a0, 8(sp)
+; RV32-NEXT:    addi a0, sp, 8
+; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV32-NEXT:    vlse64.v v10, (a0), zero
+; RV32-NEXT:    vsetvli zero, a2, e64, m2, ta, ma
+; RV32-NEXT:    vsadd.vv v8, v8, v10
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vsadd_vx_v4i64_unmasked:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli zero, a1, e64, m2, ta, ma
+; RV64-NEXT:    vsadd.vx v8, v8, a0
+; RV64-NEXT:    ret
+  %elt.head = insertelement <4 x i64> poison, i64 %b, i32 0
+  %vb = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer
+  %head = insertelement <4 x i1> poison, i1 true, i32 0
+  %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
+  %v = call <4 x i64> @llvm.vp.sadd.sat.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> %m, i32 %evl)
+  ret <4 x i64> %v
+}
+
+define <4 x i64> @vsadd_vi_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_v4i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-NEXT:    vsadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <4 x i64> poison, i64 -1, i32 0
+  %vb = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer
+  %v = call <4 x i64> @llvm.vp.sadd.sat.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> %m, i32 %evl)
+  ret <4 x i64> %v
+}
+
+define <4 x i64> @vsadd_vi_v4i64_unmasked(<4 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_v4i64_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-NEXT:    vsadd.vi v8, v8, -1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <4 x i64> poison, i64 -1, i32 0
+  %vb = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer
+  %head = insertelement <4 x i1> poison, i1 true, i32 0
+  %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
+  %v = call <4 x i64> @llvm.vp.sadd.sat.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> %m, i32 %evl)
+  ret <4 x i64> %v
+}
+
+declare <8 x i64> @llvm.vp.sadd.sat.v8i64(<8 x i64>, <8 x i64>, <8 x i1>, i32)
+
+define <8 x i64> @vsadd_vv_v8i64(<8 x i64> %va, <8 x i64> %b, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_v8i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT:    vsadd.vv v8, v8, v12, v0.t
+; CHECK-NEXT:    ret
+  %v = call <8 x i64> @llvm.vp.sadd.sat.v8i64(<8 x i64> %va, <8 x i64> %b, <8 x i1> %m, i32 %evl)
+  ret <8 x i64> %v
+}
+
+define <8 x i64> @vsadd_vv_v8i64_unmasked(<8 x i64> %va, <8 x i64> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_v8i64_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT:    vsadd.vv v8, v8, v12
+; CHECK-NEXT:    ret
+  %head = insertelement <8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+  %v = call <8 x i64> @llvm.vp.sadd.sat.v8i64(<8 x i64> %va, <8 x i64> %b, <8 x i1> %m, i32 %evl)
+  ret <8 x i64> %v
+}
+
+define <8 x i64> @vsadd_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vsadd_vx_v8i64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    sw a1, 12(sp)
+; RV32-NEXT:    sw a0, 8(sp)
+; RV32-NEXT:    addi a0, sp, 8
+; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
+; RV32-NEXT:    vlse64.v v12, (a0), zero
+; RV32-NEXT:    vsetvli zero, a2, e64, m4, ta, ma
+; RV32-NEXT:    vsadd.vv v8, v8, v12, v0.t
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vsadd_vx_v8i64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
+; RV64-NEXT:    vsadd.vx v8, v8, a0, v0.t
+; RV64-NEXT:    ret
+  %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0
+  %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
+  %v = call <8 x i64> @llvm.vp.sadd.sat.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+  ret <8 x i64> %v
+}
+
+define <8 x i64> @vsadd_vx_v8i64_unmasked(<8 x i64> %va, i64 %b, i32 zeroext %evl) {
+; RV32-LABEL: vsadd_vx_v8i64_unmasked:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    sw a1, 12(sp)
+; RV32-NEXT:    sw a0, 8(sp)
+; RV32-NEXT:    addi a0, sp, 8
+; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
+; RV32-NEXT:    vlse64.v v12, (a0), zero
+; RV32-NEXT:    vsetvli zero, a2, e64, m4, ta, ma
+; RV32-NEXT:    vsadd.vv v8, v8, v12
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vsadd_vx_v8i64_unmasked:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
+; RV64-NEXT:    vsadd.vx v8, v8, a0
+; RV64-NEXT:    ret
+  %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0
+  %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
+  %head = insertelement <8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+  %v = call <8 x i64> @llvm.vp.sadd.sat.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+  ret <8 x i64> %v
+}
+
+define <8 x i64> @vsadd_vi_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_v8i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT:    vsadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <8 x i64> poison, i64 -1, i32 0
+  %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
+  %v = call <8 x i64> @llvm.vp.sadd.sat.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+  ret <8 x i64> %v
+}
+
+define <8 x i64> @vsadd_vi_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_v8i64_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT:    vsadd.vi v8, v8, -1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <8 x i64> poison, i64 -1, i32 0
+  %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
+  %head = insertelement <8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+  %v = call <8 x i64> @llvm.vp.sadd.sat.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+  ret <8 x i64> %v
+}
+
+declare <16 x i64> @llvm.vp.sadd.sat.v16i64(<16 x i64>, <16 x i64>, <16 x i1>, i32)
+
+define <16 x i64> @vsadd_vv_v16i64(<16 x i64> %va, <16 x i64> %b, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_v16i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT:    vsadd.vv v8, v8, v16, v0.t
+; CHECK-NEXT:    ret
+  %v = call <16 x i64> @llvm.vp.sadd.sat.v16i64(<16 x i64> %va, <16 x i64> %b, <16 x i1> %m, i32 %evl)
+  ret <16 x i64> %v
+}
+
+define <16 x i64> @vsadd_vv_v16i64_unmasked(<16 x i64> %va, <16 x i64> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_v16i64_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT:    vsadd.vv v8, v8, v16
+; CHECK-NEXT:    ret
+  %head = insertelement <16 x i1> poison, i1 true, i32 0
+  %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
+  %v = call <16 x i64> @llvm.vp.sadd.sat.v16i64(<16 x i64> %va, <16 x i64> %b, <16 x i1> %m, i32 %evl)
+  ret <16 x i64> %v
+}
+
+define <16 x i64> @vsadd_vx_v16i64(<16 x i64> %va, i64 %b, <16 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vsadd_vx_v16i64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    sw a1, 12(sp)
+; RV32-NEXT:    sw a0, 8(sp)
+; RV32-NEXT:    addi a0, sp, 8
+; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
+; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
+; RV32-NEXT:    vsadd.vv v8, v8, v16, v0.t
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vsadd_vx_v16i64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vsadd.vx v8, v8, a0, v0.t
+; RV64-NEXT:    ret
+  %elt.head = insertelement <16 x i64> poison, i64 %b, i32 0
+  %vb = shufflevector <16 x i64> %elt.head, <16 x i64> poison, <16 x i32> zeroinitializer
+  %v = call <16 x i64> @llvm.vp.sadd.sat.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl)
+  ret <16 x i64> %v
+}
+
+define <16 x i64> @vsadd_vx_v16i64_unmasked(<16 x i64> %va, i64 %b, i32 zeroext %evl) {
+; RV32-LABEL: vsadd_vx_v16i64_unmasked:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    sw a1, 12(sp)
+; RV32-NEXT:    sw a0, 8(sp)
+; RV32-NEXT:    addi a0, sp, 8
+; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
+; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
+; RV32-NEXT:    vsadd.vv v8, v8, v16
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vsadd_vx_v16i64_unmasked:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vsadd.vx v8, v8, a0
+; RV64-NEXT:    ret
+  %elt.head = insertelement <16 x i64> poison, i64 %b, i32 0
+  %vb = shufflevector <16 x i64> %elt.head, <16 x i64> poison, <16 x i32> zeroinitializer
+  %head = insertelement <16 x i1> poison, i1 true, i32 0
+  %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
+  %v = call <16 x i64> @llvm.vp.sadd.sat.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl)
+  ret <16 x i64> %v
+}
+
+define <16 x i64> @vsadd_vi_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_v16i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT:    vsadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <16 x i64> poison, i64 -1, i32 0
+  %vb = shufflevector <16 x i64> %elt.head, <16 x i64> poison, <16 x i32> zeroinitializer
+  %v = call <16 x i64> @llvm.vp.sadd.sat.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl)
+  ret <16 x i64> %v
+}
+
+define <16 x i64> @vsadd_vi_v16i64_unmasked(<16 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_v16i64_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT:    vsadd.vi v8, v8, -1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <16 x i64> poison, i64 -1, i32 0
+  %vb = shufflevector <16 x i64> %elt.head, <16 x i64> poison, <16 x i32> zeroinitializer
+  %head = insertelement <16 x i1> poison, i1 true, i32 0
+  %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
+  %v = call <16 x i64> @llvm.vp.sadd.sat.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl)
+  ret <16 x i64> %v
+}
+
+; Test that split-legalization works as expected.
+
+declare <32 x i64> @llvm.vp.sadd.sat.v32i64(<32 x i64>, <32 x i64>, <32 x i1>, i32)
+
+define <32 x i64> @vsadd_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vsadd_vx_v32i64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
+; RV32-NEXT:    vslidedown.vi v1, v0, 2
+; RV32-NEXT:    li a1, 32
+; RV32-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
+; RV32-NEXT:    li a2, 16
+; RV32-NEXT:    vmv.v.i v24, -1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    bltu a0, a2, .LBB108_2
+; RV32-NEXT:  # %bb.1:
+; RV32-NEXT:    li a1, 16
+; RV32-NEXT:  .LBB108_2:
+; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV32-NEXT:    vsadd.vv v8, v8, v24, v0.t
+; RV32-NEXT:    addi a1, a0, -16
+; RV32-NEXT:    sltu a0, a0, a1
+; RV32-NEXT:    addi a0, a0, -1
+; RV32-NEXT:    and a0, a0, a1
+; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT:    vmv1r.v v0, v1
+; RV32-NEXT:    vsadd.vv v16, v16, v24, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vsadd_vx_v32i64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
+; RV64-NEXT:    li a2, 16
+; RV64-NEXT:    vslidedown.vi v24, v0, 2
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    bltu a0, a2, .LBB108_2
+; RV64-NEXT:  # %bb.1:
+; RV64-NEXT:    li a1, 16
+; RV64-NEXT:  .LBB108_2:
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vsadd.vi v8, v8, -1, v0.t
+; RV64-NEXT:    addi a1, a0, -16
+; RV64-NEXT:    sltu a0, a0, a1
+; RV64-NEXT:    addi a0, a0, -1
+; RV64-NEXT:    and a0, a0, a1
+; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV64-NEXT:    vmv1r.v v0, v24
+; RV64-NEXT:    vsadd.vi v16, v16, -1, v0.t
+; RV64-NEXT:    ret
+  %elt.head = insertelement <32 x i64> poison, i64 -1, i32 0
+  %vb = shufflevector <32 x i64> %elt.head, <32 x i64> poison, <32 x i32> zeroinitializer
+  %v = call <32 x i64> @llvm.vp.sadd.sat.v32i64(<32 x i64> %va, <32 x i64> %vb, <32 x i1> %m, i32 %evl)
+  ret <32 x i64> %v
+}
+
+define <32 x i64> @vsadd_vi_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) {
+; RV32-LABEL: vsadd_vi_v32i64_unmasked:
+; RV32:       # %bb.0:
+; RV32-NEXT:    li a1, 32
+; RV32-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
+; RV32-NEXT:    li a2, 16
+; RV32-NEXT:    vmv.v.i v24, -1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    bltu a0, a2, .LBB109_2
+; RV32-NEXT:  # %bb.1:
+; RV32-NEXT:    li a1, 16
+; RV32-NEXT:  .LBB109_2:
+; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV32-NEXT:    vsadd.vv v8, v8, v24
+; RV32-NEXT:    addi a1, a0, -16
+; RV32-NEXT:    sltu a0, a0, a1
+; RV32-NEXT:    addi a0, a0, -1
+; RV32-NEXT:    and a0, a0, a1
+; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT:    vsadd.vv v16, v16, v24
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vsadd_vi_v32i64_unmasked:
+; RV64:       # %bb.0:
+; RV64-NEXT:    li a2, 16
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    bltu a0, a2, .LBB109_2
+; RV64-NEXT:  # %bb.1:
+; RV64-NEXT:    li a1, 16
+; RV64-NEXT:  .LBB109_2:
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vsadd.vi v8, v8, -1
+; RV64-NEXT:    addi a1, a0, -16
+; RV64-NEXT:    sltu a0, a0, a1
+; RV64-NEXT:    addi a0, a0, -1
+; RV64-NEXT:    and a0, a0, a1
+; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV64-NEXT:    vsadd.vi v16, v16, -1
+; RV64-NEXT:    ret
+  %elt.head = insertelement <32 x i64> poison, i64 -1, i32 0
+  %vb = shufflevector <32 x i64> %elt.head, <32 x i64> poison, <32 x i32> zeroinitializer
+  %head = insertelement <32 x i1> poison, i1 true, i32 0
+  %m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer
+  %v = call <32 x i64> @llvm.vp.sadd.sat.v32i64(<32 x i64> %va, <32 x i64> %vb, <32 x i1> %m, i32 %evl)
+  ret <32 x i64> %v
+}
+
+; FIXME: We don't match vsadd.vi on RV32.
+
+define <32 x i64> @vsadd_vx_v32i64_evl12(<32 x i64> %va, <32 x i1> %m) {
+; RV32-LABEL: vsadd_vx_v32i64_evl12:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
+; RV32-NEXT:    vslidedown.vi v1, v0, 2
+; RV32-NEXT:    li a0, 32
+; RV32-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
+; RV32-NEXT:    vmv.v.i v24, -1
+; RV32-NEXT:    vsetivli zero, 12, e64, m8, ta, ma
+; RV32-NEXT:    vsadd.vv v8, v8, v24, v0.t
+; RV32-NEXT:    vsetivli zero, 0, e64, m8, ta, ma
+; RV32-NEXT:    vmv1r.v v0, v1
+; RV32-NEXT:    vsadd.vv v16, v16, v24, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vsadd_vx_v32i64_evl12:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
+; RV64-NEXT:    vslidedown.vi v24, v0, 2
+; RV64-NEXT:    vsetivli zero, 12, e64, m8, ta, ma
+; RV64-NEXT:    vsadd.vi v8, v8, -1, v0.t
+; RV64-NEXT:    vsetivli zero, 0, e64, m8, ta, ma
+; RV64-NEXT:    vmv1r.v v0, v24
+; RV64-NEXT:    vsadd.vi v16, v16, -1, v0.t
+; RV64-NEXT:    ret
+  %elt.head = insertelement <32 x i64> poison, i64 -1, i32 0
+  %vb = shufflevector <32 x i64> %elt.head, <32 x i64> poison, <32 x i32> zeroinitializer
+  %v = call <32 x i64> @llvm.vp.sadd.sat.v32i64(<32 x i64> %va, <32 x i64> %vb, <32 x i1> %m, i32 12)
+  ret <32 x i64> %v
+}
+
+define <32 x i64> @vsadd_vx_v32i64_evl27(<32 x i64> %va, <32 x i1> %m) {
+; RV32-LABEL: vsadd_vx_v32i64_evl27:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
+; RV32-NEXT:    vslidedown.vi v1, v0, 2
+; RV32-NEXT:    li a0, 32
+; RV32-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
+; RV32-NEXT:    vmv.v.i v24, -1
+; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
+; RV32-NEXT:    vsadd.vv v8, v8, v24, v0.t
+; RV32-NEXT:    vsetivli zero, 11, e64, m8, ta, ma
+; RV32-NEXT:    vmv1r.v v0, v1
+; RV32-NEXT:    vsadd.vv v16, v16, v24, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vsadd_vx_v32i64_evl27:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
+; RV64-NEXT:    vslidedown.vi v24, v0, 2
+; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
+; RV64-NEXT:    vsadd.vi v8, v8, -1, v0.t
+; RV64-NEXT:    vsetivli zero, 11, e64, m8, ta, ma
+; RV64-NEXT:    vmv1r.v v0, v24
+; RV64-NEXT:    vsadd.vi v16, v16, -1, v0.t
+; RV64-NEXT:    ret
+  %elt.head = insertelement <32 x i64> poison, i64 -1, i32 0
+  %vb = shufflevector <32 x i64> %elt.head, <32 x i64> poison, <32 x i32> zeroinitializer
+  %v = call <32 x i64> @llvm.vp.sadd.sat.v32i64(<32 x i64> %va, <32 x i64> %vb, <32 x i1> %m, i32 27)
+  ret <32 x i64> %v
+}

diff  --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsaddu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsaddu-vp.ll
new file mode 100644
index 00000000000000..6227f8abe599e4
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsaddu-vp.ll
@@ -0,0 +1,1697 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s --check-prefixes=CHECK,RV64
+
+declare <8 x i7> @llvm.vp.uadd.sat.v8i7(<8 x i7>, <8 x i7>, <8 x i1>, i32)
+
+define <8 x i7> @vsaddu_vv_v8i7(<8 x i7> %va, <8 x i7> %b, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_v8i7:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 127
+; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT:    vand.vx v9, v9, a1
+; CHECK-NEXT:    vand.vx v8, v8, a1
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    vminu.vx v8, v8, a1, v0.t
+; CHECK-NEXT:    ret
+  %v = call <8 x i7> @llvm.vp.uadd.sat.v8i7(<8 x i7> %va, <8 x i7> %b, <8 x i1> %m, i32 %evl)
+  ret <8 x i7> %v
+}
+
+declare <2 x i8> @llvm.vp.uadd.sat.v2i8(<2 x i8>, <2 x i8>, <2 x i1>, i32)
+
+define <2 x i8> @vsaddu_vv_v2i8(<2 x i8> %va, <2 x i8> %b, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_v2i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-NEXT:    vsaddu.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    ret
+  %v = call <2 x i8> @llvm.vp.uadd.sat.v2i8(<2 x i8> %va, <2 x i8> %b, <2 x i1> %m, i32 %evl)
+  ret <2 x i8> %v
+}
+
+define <2 x i8> @vsaddu_vv_v2i8_unmasked(<2 x i8> %va, <2 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_v2i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-NEXT:    vsaddu.vv v8, v8, v9
+; CHECK-NEXT:    ret
+  %head = insertelement <2 x i1> poison, i1 true, i32 0
+  %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
+  %v = call <2 x i8> @llvm.vp.uadd.sat.v2i8(<2 x i8> %va, <2 x i8> %b, <2 x i1> %m, i32 %evl)
+  ret <2 x i8> %v
+}
+
+define <2 x i8> @vsaddu_vx_v2i8(<2 x i8> %va, i8 %b, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_v2i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT:    vsaddu.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <2 x i8> poison, i8 %b, i32 0
+  %vb = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer
+  %v = call <2 x i8> @llvm.vp.uadd.sat.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 %evl)
+  ret <2 x i8> %v
+}
+
+define <2 x i8> @vsaddu_vx_v2i8_unmasked(<2 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_v2i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT:    vsaddu.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <2 x i8> poison, i8 %b, i32 0
+  %vb = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer
+  %head = insertelement <2 x i1> poison, i1 true, i32 0
+  %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
+  %v = call <2 x i8> @llvm.vp.uadd.sat.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 %evl)
+  ret <2 x i8> %v
+}
+
+define <2 x i8> @vsaddu_vi_v2i8(<2 x i8> %va, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_v2i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-NEXT:    vsaddu.vi v8, v8, -1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <2 x i8> poison, i8 -1, i32 0
+  %vb = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer
+  %v = call <2 x i8> @llvm.vp.uadd.sat.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 %evl)
+  ret <2 x i8> %v
+}
+
+define <2 x i8> @vsaddu_vi_v2i8_unmasked(<2 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_v2i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-NEXT:    vsaddu.vi v8, v8, -1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <2 x i8> poison, i8 -1, i32 0
+  %vb = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer
+  %head = insertelement <2 x i1> poison, i1 true, i32 0
+  %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
+  %v = call <2 x i8> @llvm.vp.uadd.sat.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 %evl)
+  ret <2 x i8> %v
+}
+
+declare <4 x i8> @llvm.vp.uadd.sat.v4i8(<4 x i8>, <4 x i8>, <4 x i1>, i32)
+
+define <4 x i8> @vsaddu_vv_v4i8(<4 x i8> %va, <4 x i8> %b, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_v4i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-NEXT:    vsaddu.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    ret
+  %v = call <4 x i8> @llvm.vp.uadd.sat.v4i8(<4 x i8> %va, <4 x i8> %b, <4 x i1> %m, i32 %evl)
+  ret <4 x i8> %v
+}
+
+define <4 x i8> @vsaddu_vv_v4i8_unmasked(<4 x i8> %va, <4 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_v4i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-NEXT:    vsaddu.vv v8, v8, v9
+; CHECK-NEXT:    ret
+  %head = insertelement <4 x i1> poison, i1 true, i32 0
+  %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
+  %v = call <4 x i8> @llvm.vp.uadd.sat.v4i8(<4 x i8> %va, <4 x i8> %b, <4 x i1> %m, i32 %evl)
+  ret <4 x i8> %v
+}
+
+define <4 x i8> @vsaddu_vx_v4i8(<4 x i8> %va, i8 %b, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_v4i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, ta, ma
+; CHECK-NEXT:    vsaddu.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <4 x i8> poison, i8 %b, i32 0
+  %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer
+  %v = call <4 x i8> @llvm.vp.uadd.sat.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> %m, i32 %evl)
+  ret <4 x i8> %v
+}
+
+define <4 x i8> @vsaddu_vx_v4i8_commute(<4 x i8> %va, i8 %b, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_v4i8_commute:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, ta, ma
+; CHECK-NEXT:    vsaddu.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <4 x i8> poison, i8 %b, i32 0
+  %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer
+  %v = call <4 x i8> @llvm.vp.uadd.sat.v4i8(<4 x i8> %vb, <4 x i8> %va, <4 x i1> %m, i32 %evl)
+  ret <4 x i8> %v
+}
+
+define <4 x i8> @vsaddu_vx_v4i8_unmasked(<4 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_v4i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, ta, ma
+; CHECK-NEXT:    vsaddu.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <4 x i8> poison, i8 %b, i32 0
+  %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer
+  %head = insertelement <4 x i1> poison, i1 true, i32 0
+  %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
+  %v = call <4 x i8> @llvm.vp.uadd.sat.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> %m, i32 %evl)
+  ret <4 x i8> %v
+}
+
+define <4 x i8> @vsaddu_vi_v4i8(<4 x i8> %va, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_v4i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-NEXT:    vsaddu.vi v8, v8, -1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <4 x i8> poison, i8 -1, i32 0
+  %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer
+  %v = call <4 x i8> @llvm.vp.uadd.sat.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> %m, i32 %evl)
+  ret <4 x i8> %v
+}
+
+define <4 x i8> @vsaddu_vi_v4i8_unmasked(<4 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_v4i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-NEXT:    vsaddu.vi v8, v8, -1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <4 x i8> poison, i8 -1, i32 0
+  %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer
+  %head = insertelement <4 x i1> poison, i1 true, i32 0
+  %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
+  %v = call <4 x i8> @llvm.vp.uadd.sat.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> %m, i32 %evl)
+  ret <4 x i8> %v
+}
+
+declare <5 x i8> @llvm.vp.uadd.sat.v5i8(<5 x i8>, <5 x i8>, <5 x i1>, i32)
+
+define <5 x i8> @vsaddu_vv_v5i8(<5 x i8> %va, <5 x i8> %b, <5 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_v5i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT:    vsaddu.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    ret
+  %v = call <5 x i8> @llvm.vp.uadd.sat.v5i8(<5 x i8> %va, <5 x i8> %b, <5 x i1> %m, i32 %evl)
+  ret <5 x i8> %v
+}
+
+define <5 x i8> @vsaddu_vv_v5i8_unmasked(<5 x i8> %va, <5 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_v5i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT:    vsaddu.vv v8, v8, v9
+; CHECK-NEXT:    ret
+  %head = insertelement <5 x i1> poison, i1 true, i32 0
+  %m = shufflevector <5 x i1> %head, <5 x i1> poison, <5 x i32> zeroinitializer
+  %v = call <5 x i8> @llvm.vp.uadd.sat.v5i8(<5 x i8> %va, <5 x i8> %b, <5 x i1> %m, i32 %evl)
+  ret <5 x i8> %v
+}
+
+define <5 x i8> @vsaddu_vx_v5i8(<5 x i8> %va, i8 %b, <5 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_v5i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-NEXT:    vsaddu.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <5 x i8> poison, i8 %b, i32 0
+  %vb = shufflevector <5 x i8> %elt.head, <5 x i8> poison, <5 x i32> zeroinitializer
+  %v = call <5 x i8> @llvm.vp.uadd.sat.v5i8(<5 x i8> %va, <5 x i8> %vb, <5 x i1> %m, i32 %evl)
+  ret <5 x i8> %v
+}
+
+define <5 x i8> @vsaddu_vx_v5i8_unmasked(<5 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_v5i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-NEXT:    vsaddu.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <5 x i8> poison, i8 %b, i32 0
+  %vb = shufflevector <5 x i8> %elt.head, <5 x i8> poison, <5 x i32> zeroinitializer
+  %head = insertelement <5 x i1> poison, i1 true, i32 0
+  %m = shufflevector <5 x i1> %head, <5 x i1> poison, <5 x i32> zeroinitializer
+  %v = call <5 x i8> @llvm.vp.uadd.sat.v5i8(<5 x i8> %va, <5 x i8> %vb, <5 x i1> %m, i32 %evl)
+  ret <5 x i8> %v
+}
+
+define <5 x i8> @vsaddu_vi_v5i8(<5 x i8> %va, <5 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_v5i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT:    vsaddu.vi v8, v8, -1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <5 x i8> poison, i8 -1, i32 0
+  %vb = shufflevector <5 x i8> %elt.head, <5 x i8> poison, <5 x i32> zeroinitializer
+  %v = call <5 x i8> @llvm.vp.uadd.sat.v5i8(<5 x i8> %va, <5 x i8> %vb, <5 x i1> %m, i32 %evl)
+  ret <5 x i8> %v
+}
+
+define <5 x i8> @vsaddu_vi_v5i8_unmasked(<5 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_v5i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT:    vsaddu.vi v8, v8, -1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <5 x i8> poison, i8 -1, i32 0
+  %vb = shufflevector <5 x i8> %elt.head, <5 x i8> poison, <5 x i32> zeroinitializer
+  %head = insertelement <5 x i1> poison, i1 true, i32 0
+  %m = shufflevector <5 x i1> %head, <5 x i1> poison, <5 x i32> zeroinitializer
+  %v = call <5 x i8> @llvm.vp.uadd.sat.v5i8(<5 x i8> %va, <5 x i8> %vb, <5 x i1> %m, i32 %evl)
+  ret <5 x i8> %v
+}
+
+declare <8 x i8> @llvm.vp.uadd.sat.v8i8(<8 x i8>, <8 x i8>, <8 x i1>, i32)
+
+define <8 x i8> @vsaddu_vv_v8i8(<8 x i8> %va, <8 x i8> %b, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_v8i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT:    vsaddu.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    ret
+  %v = call <8 x i8> @llvm.vp.uadd.sat.v8i8(<8 x i8> %va, <8 x i8> %b, <8 x i1> %m, i32 %evl)
+  ret <8 x i8> %v
+}
+
+define <8 x i8> @vsaddu_vv_v8i8_unmasked(<8 x i8> %va, <8 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_v8i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT:    vsaddu.vv v8, v8, v9
+; CHECK-NEXT:    ret
+  %head = insertelement <8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+  %v = call <8 x i8> @llvm.vp.uadd.sat.v8i8(<8 x i8> %va, <8 x i8> %b, <8 x i1> %m, i32 %evl)
+  ret <8 x i8> %v
+}
+
+define <8 x i8> @vsaddu_vx_v8i8(<8 x i8> %va, i8 %b, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_v8i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-NEXT:    vsaddu.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <8 x i8> poison, i8 %b, i32 0
+  %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer
+  %v = call <8 x i8> @llvm.vp.uadd.sat.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 %evl)
+  ret <8 x i8> %v
+}
+
+define <8 x i8> @vsaddu_vx_v8i8_unmasked(<8 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_v8i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-NEXT:    vsaddu.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <8 x i8> poison, i8 %b, i32 0
+  %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer
+  %head = insertelement <8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+  %v = call <8 x i8> @llvm.vp.uadd.sat.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 %evl)
+  ret <8 x i8> %v
+}
+
+define <8 x i8> @vsaddu_vi_v8i8(<8 x i8> %va, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_v8i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT:    vsaddu.vi v8, v8, -1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <8 x i8> poison, i8 -1, i32 0
+  %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer
+  %v = call <8 x i8> @llvm.vp.uadd.sat.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 %evl)
+  ret <8 x i8> %v
+}
+
+define <8 x i8> @vsaddu_vi_v8i8_unmasked(<8 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_v8i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT:    vsaddu.vi v8, v8, -1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <8 x i8> poison, i8 -1, i32 0
+  %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer
+  %head = insertelement <8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+  %v = call <8 x i8> @llvm.vp.uadd.sat.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 %evl)
+  ret <8 x i8> %v
+}
+
+declare <16 x i8> @llvm.vp.uadd.sat.v16i8(<16 x i8>, <16 x i8>, <16 x i1>, i32)
+
+define <16 x i8> @vsaddu_vv_v16i8(<16 x i8> %va, <16 x i8> %b, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_v16i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-NEXT:    vsaddu.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    ret
+  %v = call <16 x i8> @llvm.vp.uadd.sat.v16i8(<16 x i8> %va, <16 x i8> %b, <16 x i1> %m, i32 %evl)
+  ret <16 x i8> %v
+}
+
+define <16 x i8> @vsaddu_vv_v16i8_unmasked(<16 x i8> %va, <16 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_v16i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-NEXT:    vsaddu.vv v8, v8, v9
+; CHECK-NEXT:    ret
+  %head = insertelement <16 x i1> poison, i1 true, i32 0
+  %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
+  %v = call <16 x i8> @llvm.vp.uadd.sat.v16i8(<16 x i8> %va, <16 x i8> %b, <16 x i1> %m, i32 %evl)
+  ret <16 x i8> %v
+}
+
+define <16 x i8> @vsaddu_vx_v16i8(<16 x i8> %va, i8 %b, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_v16i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-NEXT:    vsaddu.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <16 x i8> poison, i8 %b, i32 0
+  %vb = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer
+  %v = call <16 x i8> @llvm.vp.uadd.sat.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %m, i32 %evl)
+  ret <16 x i8> %v
+}
+
+define <16 x i8> @vsaddu_vx_v16i8_unmasked(<16 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_v16i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-NEXT:    vsaddu.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <16 x i8> poison, i8 %b, i32 0
+  %vb = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer
+  %head = insertelement <16 x i1> poison, i1 true, i32 0
+  %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
+  %v = call <16 x i8> @llvm.vp.uadd.sat.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %m, i32 %evl)
+  ret <16 x i8> %v
+}
+
+define <16 x i8> @vsaddu_vi_v16i8(<16 x i8> %va, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_v16i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-NEXT:    vsaddu.vi v8, v8, -1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <16 x i8> poison, i8 -1, i32 0
+  %vb = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer
+  %v = call <16 x i8> @llvm.vp.uadd.sat.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %m, i32 %evl)
+  ret <16 x i8> %v
+}
+
+define <16 x i8> @vsaddu_vi_v16i8_unmasked(<16 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_v16i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-NEXT:    vsaddu.vi v8, v8, -1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <16 x i8> poison, i8 -1, i32 0
+  %vb = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer
+  %head = insertelement <16 x i1> poison, i1 true, i32 0
+  %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
+  %v = call <16 x i8> @llvm.vp.uadd.sat.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %m, i32 %evl)
+  ret <16 x i8> %v
+}
+
+declare <256 x i8> @llvm.vp.uadd.sat.v258i8(<256 x i8>, <256 x i8>, <256 x i1>, i32)
+
+define <256 x i8> @vsaddu_vi_v258i8(<256 x i8> %va, <256 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_v258i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vmv1r.v v24, v0
+; CHECK-NEXT:    li a2, 128
+; CHECK-NEXT:    vsetvli zero, a2, e8, m8, ta, ma
+; CHECK-NEXT:    vlm.v v0, (a0)
+; CHECK-NEXT:    addi a0, a1, -128
+; CHECK-NEXT:    sltu a3, a1, a0
+; CHECK-NEXT:    addi a3, a3, -1
+; CHECK-NEXT:    and a0, a3, a0
+; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-NEXT:    vsaddu.vi v16, v16, -1, v0.t
+; CHECK-NEXT:    bltu a1, a2, .LBB32_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    li a1, 128
+; CHECK-NEXT:  .LBB32_2:
+; CHECK-NEXT:    vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-NEXT:    vmv1r.v v0, v24
+; CHECK-NEXT:    vsaddu.vi v8, v8, -1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <256 x i8> poison, i8 -1, i32 0
+  %vb = shufflevector <256 x i8> %elt.head, <256 x i8> poison, <256 x i32> zeroinitializer
+  %v = call <256 x i8> @llvm.vp.uadd.sat.v258i8(<256 x i8> %va, <256 x i8> %vb, <256 x i1> %m, i32 %evl)
+  ret <256 x i8> %v
+}
+
+define <256 x i8> @vsaddu_vi_v258i8_unmasked(<256 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_v258i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a2, 128
+; CHECK-NEXT:    mv a1, a0
+; CHECK-NEXT:    bltu a0, a2, .LBB33_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    li a1, 128
+; CHECK-NEXT:  .LBB33_2:
+; CHECK-NEXT:    vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-NEXT:    vsaddu.vi v8, v8, -1
+; CHECK-NEXT:    addi a1, a0, -128
+; CHECK-NEXT:    sltu a0, a0, a1
+; CHECK-NEXT:    addi a0, a0, -1
+; CHECK-NEXT:    and a0, a0, a1
+; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-NEXT:    vsaddu.vi v16, v16, -1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <256 x i8> poison, i8 -1, i32 0
+  %vb = shufflevector <256 x i8> %elt.head, <256 x i8> poison, <256 x i32> zeroinitializer
+  %head = insertelement <256 x i1> poison, i1 true, i32 0
+  %m = shufflevector <256 x i1> %head, <256 x i1> poison, <256 x i32> zeroinitializer
+  %v = call <256 x i8> @llvm.vp.uadd.sat.v258i8(<256 x i8> %va, <256 x i8> %vb, <256 x i1> %m, i32 %evl)
+  ret <256 x i8> %v
+}
+
+; Test splitting when the %evl is a known constant.
+
+define <256 x i8> @vsaddu_vi_v258i8_evl129(<256 x i8> %va, <256 x i1> %m) {
+; CHECK-LABEL: vsaddu_vi_v258i8_evl129:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 128
+; CHECK-NEXT:    vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-NEXT:    vlm.v v24, (a0)
+; CHECK-NEXT:    vsaddu.vi v8, v8, -1, v0.t
+; CHECK-NEXT:    vsetivli zero, 1, e8, m8, ta, ma
+; CHECK-NEXT:    vmv1r.v v0, v24
+; CHECK-NEXT:    vsaddu.vi v16, v16, -1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <256 x i8> poison, i8 -1, i32 0
+  %vb = shufflevector <256 x i8> %elt.head, <256 x i8> poison, <256 x i32> zeroinitializer
+  %v = call <256 x i8> @llvm.vp.uadd.sat.v258i8(<256 x i8> %va, <256 x i8> %vb, <256 x i1> %m, i32 129)
+  ret <256 x i8> %v
+}
+
+; FIXME: The upper half is doing nothing.
+
+define <256 x i8> @vsaddu_vi_v258i8_evl128(<256 x i8> %va, <256 x i1> %m) {
+; CHECK-LABEL: vsaddu_vi_v258i8_evl128:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 128
+; CHECK-NEXT:    vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-NEXT:    vlm.v v24, (a0)
+; CHECK-NEXT:    vsaddu.vi v8, v8, -1, v0.t
+; CHECK-NEXT:    vsetivli zero, 0, e8, m8, ta, ma
+; CHECK-NEXT:    vmv1r.v v0, v24
+; CHECK-NEXT:    vsaddu.vi v16, v16, -1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <256 x i8> poison, i8 -1, i32 0
+  %vb = shufflevector <256 x i8> %elt.head, <256 x i8> poison, <256 x i32> zeroinitializer
+  %v = call <256 x i8> @llvm.vp.uadd.sat.v258i8(<256 x i8> %va, <256 x i8> %vb, <256 x i1> %m, i32 128)
+  ret <256 x i8> %v
+}
+
+declare <2 x i16> @llvm.vp.uadd.sat.v2i16(<2 x i16>, <2 x i16>, <2 x i1>, i32)
+
+define <2 x i16> @vsaddu_vv_v2i16(<2 x i16> %va, <2 x i16> %b, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_v2i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT:    vsaddu.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    ret
+  %v = call <2 x i16> @llvm.vp.uadd.sat.v2i16(<2 x i16> %va, <2 x i16> %b, <2 x i1> %m, i32 %evl)
+  ret <2 x i16> %v
+}
+
+define <2 x i16> @vsaddu_vv_v2i16_unmasked(<2 x i16> %va, <2 x i16> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_v2i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT:    vsaddu.vv v8, v8, v9
+; CHECK-NEXT:    ret
+  %head = insertelement <2 x i1> poison, i1 true, i32 0
+  %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
+  %v = call <2 x i16> @llvm.vp.uadd.sat.v2i16(<2 x i16> %va, <2 x i16> %b, <2 x i1> %m, i32 %evl)
+  ret <2 x i16> %v
+}
+
+define <2 x i16> @vsaddu_vx_v2i16(<2 x i16> %va, i16 %b, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_v2i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-NEXT:    vsaddu.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <2 x i16> poison, i16 %b, i32 0
+  %vb = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer
+  %v = call <2 x i16> @llvm.vp.uadd.sat.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> %m, i32 %evl)
+  ret <2 x i16> %v
+}
+
+define <2 x i16> @vsaddu_vx_v2i16_unmasked(<2 x i16> %va, i16 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_v2i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-NEXT:    vsaddu.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <2 x i16> poison, i16 %b, i32 0
+  %vb = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer
+  %head = insertelement <2 x i1> poison, i1 true, i32 0
+  %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
+  %v = call <2 x i16> @llvm.vp.uadd.sat.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> %m, i32 %evl)
+  ret <2 x i16> %v
+}
+
+define <2 x i16> @vsaddu_vi_v2i16(<2 x i16> %va, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_v2i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT:    vsaddu.vi v8, v8, -1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <2 x i16> poison, i16 -1, i32 0
+  %vb = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer
+  %v = call <2 x i16> @llvm.vp.uadd.sat.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> %m, i32 %evl)
+  ret <2 x i16> %v
+}
+
+define <2 x i16> @vsaddu_vi_v2i16_unmasked(<2 x i16> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_v2i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT:    vsaddu.vi v8, v8, -1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <2 x i16> poison, i16 -1, i32 0
+  %vb = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer
+  %head = insertelement <2 x i1> poison, i1 true, i32 0
+  %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
+  %v = call <2 x i16> @llvm.vp.uadd.sat.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> %m, i32 %evl)
+  ret <2 x i16> %v
+}
+
+declare <4 x i16> @llvm.vp.uadd.sat.v4i16(<4 x i16>, <4 x i16>, <4 x i1>, i32)
+
+define <4 x i16> @vsaddu_vv_v4i16(<4 x i16> %va, <4 x i16> %b, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_v4i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT:    vsaddu.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    ret
+  %v = call <4 x i16> @llvm.vp.uadd.sat.v4i16(<4 x i16> %va, <4 x i16> %b, <4 x i1> %m, i32 %evl)
+  ret <4 x i16> %v
+}
+
+define <4 x i16> @vsaddu_vv_v4i16_unmasked(<4 x i16> %va, <4 x i16> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_v4i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT:    vsaddu.vv v8, v8, v9
+; CHECK-NEXT:    ret
+  %head = insertelement <4 x i1> poison, i1 true, i32 0
+  %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
+  %v = call <4 x i16> @llvm.vp.uadd.sat.v4i16(<4 x i16> %va, <4 x i16> %b, <4 x i1> %m, i32 %evl)
+  ret <4 x i16> %v
+}
+
+define <4 x i16> @vsaddu_vx_v4i16(<4 x i16> %va, i16 %b, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_v4i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-NEXT:    vsaddu.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <4 x i16> poison, i16 %b, i32 0
+  %vb = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer
+  %v = call <4 x i16> @llvm.vp.uadd.sat.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 %evl)
+  ret <4 x i16> %v
+}
+
+define <4 x i16> @vsaddu_vx_v4i16_unmasked(<4 x i16> %va, i16 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_v4i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-NEXT:    vsaddu.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <4 x i16> poison, i16 %b, i32 0
+  %vb = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer
+  %head = insertelement <4 x i1> poison, i1 true, i32 0
+  %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
+  %v = call <4 x i16> @llvm.vp.uadd.sat.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 %evl)
+  ret <4 x i16> %v
+}
+
+define <4 x i16> @vsaddu_vi_v4i16(<4 x i16> %va, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_v4i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT:    vsaddu.vi v8, v8, -1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <4 x i16> poison, i16 -1, i32 0
+  %vb = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer
+  %v = call <4 x i16> @llvm.vp.uadd.sat.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 %evl)
+  ret <4 x i16> %v
+}
+
+define <4 x i16> @vsaddu_vi_v4i16_unmasked(<4 x i16> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_v4i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT:    vsaddu.vi v8, v8, -1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <4 x i16> poison, i16 -1, i32 0
+  %vb = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer
+  %head = insertelement <4 x i1> poison, i1 true, i32 0
+  %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
+  %v = call <4 x i16> @llvm.vp.uadd.sat.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 %evl)
+  ret <4 x i16> %v
+}
+
+declare <8 x i16> @llvm.vp.uadd.sat.v8i16(<8 x i16>, <8 x i16>, <8 x i1>, i32)
+
+define <8 x i16> @vsaddu_vv_v8i16(<8 x i16> %va, <8 x i16> %b, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_v8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT:    vsaddu.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    ret
+  %v = call <8 x i16> @llvm.vp.uadd.sat.v8i16(<8 x i16> %va, <8 x i16> %b, <8 x i1> %m, i32 %evl)
+  ret <8 x i16> %v
+}
+
+define <8 x i16> @vsaddu_vv_v8i16_unmasked(<8 x i16> %va, <8 x i16> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_v8i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT:    vsaddu.vv v8, v8, v9
+; CHECK-NEXT:    ret
+  %head = insertelement <8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+  %v = call <8 x i16> @llvm.vp.uadd.sat.v8i16(<8 x i16> %va, <8 x i16> %b, <8 x i1> %m, i32 %evl)
+  ret <8 x i16> %v
+}
+
+define <8 x i16> @vsaddu_vx_v8i16(<8 x i16> %va, i16 %b, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_v8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT:    vsaddu.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <8 x i16> poison, i16 %b, i32 0
+  %vb = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer
+  %v = call <8 x i16> @llvm.vp.uadd.sat.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %m, i32 %evl)
+  ret <8 x i16> %v
+}
+
+define <8 x i16> @vsaddu_vx_v8i16_unmasked(<8 x i16> %va, i16 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_v8i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT:    vsaddu.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <8 x i16> poison, i16 %b, i32 0
+  %vb = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer
+  %head = insertelement <8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+  %v = call <8 x i16> @llvm.vp.uadd.sat.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %m, i32 %evl)
+  ret <8 x i16> %v
+}
+
+define <8 x i16> @vsaddu_vi_v8i16(<8 x i16> %va, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_v8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT:    vsaddu.vi v8, v8, -1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <8 x i16> poison, i16 -1, i32 0
+  %vb = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer
+  %v = call <8 x i16> @llvm.vp.uadd.sat.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %m, i32 %evl)
+  ret <8 x i16> %v
+}
+
+define <8 x i16> @vsaddu_vi_v8i16_unmasked(<8 x i16> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_v8i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT:    vsaddu.vi v8, v8, -1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <8 x i16> poison, i16 -1, i32 0
+  %vb = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer
+  %head = insertelement <8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+  %v = call <8 x i16> @llvm.vp.uadd.sat.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %m, i32 %evl)
+  ret <8 x i16> %v
+}
+
+declare <16 x i16> @llvm.vp.uadd.sat.v16i16(<16 x i16>, <16 x i16>, <16 x i1>, i32)
+
+define <16 x i16> @vsaddu_vv_v16i16(<16 x i16> %va, <16 x i16> %b, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_v16i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT:    vsaddu.vv v8, v8, v10, v0.t
+; CHECK-NEXT:    ret
+  %v = call <16 x i16> @llvm.vp.uadd.sat.v16i16(<16 x i16> %va, <16 x i16> %b, <16 x i1> %m, i32 %evl)
+  ret <16 x i16> %v
+}
+
+define <16 x i16> @vsaddu_vv_v16i16_unmasked(<16 x i16> %va, <16 x i16> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_v16i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT:    vsaddu.vv v8, v8, v10
+; CHECK-NEXT:    ret
+  %head = insertelement <16 x i1> poison, i1 true, i32 0
+  %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
+  %v = call <16 x i16> @llvm.vp.uadd.sat.v16i16(<16 x i16> %va, <16 x i16> %b, <16 x i1> %m, i32 %evl)
+  ret <16 x i16> %v
+}
+
+define <16 x i16> @vsaddu_vx_v16i16(<16 x i16> %va, i16 %b, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_v16i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
+; CHECK-NEXT:    vsaddu.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <16 x i16> poison, i16 %b, i32 0
+  %vb = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer
+  %v = call <16 x i16> @llvm.vp.uadd.sat.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 %evl)
+  ret <16 x i16> %v
+}
+
+define <16 x i16> @vsaddu_vx_v16i16_unmasked(<16 x i16> %va, i16 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_v16i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
+; CHECK-NEXT:    vsaddu.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <16 x i16> poison, i16 %b, i32 0
+  %vb = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer
+  %head = insertelement <16 x i1> poison, i1 true, i32 0
+  %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
+  %v = call <16 x i16> @llvm.vp.uadd.sat.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 %evl)
+  ret <16 x i16> %v
+}
+
+define <16 x i16> @vsaddu_vi_v16i16(<16 x i16> %va, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_v16i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT:    vsaddu.vi v8, v8, -1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <16 x i16> poison, i16 -1, i32 0
+  %vb = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer
+  %v = call <16 x i16> @llvm.vp.uadd.sat.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 %evl)
+  ret <16 x i16> %v
+}
+
+define <16 x i16> @vsaddu_vi_v16i16_unmasked(<16 x i16> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_v16i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT:    vsaddu.vi v8, v8, -1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <16 x i16> poison, i16 -1, i32 0
+  %vb = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer
+  %head = insertelement <16 x i1> poison, i1 true, i32 0
+  %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
+  %v = call <16 x i16> @llvm.vp.uadd.sat.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 %evl)
+  ret <16 x i16> %v
+}
+
+declare <2 x i32> @llvm.vp.uadd.sat.v2i32(<2 x i32>, <2 x i32>, <2 x i1>, i32)
+
+define <2 x i32> @vsaddu_vv_v2i32(<2 x i32> %va, <2 x i32> %b, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_v2i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT:    vsaddu.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    ret
+  %v = call <2 x i32> @llvm.vp.uadd.sat.v2i32(<2 x i32> %va, <2 x i32> %b, <2 x i1> %m, i32 %evl)
+  ret <2 x i32> %v
+}
+
+define <2 x i32> @vsaddu_vv_v2i32_unmasked(<2 x i32> %va, <2 x i32> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_v2i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT:    vsaddu.vv v8, v8, v9
+; CHECK-NEXT:    ret
+  %head = insertelement <2 x i1> poison, i1 true, i32 0
+  %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
+  %v = call <2 x i32> @llvm.vp.uadd.sat.v2i32(<2 x i32> %va, <2 x i32> %b, <2 x i1> %m, i32 %evl)
+  ret <2 x i32> %v
+}
+
+define <2 x i32> @vsaddu_vx_v2i32(<2 x i32> %va, i32 %b, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_v2i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-NEXT:    vsaddu.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <2 x i32> poison, i32 %b, i32 0
+  %vb = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer
+  %v = call <2 x i32> @llvm.vp.uadd.sat.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> %m, i32 %evl)
+  ret <2 x i32> %v
+}
+
+define <2 x i32> @vsaddu_vx_v2i32_unmasked(<2 x i32> %va, i32 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_v2i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-NEXT:    vsaddu.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <2 x i32> poison, i32 %b, i32 0
+  %vb = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer
+  %head = insertelement <2 x i1> poison, i1 true, i32 0
+  %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
+  %v = call <2 x i32> @llvm.vp.uadd.sat.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> %m, i32 %evl)
+  ret <2 x i32> %v
+}
+
+define <2 x i32> @vsaddu_vi_v2i32(<2 x i32> %va, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_v2i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT:    vsaddu.vi v8, v8, -1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <2 x i32> poison, i32 -1, i32 0
+  %vb = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer
+  %v = call <2 x i32> @llvm.vp.uadd.sat.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> %m, i32 %evl)
+  ret <2 x i32> %v
+}
+
+define <2 x i32> @vsaddu_vi_v2i32_unmasked(<2 x i32> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_v2i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT:    vsaddu.vi v8, v8, -1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <2 x i32> poison, i32 -1, i32 0
+  %vb = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer
+  %head = insertelement <2 x i1> poison, i1 true, i32 0
+  %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
+  %v = call <2 x i32> @llvm.vp.uadd.sat.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> %m, i32 %evl)
+  ret <2 x i32> %v
+}
+
+declare <4 x i32> @llvm.vp.uadd.sat.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32)
+
+define <4 x i32> @vsaddu_vv_v4i32(<4 x i32> %va, <4 x i32> %b, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_v4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT:    vsaddu.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    ret
+  %v = call <4 x i32> @llvm.vp.uadd.sat.v4i32(<4 x i32> %va, <4 x i32> %b, <4 x i1> %m, i32 %evl)
+  ret <4 x i32> %v
+}
+
+define <4 x i32> @vsaddu_vv_v4i32_unmasked(<4 x i32> %va, <4 x i32> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_v4i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT:    vsaddu.vv v8, v8, v9
+; CHECK-NEXT:    ret
+  %head = insertelement <4 x i1> poison, i1 true, i32 0
+  %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
+  %v = call <4 x i32> @llvm.vp.uadd.sat.v4i32(<4 x i32> %va, <4 x i32> %b, <4 x i1> %m, i32 %evl)
+  ret <4 x i32> %v
+}
+
+define <4 x i32> @vsaddu_vx_v4i32(<4 x i32> %va, i32 %b, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_v4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-NEXT:    vsaddu.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <4 x i32> poison, i32 %b, i32 0
+  %vb = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer
+  %v = call <4 x i32> @llvm.vp.uadd.sat.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %m, i32 %evl)
+  ret <4 x i32> %v
+}
+
+define <4 x i32> @vsaddu_vx_v4i32_unmasked(<4 x i32> %va, i32 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_v4i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-NEXT:    vsaddu.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <4 x i32> poison, i32 %b, i32 0
+  %vb = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer
+  %head = insertelement <4 x i1> poison, i1 true, i32 0
+  %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
+  %v = call <4 x i32> @llvm.vp.uadd.sat.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %m, i32 %evl)
+  ret <4 x i32> %v
+}
+
+define <4 x i32> @vsaddu_vi_v4i32(<4 x i32> %va, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_v4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT:    vsaddu.vi v8, v8, -1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <4 x i32> poison, i32 -1, i32 0
+  %vb = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer
+  %v = call <4 x i32> @llvm.vp.uadd.sat.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %m, i32 %evl)
+  ret <4 x i32> %v
+}
+
+define <4 x i32> @vsaddu_vi_v4i32_unmasked(<4 x i32> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_v4i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT:    vsaddu.vi v8, v8, -1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <4 x i32> poison, i32 -1, i32 0
+  %vb = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer
+  %head = insertelement <4 x i1> poison, i1 true, i32 0
+  %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
+  %v = call <4 x i32> @llvm.vp.uadd.sat.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %m, i32 %evl)
+  ret <4 x i32> %v
+}
+
+declare <8 x i32> @llvm.vp.uadd.sat.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
+
+define <8 x i32> @vsaddu_vv_v8i32(<8 x i32> %va, <8 x i32> %b, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_v8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT:    vsaddu.vv v8, v8, v10, v0.t
+; CHECK-NEXT:    ret
+  %v = call <8 x i32> @llvm.vp.uadd.sat.v8i32(<8 x i32> %va, <8 x i32> %b, <8 x i1> %m, i32 %evl)
+  ret <8 x i32> %v
+}
+
+define <8 x i32> @vsaddu_vv_v8i32_unmasked(<8 x i32> %va, <8 x i32> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_v8i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT:    vsaddu.vv v8, v8, v10
+; CHECK-NEXT:    ret
+  %head = insertelement <8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+  %v = call <8 x i32> @llvm.vp.uadd.sat.v8i32(<8 x i32> %va, <8 x i32> %b, <8 x i1> %m, i32 %evl)
+  ret <8 x i32> %v
+}
+
+define <8 x i32> @vsaddu_vx_v8i32(<8 x i32> %va, i32 %b, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_v8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT:    vsaddu.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0
+  %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer
+  %v = call <8 x i32> @llvm.vp.uadd.sat.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 %evl)
+  ret <8 x i32> %v
+}
+
+define <8 x i32> @vsaddu_vx_v8i32_unmasked(<8 x i32> %va, i32 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_v8i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT:    vsaddu.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0
+  %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer
+  %head = insertelement <8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+  %v = call <8 x i32> @llvm.vp.uadd.sat.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 %evl)
+  ret <8 x i32> %v
+}
+
+define <8 x i32> @vsaddu_vi_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_v8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT:    vsaddu.vi v8, v8, -1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <8 x i32> poison, i32 -1, i32 0
+  %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer
+  %v = call <8 x i32> @llvm.vp.uadd.sat.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 %evl)
+  ret <8 x i32> %v
+}
+
+define <8 x i32> @vsaddu_vi_v8i32_unmasked(<8 x i32> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_v8i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT:    vsaddu.vi v8, v8, -1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <8 x i32> poison, i32 -1, i32 0
+  %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer
+  %head = insertelement <8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+  %v = call <8 x i32> @llvm.vp.uadd.sat.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 %evl)
+  ret <8 x i32> %v
+}
+
+declare <16 x i32> @llvm.vp.uadd.sat.v16i32(<16 x i32>, <16 x i32>, <16 x i1>, i32)
+
+define <16 x i32> @vsaddu_vv_v16i32(<16 x i32> %va, <16 x i32> %b, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_v16i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT:    vsaddu.vv v8, v8, v12, v0.t
+; CHECK-NEXT:    ret
+  %v = call <16 x i32> @llvm.vp.uadd.sat.v16i32(<16 x i32> %va, <16 x i32> %b, <16 x i1> %m, i32 %evl)
+  ret <16 x i32> %v
+}
+
+define <16 x i32> @vsaddu_vv_v16i32_unmasked(<16 x i32> %va, <16 x i32> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_v16i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT:    vsaddu.vv v8, v8, v12
+; CHECK-NEXT:    ret
+  %head = insertelement <16 x i1> poison, i1 true, i32 0
+  %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
+  %v = call <16 x i32> @llvm.vp.uadd.sat.v16i32(<16 x i32> %va, <16 x i32> %b, <16 x i1> %m, i32 %evl)
+  ret <16 x i32> %v
+}
+
+define <16 x i32> @vsaddu_vx_v16i32(<16 x i32> %va, i32 %b, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_v16i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-NEXT:    vsaddu.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <16 x i32> poison, i32 %b, i32 0
+  %vb = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer
+  %v = call <16 x i32> @llvm.vp.uadd.sat.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> %m, i32 %evl)
+  ret <16 x i32> %v
+}
+
+define <16 x i32> @vsaddu_vx_v16i32_unmasked(<16 x i32> %va, i32 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_v16i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-NEXT:    vsaddu.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <16 x i32> poison, i32 %b, i32 0
+  %vb = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer
+  %head = insertelement <16 x i1> poison, i1 true, i32 0
+  %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
+  %v = call <16 x i32> @llvm.vp.uadd.sat.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> %m, i32 %evl)
+  ret <16 x i32> %v
+}
+
+define <16 x i32> @vsaddu_vi_v16i32(<16 x i32> %va, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_v16i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT:    vsaddu.vi v8, v8, -1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <16 x i32> poison, i32 -1, i32 0
+  %vb = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer
+  %v = call <16 x i32> @llvm.vp.uadd.sat.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> %m, i32 %evl)
+  ret <16 x i32> %v
+}
+
+define <16 x i32> @vsaddu_vi_v16i32_unmasked(<16 x i32> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_v16i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT:    vsaddu.vi v8, v8, -1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <16 x i32> poison, i32 -1, i32 0
+  %vb = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer
+  %head = insertelement <16 x i1> poison, i1 true, i32 0
+  %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
+  %v = call <16 x i32> @llvm.vp.uadd.sat.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> %m, i32 %evl)
+  ret <16 x i32> %v
+}
+
+declare <2 x i64> @llvm.vp.uadd.sat.v2i64(<2 x i64>, <2 x i64>, <2 x i1>, i32)
+
+define <2 x i64> @vsaddu_vv_v2i64(<2 x i64> %va, <2 x i64> %b, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_v2i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT:    vsaddu.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    ret
+  %v = call <2 x i64> @llvm.vp.uadd.sat.v2i64(<2 x i64> %va, <2 x i64> %b, <2 x i1> %m, i32 %evl)
+  ret <2 x i64> %v
+}
+
+define <2 x i64> @vsaddu_vv_v2i64_unmasked(<2 x i64> %va, <2 x i64> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_v2i64_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT:    vsaddu.vv v8, v8, v9
+; CHECK-NEXT:    ret
+  %head = insertelement <2 x i1> poison, i1 true, i32 0
+  %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
+  %v = call <2 x i64> @llvm.vp.uadd.sat.v2i64(<2 x i64> %va, <2 x i64> %b, <2 x i1> %m, i32 %evl)
+  ret <2 x i64> %v
+}
+
+define <2 x i64> @vsaddu_vx_v2i64(<2 x i64> %va, i64 %b, <2 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vsaddu_vx_v2i64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    sw a1, 12(sp)
+; RV32-NEXT:    sw a0, 8(sp)
+; RV32-NEXT:    addi a0, sp, 8
+; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; RV32-NEXT:    vlse64.v v9, (a0), zero
+; RV32-NEXT:    vsetvli zero, a2, e64, m1, ta, ma
+; RV32-NEXT:    vsaddu.vv v8, v8, v9, v0.t
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vsaddu_vx_v2i64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli zero, a1, e64, m1, ta, ma
+; RV64-NEXT:    vsaddu.vx v8, v8, a0, v0.t
+; RV64-NEXT:    ret
+  %elt.head = insertelement <2 x i64> poison, i64 %b, i32 0
+  %vb = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer
+  %v = call <2 x i64> @llvm.vp.uadd.sat.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %m, i32 %evl)
+  ret <2 x i64> %v
+}
+
+define <2 x i64> @vsaddu_vx_v2i64_unmasked(<2 x i64> %va, i64 %b, i32 zeroext %evl) {
+; RV32-LABEL: vsaddu_vx_v2i64_unmasked:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    sw a1, 12(sp)
+; RV32-NEXT:    sw a0, 8(sp)
+; RV32-NEXT:    addi a0, sp, 8
+; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; RV32-NEXT:    vlse64.v v9, (a0), zero
+; RV32-NEXT:    vsetvli zero, a2, e64, m1, ta, ma
+; RV32-NEXT:    vsaddu.vv v8, v8, v9
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vsaddu_vx_v2i64_unmasked:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli zero, a1, e64, m1, ta, ma
+; RV64-NEXT:    vsaddu.vx v8, v8, a0
+; RV64-NEXT:    ret
+  %elt.head = insertelement <2 x i64> poison, i64 %b, i32 0
+  %vb = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer
+  %head = insertelement <2 x i1> poison, i1 true, i32 0
+  %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
+  %v = call <2 x i64> @llvm.vp.uadd.sat.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %m, i32 %evl)
+  ret <2 x i64> %v
+}
+
+define <2 x i64> @vsaddu_vi_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_v2i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT:    vsaddu.vi v8, v8, -1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <2 x i64> poison, i64 -1, i32 0
+  %vb = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer
+  %v = call <2 x i64> @llvm.vp.uadd.sat.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %m, i32 %evl)
+  ret <2 x i64> %v
+}
+
+define <2 x i64> @vsaddu_vi_v2i64_unmasked(<2 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_v2i64_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT:    vsaddu.vi v8, v8, -1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <2 x i64> poison, i64 -1, i32 0
+  %vb = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer
+  %head = insertelement <2 x i1> poison, i1 true, i32 0
+  %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
+  %v = call <2 x i64> @llvm.vp.uadd.sat.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %m, i32 %evl)
+  ret <2 x i64> %v
+}
+
+declare <4 x i64> @llvm.vp.uadd.sat.v4i64(<4 x i64>, <4 x i64>, <4 x i1>, i32)
+
+define <4 x i64> @vsaddu_vv_v4i64(<4 x i64> %va, <4 x i64> %b, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_v4i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-NEXT:    vsaddu.vv v8, v8, v10, v0.t
+; CHECK-NEXT:    ret
+  %v = call <4 x i64> @llvm.vp.uadd.sat.v4i64(<4 x i64> %va, <4 x i64> %b, <4 x i1> %m, i32 %evl)
+  ret <4 x i64> %v
+}
+
+define <4 x i64> @vsaddu_vv_v4i64_unmasked(<4 x i64> %va, <4 x i64> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_v4i64_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-NEXT:    vsaddu.vv v8, v8, v10
+; CHECK-NEXT:    ret
+  %head = insertelement <4 x i1> poison, i1 true, i32 0
+  %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
+  %v = call <4 x i64> @llvm.vp.uadd.sat.v4i64(<4 x i64> %va, <4 x i64> %b, <4 x i1> %m, i32 %evl)
+  ret <4 x i64> %v
+}
+
+define <4 x i64> @vsaddu_vx_v4i64(<4 x i64> %va, i64 %b, <4 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vsaddu_vx_v4i64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    sw a1, 12(sp)
+; RV32-NEXT:    sw a0, 8(sp)
+; RV32-NEXT:    addi a0, sp, 8
+; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV32-NEXT:    vlse64.v v10, (a0), zero
+; RV32-NEXT:    vsetvli zero, a2, e64, m2, ta, ma
+; RV32-NEXT:    vsaddu.vv v8, v8, v10, v0.t
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vsaddu_vx_v4i64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli zero, a1, e64, m2, ta, ma
+; RV64-NEXT:    vsaddu.vx v8, v8, a0, v0.t
+; RV64-NEXT:    ret
+  %elt.head = insertelement <4 x i64> poison, i64 %b, i32 0
+  %vb = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer
+  %v = call <4 x i64> @llvm.vp.uadd.sat.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> %m, i32 %evl)
+  ret <4 x i64> %v
+}
+
+define <4 x i64> @vsaddu_vx_v4i64_unmasked(<4 x i64> %va, i64 %b, i32 zeroext %evl) {
+; RV32-LABEL: vsaddu_vx_v4i64_unmasked:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    sw a1, 12(sp)
+; RV32-NEXT:    sw a0, 8(sp)
+; RV32-NEXT:    addi a0, sp, 8
+; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV32-NEXT:    vlse64.v v10, (a0), zero
+; RV32-NEXT:    vsetvli zero, a2, e64, m2, ta, ma
+; RV32-NEXT:    vsaddu.vv v8, v8, v10
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vsaddu_vx_v4i64_unmasked:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli zero, a1, e64, m2, ta, ma
+; RV64-NEXT:    vsaddu.vx v8, v8, a0
+; RV64-NEXT:    ret
+  %elt.head = insertelement <4 x i64> poison, i64 %b, i32 0
+  %vb = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer
+  %head = insertelement <4 x i1> poison, i1 true, i32 0
+  %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
+  %v = call <4 x i64> @llvm.vp.uadd.sat.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> %m, i32 %evl)
+  ret <4 x i64> %v
+}
+
+define <4 x i64> @vsaddu_vi_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_v4i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-NEXT:    vsaddu.vi v8, v8, -1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <4 x i64> poison, i64 -1, i32 0
+  %vb = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer
+  %v = call <4 x i64> @llvm.vp.uadd.sat.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> %m, i32 %evl)
+  ret <4 x i64> %v
+}
+
+define <4 x i64> @vsaddu_vi_v4i64_unmasked(<4 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_v4i64_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-NEXT:    vsaddu.vi v8, v8, -1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <4 x i64> poison, i64 -1, i32 0
+  %vb = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer
+  %head = insertelement <4 x i1> poison, i1 true, i32 0
+  %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
+  %v = call <4 x i64> @llvm.vp.uadd.sat.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> %m, i32 %evl)
+  ret <4 x i64> %v
+}
+
+declare <8 x i64> @llvm.vp.uadd.sat.v8i64(<8 x i64>, <8 x i64>, <8 x i1>, i32)
+
+define <8 x i64> @vsaddu_vv_v8i64(<8 x i64> %va, <8 x i64> %b, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_v8i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT:    vsaddu.vv v8, v8, v12, v0.t
+; CHECK-NEXT:    ret
+  %v = call <8 x i64> @llvm.vp.uadd.sat.v8i64(<8 x i64> %va, <8 x i64> %b, <8 x i1> %m, i32 %evl)
+  ret <8 x i64> %v
+}
+
+define <8 x i64> @vsaddu_vv_v8i64_unmasked(<8 x i64> %va, <8 x i64> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_v8i64_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT:    vsaddu.vv v8, v8, v12
+; CHECK-NEXT:    ret
+  %head = insertelement <8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+  %v = call <8 x i64> @llvm.vp.uadd.sat.v8i64(<8 x i64> %va, <8 x i64> %b, <8 x i1> %m, i32 %evl)
+  ret <8 x i64> %v
+}
+
+define <8 x i64> @vsaddu_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vsaddu_vx_v8i64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    sw a1, 12(sp)
+; RV32-NEXT:    sw a0, 8(sp)
+; RV32-NEXT:    addi a0, sp, 8
+; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
+; RV32-NEXT:    vlse64.v v12, (a0), zero
+; RV32-NEXT:    vsetvli zero, a2, e64, m4, ta, ma
+; RV32-NEXT:    vsaddu.vv v8, v8, v12, v0.t
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vsaddu_vx_v8i64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
+; RV64-NEXT:    vsaddu.vx v8, v8, a0, v0.t
+; RV64-NEXT:    ret
+  %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0
+  %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
+  %v = call <8 x i64> @llvm.vp.uadd.sat.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+  ret <8 x i64> %v
+}
+
+define <8 x i64> @vsaddu_vx_v8i64_unmasked(<8 x i64> %va, i64 %b, i32 zeroext %evl) {
+; RV32-LABEL: vsaddu_vx_v8i64_unmasked:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    sw a1, 12(sp)
+; RV32-NEXT:    sw a0, 8(sp)
+; RV32-NEXT:    addi a0, sp, 8
+; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
+; RV32-NEXT:    vlse64.v v12, (a0), zero
+; RV32-NEXT:    vsetvli zero, a2, e64, m4, ta, ma
+; RV32-NEXT:    vsaddu.vv v8, v8, v12
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vsaddu_vx_v8i64_unmasked:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
+; RV64-NEXT:    vsaddu.vx v8, v8, a0
+; RV64-NEXT:    ret
+  %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0
+  %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
+  %head = insertelement <8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+  %v = call <8 x i64> @llvm.vp.uadd.sat.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+  ret <8 x i64> %v
+}
+
+define <8 x i64> @vsaddu_vi_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_v8i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT:    vsaddu.vi v8, v8, -1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <8 x i64> poison, i64 -1, i32 0
+  %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
+  %v = call <8 x i64> @llvm.vp.uadd.sat.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+  ret <8 x i64> %v
+}
+
+define <8 x i64> @vsaddu_vi_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_v8i64_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT:    vsaddu.vi v8, v8, -1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <8 x i64> poison, i64 -1, i32 0
+  %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
+  %head = insertelement <8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+  %v = call <8 x i64> @llvm.vp.uadd.sat.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+  ret <8 x i64> %v
+}
+
+declare <16 x i64> @llvm.vp.uadd.sat.v16i64(<16 x i64>, <16 x i64>, <16 x i1>, i32)
+
+define <16 x i64> @vsaddu_vv_v16i64(<16 x i64> %va, <16 x i64> %b, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_v16i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT:    vsaddu.vv v8, v8, v16, v0.t
+; CHECK-NEXT:    ret
+  %v = call <16 x i64> @llvm.vp.uadd.sat.v16i64(<16 x i64> %va, <16 x i64> %b, <16 x i1> %m, i32 %evl)
+  ret <16 x i64> %v
+}
+
+define <16 x i64> @vsaddu_vv_v16i64_unmasked(<16 x i64> %va, <16 x i64> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_v16i64_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT:    vsaddu.vv v8, v8, v16
+; CHECK-NEXT:    ret
+  %head = insertelement <16 x i1> poison, i1 true, i32 0
+  %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
+  %v = call <16 x i64> @llvm.vp.uadd.sat.v16i64(<16 x i64> %va, <16 x i64> %b, <16 x i1> %m, i32 %evl)
+  ret <16 x i64> %v
+}
+
+define <16 x i64> @vsaddu_vx_v16i64(<16 x i64> %va, i64 %b, <16 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vsaddu_vx_v16i64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    sw a1, 12(sp)
+; RV32-NEXT:    sw a0, 8(sp)
+; RV32-NEXT:    addi a0, sp, 8
+; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
+; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
+; RV32-NEXT:    vsaddu.vv v8, v8, v16, v0.t
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vsaddu_vx_v16i64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vsaddu.vx v8, v8, a0, v0.t
+; RV64-NEXT:    ret
+  %elt.head = insertelement <16 x i64> poison, i64 %b, i32 0
+  %vb = shufflevector <16 x i64> %elt.head, <16 x i64> poison, <16 x i32> zeroinitializer
+  %v = call <16 x i64> @llvm.vp.uadd.sat.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl)
+  ret <16 x i64> %v
+}
+
+define <16 x i64> @vsaddu_vx_v16i64_unmasked(<16 x i64> %va, i64 %b, i32 zeroext %evl) {
+; RV32-LABEL: vsaddu_vx_v16i64_unmasked:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    sw a1, 12(sp)
+; RV32-NEXT:    sw a0, 8(sp)
+; RV32-NEXT:    addi a0, sp, 8
+; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
+; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
+; RV32-NEXT:    vsaddu.vv v8, v8, v16
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vsaddu_vx_v16i64_unmasked:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vsaddu.vx v8, v8, a0
+; RV64-NEXT:    ret
+  %elt.head = insertelement <16 x i64> poison, i64 %b, i32 0
+  %vb = shufflevector <16 x i64> %elt.head, <16 x i64> poison, <16 x i32> zeroinitializer
+  %head = insertelement <16 x i1> poison, i1 true, i32 0
+  %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
+  %v = call <16 x i64> @llvm.vp.uadd.sat.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl)
+  ret <16 x i64> %v
+}
+
+define <16 x i64> @vsaddu_vi_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_v16i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT:    vsaddu.vi v8, v8, -1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <16 x i64> poison, i64 -1, i32 0
+  %vb = shufflevector <16 x i64> %elt.head, <16 x i64> poison, <16 x i32> zeroinitializer
+  %v = call <16 x i64> @llvm.vp.uadd.sat.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl)
+  ret <16 x i64> %v
+}
+
+define <16 x i64> @vsaddu_vi_v16i64_unmasked(<16 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_v16i64_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT:    vsaddu.vi v8, v8, -1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <16 x i64> poison, i64 -1, i32 0
+  %vb = shufflevector <16 x i64> %elt.head, <16 x i64> poison, <16 x i32> zeroinitializer
+  %head = insertelement <16 x i1> poison, i1 true, i32 0
+  %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
+  %v = call <16 x i64> @llvm.vp.uadd.sat.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl)
+  ret <16 x i64> %v
+}
+
+; Test that split-legalization works as expected.
+
+declare <32 x i64> @llvm.vp.uadd.sat.v32i64(<32 x i64>, <32 x i64>, <32 x i1>, i32)
+
+define <32 x i64> @vsaddu_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vsaddu_vx_v32i64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
+; RV32-NEXT:    vslidedown.vi v1, v0, 2
+; RV32-NEXT:    li a1, 32
+; RV32-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
+; RV32-NEXT:    li a2, 16
+; RV32-NEXT:    vmv.v.i v24, -1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    bltu a0, a2, .LBB108_2
+; RV32-NEXT:  # %bb.1:
+; RV32-NEXT:    li a1, 16
+; RV32-NEXT:  .LBB108_2:
+; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV32-NEXT:    vsaddu.vv v8, v8, v24, v0.t
+; RV32-NEXT:    addi a1, a0, -16
+; RV32-NEXT:    sltu a0, a0, a1
+; RV32-NEXT:    addi a0, a0, -1
+; RV32-NEXT:    and a0, a0, a1
+; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT:    vmv1r.v v0, v1
+; RV32-NEXT:    vsaddu.vv v16, v16, v24, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vsaddu_vx_v32i64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
+; RV64-NEXT:    li a2, 16
+; RV64-NEXT:    vslidedown.vi v24, v0, 2
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    bltu a0, a2, .LBB108_2
+; RV64-NEXT:  # %bb.1:
+; RV64-NEXT:    li a1, 16
+; RV64-NEXT:  .LBB108_2:
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vsaddu.vi v8, v8, -1, v0.t
+; RV64-NEXT:    addi a1, a0, -16
+; RV64-NEXT:    sltu a0, a0, a1
+; RV64-NEXT:    addi a0, a0, -1
+; RV64-NEXT:    and a0, a0, a1
+; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV64-NEXT:    vmv1r.v v0, v24
+; RV64-NEXT:    vsaddu.vi v16, v16, -1, v0.t
+; RV64-NEXT:    ret
+  %elt.head = insertelement <32 x i64> poison, i64 -1, i32 0
+  %vb = shufflevector <32 x i64> %elt.head, <32 x i64> poison, <32 x i32> zeroinitializer
+  %v = call <32 x i64> @llvm.vp.uadd.sat.v32i64(<32 x i64> %va, <32 x i64> %vb, <32 x i1> %m, i32 %evl)
+  ret <32 x i64> %v
+}
+
+define <32 x i64> @vsaddu_vi_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) {
+; RV32-LABEL: vsaddu_vi_v32i64_unmasked:
+; RV32:       # %bb.0:
+; RV32-NEXT:    li a1, 32
+; RV32-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
+; RV32-NEXT:    li a2, 16
+; RV32-NEXT:    vmv.v.i v24, -1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    bltu a0, a2, .LBB109_2
+; RV32-NEXT:  # %bb.1:
+; RV32-NEXT:    li a1, 16
+; RV32-NEXT:  .LBB109_2:
+; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV32-NEXT:    vsaddu.vv v8, v8, v24
+; RV32-NEXT:    addi a1, a0, -16
+; RV32-NEXT:    sltu a0, a0, a1
+; RV32-NEXT:    addi a0, a0, -1
+; RV32-NEXT:    and a0, a0, a1
+; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT:    vsaddu.vv v16, v16, v24
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vsaddu_vi_v32i64_unmasked:
+; RV64:       # %bb.0:
+; RV64-NEXT:    li a2, 16
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    bltu a0, a2, .LBB109_2
+; RV64-NEXT:  # %bb.1:
+; RV64-NEXT:    li a1, 16
+; RV64-NEXT:  .LBB109_2:
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vsaddu.vi v8, v8, -1
+; RV64-NEXT:    addi a1, a0, -16
+; RV64-NEXT:    sltu a0, a0, a1
+; RV64-NEXT:    addi a0, a0, -1
+; RV64-NEXT:    and a0, a0, a1
+; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV64-NEXT:    vsaddu.vi v16, v16, -1
+; RV64-NEXT:    ret
+  %elt.head = insertelement <32 x i64> poison, i64 -1, i32 0
+  %vb = shufflevector <32 x i64> %elt.head, <32 x i64> poison, <32 x i32> zeroinitializer
+  %head = insertelement <32 x i1> poison, i1 true, i32 0
+  %m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer
+  %v = call <32 x i64> @llvm.vp.uadd.sat.v32i64(<32 x i64> %va, <32 x i64> %vb, <32 x i1> %m, i32 %evl)
+  ret <32 x i64> %v
+}
+
+; FIXME: We don't match vsaddu.vi on RV32.
+
+define <32 x i64> @vsaddu_vx_v32i64_evl12(<32 x i64> %va, <32 x i1> %m) {
+; RV32-LABEL: vsaddu_vx_v32i64_evl12:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
+; RV32-NEXT:    vslidedown.vi v1, v0, 2
+; RV32-NEXT:    li a0, 32
+; RV32-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
+; RV32-NEXT:    vmv.v.i v24, -1
+; RV32-NEXT:    vsetivli zero, 12, e64, m8, ta, ma
+; RV32-NEXT:    vsaddu.vv v8, v8, v24, v0.t
+; RV32-NEXT:    vsetivli zero, 0, e64, m8, ta, ma
+; RV32-NEXT:    vmv1r.v v0, v1
+; RV32-NEXT:    vsaddu.vv v16, v16, v24, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vsaddu_vx_v32i64_evl12:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
+; RV64-NEXT:    vslidedown.vi v24, v0, 2
+; RV64-NEXT:    vsetivli zero, 12, e64, m8, ta, ma
+; RV64-NEXT:    vsaddu.vi v8, v8, -1, v0.t
+; RV64-NEXT:    vsetivli zero, 0, e64, m8, ta, ma
+; RV64-NEXT:    vmv1r.v v0, v24
+; RV64-NEXT:    vsaddu.vi v16, v16, -1, v0.t
+; RV64-NEXT:    ret
+  %elt.head = insertelement <32 x i64> poison, i64 -1, i32 0
+  %vb = shufflevector <32 x i64> %elt.head, <32 x i64> poison, <32 x i32> zeroinitializer
+  %v = call <32 x i64> @llvm.vp.uadd.sat.v32i64(<32 x i64> %va, <32 x i64> %vb, <32 x i1> %m, i32 12)
+  ret <32 x i64> %v
+}
+
+define <32 x i64> @vsaddu_vx_v32i64_evl27(<32 x i64> %va, <32 x i1> %m) {
+; RV32-LABEL: vsaddu_vx_v32i64_evl27:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
+; RV32-NEXT:    vslidedown.vi v1, v0, 2
+; RV32-NEXT:    li a0, 32
+; RV32-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
+; RV32-NEXT:    vmv.v.i v24, -1
+; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
+; RV32-NEXT:    vsaddu.vv v8, v8, v24, v0.t
+; RV32-NEXT:    vsetivli zero, 11, e64, m8, ta, ma
+; RV32-NEXT:    vmv1r.v v0, v1
+; RV32-NEXT:    vsaddu.vv v16, v16, v24, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vsaddu_vx_v32i64_evl27:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
+; RV64-NEXT:    vslidedown.vi v24, v0, 2
+; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
+; RV64-NEXT:    vsaddu.vi v8, v8, -1, v0.t
+; RV64-NEXT:    vsetivli zero, 11, e64, m8, ta, ma
+; RV64-NEXT:    vmv1r.v v0, v24
+; RV64-NEXT:    vsaddu.vi v16, v16, -1, v0.t
+; RV64-NEXT:    ret
+  %elt.head = insertelement <32 x i64> poison, i64 -1, i32 0
+  %vb = shufflevector <32 x i64> %elt.head, <32 x i64> poison, <32 x i32> zeroinitializer
+  %v = call <32 x i64> @llvm.vp.uadd.sat.v32i64(<32 x i64> %va, <32 x i64> %vb, <32 x i1> %m, i32 27)
+  ret <32 x i64> %v
+}

diff  --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssub-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssub-vp.ll
new file mode 100644
index 00000000000000..6360cf49d8d478
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssub-vp.ll
@@ -0,0 +1,1745 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s --check-prefixes=CHECK,RV64
+
+declare <8 x i7> @llvm.vp.ssub.sat.v8i7(<8 x i7>, <8 x i7>, <8 x i1>, i32)
+
+define <8 x i7> @vssub_vv_v8i7(<8 x i7> %va, <8 x i7> %b, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_v8i7:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT:    vadd.vv v9, v9, v9
+; CHECK-NEXT:    vsra.vi v9, v9, 1
+; CHECK-NEXT:    vadd.vv v8, v8, v8
+; CHECK-NEXT:    vsra.vi v8, v8, 1
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    li a0, 63
+; CHECK-NEXT:    vmin.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    li a0, 192
+; CHECK-NEXT:    vmax.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %v = call <8 x i7> @llvm.vp.ssub.sat.v8i7(<8 x i7> %va, <8 x i7> %b, <8 x i1> %m, i32 %evl)
+  ret <8 x i7> %v
+}
+
+declare <2 x i8> @llvm.vp.ssub.sat.v2i8(<2 x i8>, <2 x i8>, <2 x i1>, i32)
+
+define <2 x i8> @vssub_vv_v2i8(<2 x i8> %va, <2 x i8> %b, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_v2i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-NEXT:    vssub.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    ret
+  %v = call <2 x i8> @llvm.vp.ssub.sat.v2i8(<2 x i8> %va, <2 x i8> %b, <2 x i1> %m, i32 %evl)
+  ret <2 x i8> %v
+}
+
+define <2 x i8> @vssub_vv_v2i8_unmasked(<2 x i8> %va, <2 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_v2i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-NEXT:    vssub.vv v8, v8, v9
+; CHECK-NEXT:    ret
+  %head = insertelement <2 x i1> poison, i1 true, i32 0
+  %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
+  %v = call <2 x i8> @llvm.vp.ssub.sat.v2i8(<2 x i8> %va, <2 x i8> %b, <2 x i1> %m, i32 %evl)
+  ret <2 x i8> %v
+}
+
+define <2 x i8> @vssub_vx_v2i8(<2 x i8> %va, i8 %b, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_v2i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <2 x i8> poison, i8 %b, i32 0
+  %vb = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer
+  %v = call <2 x i8> @llvm.vp.ssub.sat.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 %evl)
+  ret <2 x i8> %v
+}
+
+define <2 x i8> @vssub_vx_v2i8_unmasked(<2 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_v2i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <2 x i8> poison, i8 %b, i32 0
+  %vb = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer
+  %head = insertelement <2 x i1> poison, i1 true, i32 0
+  %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
+  %v = call <2 x i8> @llvm.vp.ssub.sat.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 %evl)
+  ret <2 x i8> %v
+}
+
+define <2 x i8> @vssub_vi_v2i8(<2 x i8> %va, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_v2i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <2 x i8> poison, i8 -1, i32 0
+  %vb = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer
+  %v = call <2 x i8> @llvm.vp.ssub.sat.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 %evl)
+  ret <2 x i8> %v
+}
+
+define <2 x i8> @vssub_vi_v2i8_unmasked(<2 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_v2i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <2 x i8> poison, i8 -1, i32 0
+  %vb = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer
+  %head = insertelement <2 x i1> poison, i1 true, i32 0
+  %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
+  %v = call <2 x i8> @llvm.vp.ssub.sat.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 %evl)
+  ret <2 x i8> %v
+}
+
+declare <4 x i8> @llvm.vp.ssub.sat.v4i8(<4 x i8>, <4 x i8>, <4 x i1>, i32)
+
+define <4 x i8> @vssub_vv_v4i8(<4 x i8> %va, <4 x i8> %b, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_v4i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-NEXT:    vssub.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    ret
+  %v = call <4 x i8> @llvm.vp.ssub.sat.v4i8(<4 x i8> %va, <4 x i8> %b, <4 x i1> %m, i32 %evl)
+  ret <4 x i8> %v
+}
+
+define <4 x i8> @vssub_vv_v4i8_unmasked(<4 x i8> %va, <4 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_v4i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-NEXT:    vssub.vv v8, v8, v9
+; CHECK-NEXT:    ret
+  %head = insertelement <4 x i1> poison, i1 true, i32 0
+  %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
+  %v = call <4 x i8> @llvm.vp.ssub.sat.v4i8(<4 x i8> %va, <4 x i8> %b, <4 x i1> %m, i32 %evl)
+  ret <4 x i8> %v
+}
+
+define <4 x i8> @vssub_vx_v4i8(<4 x i8> %va, i8 %b, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_v4i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <4 x i8> poison, i8 %b, i32 0
+  %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer
+  %v = call <4 x i8> @llvm.vp.ssub.sat.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> %m, i32 %evl)
+  ret <4 x i8> %v
+}
+
+define <4 x i8> @vssub_vx_v4i8_commute(<4 x i8> %va, i8 %b, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_v4i8_commute:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
+; CHECK-NEXT:    vmv.v.x v9, a0
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, ta, ma
+; CHECK-NEXT:    vssub.vv v8, v9, v8, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <4 x i8> poison, i8 %b, i32 0
+  %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer
+  %v = call <4 x i8> @llvm.vp.ssub.sat.v4i8(<4 x i8> %vb, <4 x i8> %va, <4 x i1> %m, i32 %evl)
+  ret <4 x i8> %v
+}
+
+define <4 x i8> @vssub_vx_v4i8_unmasked(<4 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_v4i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <4 x i8> poison, i8 %b, i32 0
+  %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer
+  %head = insertelement <4 x i1> poison, i1 true, i32 0
+  %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
+  %v = call <4 x i8> @llvm.vp.ssub.sat.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> %m, i32 %evl)
+  ret <4 x i8> %v
+}
+
+define <4 x i8> @vssub_vi_v4i8(<4 x i8> %va, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_v4i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <4 x i8> poison, i8 -1, i32 0
+  %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer
+  %v = call <4 x i8> @llvm.vp.ssub.sat.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> %m, i32 %evl)
+  ret <4 x i8> %v
+}
+
+define <4 x i8> @vssub_vi_v4i8_unmasked(<4 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_v4i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <4 x i8> poison, i8 -1, i32 0
+  %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer
+  %head = insertelement <4 x i1> poison, i1 true, i32 0
+  %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
+  %v = call <4 x i8> @llvm.vp.ssub.sat.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> %m, i32 %evl)
+  ret <4 x i8> %v
+}
+
+declare <5 x i8> @llvm.vp.ssub.sat.v5i8(<5 x i8>, <5 x i8>, <5 x i1>, i32)
+
+define <5 x i8> @vssub_vv_v5i8(<5 x i8> %va, <5 x i8> %b, <5 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_v5i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT:    vssub.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    ret
+  %v = call <5 x i8> @llvm.vp.ssub.sat.v5i8(<5 x i8> %va, <5 x i8> %b, <5 x i1> %m, i32 %evl)
+  ret <5 x i8> %v
+}
+
+define <5 x i8> @vssub_vv_v5i8_unmasked(<5 x i8> %va, <5 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_v5i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT:    vssub.vv v8, v8, v9
+; CHECK-NEXT:    ret
+  %head = insertelement <5 x i1> poison, i1 true, i32 0
+  %m = shufflevector <5 x i1> %head, <5 x i1> poison, <5 x i32> zeroinitializer
+  %v = call <5 x i8> @llvm.vp.ssub.sat.v5i8(<5 x i8> %va, <5 x i8> %b, <5 x i1> %m, i32 %evl)
+  ret <5 x i8> %v
+}
+
+define <5 x i8> @vssub_vx_v5i8(<5 x i8> %va, i8 %b, <5 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_v5i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <5 x i8> poison, i8 %b, i32 0
+  %vb = shufflevector <5 x i8> %elt.head, <5 x i8> poison, <5 x i32> zeroinitializer
+  %v = call <5 x i8> @llvm.vp.ssub.sat.v5i8(<5 x i8> %va, <5 x i8> %vb, <5 x i1> %m, i32 %evl)
+  ret <5 x i8> %v
+}
+
+define <5 x i8> @vssub_vx_v5i8_unmasked(<5 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_v5i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <5 x i8> poison, i8 %b, i32 0
+  %vb = shufflevector <5 x i8> %elt.head, <5 x i8> poison, <5 x i32> zeroinitializer
+  %head = insertelement <5 x i1> poison, i1 true, i32 0
+  %m = shufflevector <5 x i1> %head, <5 x i1> poison, <5 x i32> zeroinitializer
+  %v = call <5 x i8> @llvm.vp.ssub.sat.v5i8(<5 x i8> %va, <5 x i8> %vb, <5 x i1> %m, i32 %evl)
+  ret <5 x i8> %v
+}
+
+define <5 x i8> @vssub_vi_v5i8(<5 x i8> %va, <5 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_v5i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <5 x i8> poison, i8 -1, i32 0
+  %vb = shufflevector <5 x i8> %elt.head, <5 x i8> poison, <5 x i32> zeroinitializer
+  %v = call <5 x i8> @llvm.vp.ssub.sat.v5i8(<5 x i8> %va, <5 x i8> %vb, <5 x i1> %m, i32 %evl)
+  ret <5 x i8> %v
+}
+
+define <5 x i8> @vssub_vi_v5i8_unmasked(<5 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_v5i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <5 x i8> poison, i8 -1, i32 0
+  %vb = shufflevector <5 x i8> %elt.head, <5 x i8> poison, <5 x i32> zeroinitializer
+  %head = insertelement <5 x i1> poison, i1 true, i32 0
+  %m = shufflevector <5 x i1> %head, <5 x i1> poison, <5 x i32> zeroinitializer
+  %v = call <5 x i8> @llvm.vp.ssub.sat.v5i8(<5 x i8> %va, <5 x i8> %vb, <5 x i1> %m, i32 %evl)
+  ret <5 x i8> %v
+}
+
+declare <8 x i8> @llvm.vp.ssub.sat.v8i8(<8 x i8>, <8 x i8>, <8 x i1>, i32)
+
+define <8 x i8> @vssub_vv_v8i8(<8 x i8> %va, <8 x i8> %b, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_v8i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT:    vssub.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    ret
+  %v = call <8 x i8> @llvm.vp.ssub.sat.v8i8(<8 x i8> %va, <8 x i8> %b, <8 x i1> %m, i32 %evl)
+  ret <8 x i8> %v
+}
+
+define <8 x i8> @vssub_vv_v8i8_unmasked(<8 x i8> %va, <8 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_v8i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT:    vssub.vv v8, v8, v9
+; CHECK-NEXT:    ret
+  %head = insertelement <8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+  %v = call <8 x i8> @llvm.vp.ssub.sat.v8i8(<8 x i8> %va, <8 x i8> %b, <8 x i1> %m, i32 %evl)
+  ret <8 x i8> %v
+}
+
+define <8 x i8> @vssub_vx_v8i8(<8 x i8> %va, i8 %b, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_v8i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <8 x i8> poison, i8 %b, i32 0
+  %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer
+  %v = call <8 x i8> @llvm.vp.ssub.sat.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 %evl)
+  ret <8 x i8> %v
+}
+
+define <8 x i8> @vssub_vx_v8i8_unmasked(<8 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_v8i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <8 x i8> poison, i8 %b, i32 0
+  %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer
+  %head = insertelement <8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+  %v = call <8 x i8> @llvm.vp.ssub.sat.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 %evl)
+  ret <8 x i8> %v
+}
+
+define <8 x i8> @vssub_vi_v8i8(<8 x i8> %va, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_v8i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <8 x i8> poison, i8 -1, i32 0
+  %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer
+  %v = call <8 x i8> @llvm.vp.ssub.sat.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 %evl)
+  ret <8 x i8> %v
+}
+
+define <8 x i8> @vssub_vi_v8i8_unmasked(<8 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_v8i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <8 x i8> poison, i8 -1, i32 0
+  %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer
+  %head = insertelement <8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+  %v = call <8 x i8> @llvm.vp.ssub.sat.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 %evl)
+  ret <8 x i8> %v
+}
+
+declare <16 x i8> @llvm.vp.ssub.sat.v16i8(<16 x i8>, <16 x i8>, <16 x i1>, i32)
+
+define <16 x i8> @vssub_vv_v16i8(<16 x i8> %va, <16 x i8> %b, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_v16i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-NEXT:    vssub.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    ret
+  %v = call <16 x i8> @llvm.vp.ssub.sat.v16i8(<16 x i8> %va, <16 x i8> %b, <16 x i1> %m, i32 %evl)
+  ret <16 x i8> %v
+}
+
+define <16 x i8> @vssub_vv_v16i8_unmasked(<16 x i8> %va, <16 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_v16i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-NEXT:    vssub.vv v8, v8, v9
+; CHECK-NEXT:    ret
+  %head = insertelement <16 x i1> poison, i1 true, i32 0
+  %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
+  %v = call <16 x i8> @llvm.vp.ssub.sat.v16i8(<16 x i8> %va, <16 x i8> %b, <16 x i1> %m, i32 %evl)
+  ret <16 x i8> %v
+}
+
+define <16 x i8> @vssub_vx_v16i8(<16 x i8> %va, i8 %b, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_v16i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <16 x i8> poison, i8 %b, i32 0
+  %vb = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer
+  %v = call <16 x i8> @llvm.vp.ssub.sat.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %m, i32 %evl)
+  ret <16 x i8> %v
+}
+
+define <16 x i8> @vssub_vx_v16i8_unmasked(<16 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_v16i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <16 x i8> poison, i8 %b, i32 0
+  %vb = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer
+  %head = insertelement <16 x i1> poison, i1 true, i32 0
+  %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
+  %v = call <16 x i8> @llvm.vp.ssub.sat.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %m, i32 %evl)
+  ret <16 x i8> %v
+}
+
+define <16 x i8> @vssub_vi_v16i8(<16 x i8> %va, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_v16i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <16 x i8> poison, i8 -1, i32 0
+  %vb = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer
+  %v = call <16 x i8> @llvm.vp.ssub.sat.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %m, i32 %evl)
+  ret <16 x i8> %v
+}
+
+define <16 x i8> @vssub_vi_v16i8_unmasked(<16 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_v16i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <16 x i8> poison, i8 -1, i32 0
+  %vb = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer
+  %head = insertelement <16 x i1> poison, i1 true, i32 0
+  %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
+  %v = call <16 x i8> @llvm.vp.ssub.sat.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %m, i32 %evl)
+  ret <16 x i8> %v
+}
+
+declare <256 x i8> @llvm.vp.ssub.sat.v258i8(<256 x i8>, <256 x i8>, <256 x i1>, i32)
+
+define <256 x i8> @vssub_vi_v258i8(<256 x i8> %va, <256 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_v258i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vmv1r.v v24, v0
+; CHECK-NEXT:    li a2, 128
+; CHECK-NEXT:    vsetvli zero, a2, e8, m8, ta, ma
+; CHECK-NEXT:    vlm.v v0, (a0)
+; CHECK-NEXT:    addi a0, a1, -128
+; CHECK-NEXT:    sltu a3, a1, a0
+; CHECK-NEXT:    addi a3, a3, -1
+; CHECK-NEXT:    and a3, a3, a0
+; CHECK-NEXT:    li a0, -1
+; CHECK-NEXT:    vsetvli zero, a3, e8, m8, ta, ma
+; CHECK-NEXT:    vssub.vx v16, v16, a0, v0.t
+; CHECK-NEXT:    bltu a1, a2, .LBB32_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    li a1, 128
+; CHECK-NEXT:  .LBB32_2:
+; CHECK-NEXT:    vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-NEXT:    vmv1r.v v0, v24
+; CHECK-NEXT:    vssub.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <256 x i8> poison, i8 -1, i32 0
+  %vb = shufflevector <256 x i8> %elt.head, <256 x i8> poison, <256 x i32> zeroinitializer
+  %v = call <256 x i8> @llvm.vp.ssub.sat.v258i8(<256 x i8> %va, <256 x i8> %vb, <256 x i1> %m, i32 %evl)
+  ret <256 x i8> %v
+}
+
+define <256 x i8> @vssub_vi_v258i8_unmasked(<256 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_v258i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a2, 128
+; CHECK-NEXT:    mv a1, a0
+; CHECK-NEXT:    bltu a0, a2, .LBB33_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    li a1, 128
+; CHECK-NEXT:  .LBB33_2:
+; CHECK-NEXT:    li a2, -1
+; CHECK-NEXT:    vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a2
+; CHECK-NEXT:    addi a1, a0, -128
+; CHECK-NEXT:    sltu a0, a0, a1
+; CHECK-NEXT:    addi a0, a0, -1
+; CHECK-NEXT:    and a0, a0, a1
+; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-NEXT:    vssub.vx v16, v16, a2
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <256 x i8> poison, i8 -1, i32 0
+  %vb = shufflevector <256 x i8> %elt.head, <256 x i8> poison, <256 x i32> zeroinitializer
+  %head = insertelement <256 x i1> poison, i1 true, i32 0
+  %m = shufflevector <256 x i1> %head, <256 x i1> poison, <256 x i32> zeroinitializer
+  %v = call <256 x i8> @llvm.vp.ssub.sat.v258i8(<256 x i8> %va, <256 x i8> %vb, <256 x i1> %m, i32 %evl)
+  ret <256 x i8> %v
+}
+
+; Test splitting when the %evl is a known constant.
+
+define <256 x i8> @vssub_vi_v258i8_evl129(<256 x i8> %va, <256 x i1> %m) {
+; CHECK-LABEL: vssub_vi_v258i8_evl129:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 128
+; CHECK-NEXT:    vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-NEXT:    vlm.v v24, (a0)
+; CHECK-NEXT:    li a0, -1
+; CHECK-NEXT:    vssub.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsetivli zero, 1, e8, m8, ta, ma
+; CHECK-NEXT:    vmv1r.v v0, v24
+; CHECK-NEXT:    vssub.vx v16, v16, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <256 x i8> poison, i8 -1, i32 0
+  %vb = shufflevector <256 x i8> %elt.head, <256 x i8> poison, <256 x i32> zeroinitializer
+  %v = call <256 x i8> @llvm.vp.ssub.sat.v258i8(<256 x i8> %va, <256 x i8> %vb, <256 x i1> %m, i32 129)
+  ret <256 x i8> %v
+}
+
+; FIXME: The upper half is doing nothing.
+
+define <256 x i8> @vssub_vi_v258i8_evl128(<256 x i8> %va, <256 x i1> %m) {
+; CHECK-LABEL: vssub_vi_v258i8_evl128:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 128
+; CHECK-NEXT:    vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-NEXT:    vlm.v v24, (a0)
+; CHECK-NEXT:    li a0, -1
+; CHECK-NEXT:    vssub.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsetivli zero, 0, e8, m8, ta, ma
+; CHECK-NEXT:    vmv1r.v v0, v24
+; CHECK-NEXT:    vssub.vx v16, v16, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <256 x i8> poison, i8 -1, i32 0
+  %vb = shufflevector <256 x i8> %elt.head, <256 x i8> poison, <256 x i32> zeroinitializer
+  %v = call <256 x i8> @llvm.vp.ssub.sat.v258i8(<256 x i8> %va, <256 x i8> %vb, <256 x i1> %m, i32 128)
+  ret <256 x i8> %v
+}
+
+declare <2 x i16> @llvm.vp.ssub.sat.v2i16(<2 x i16>, <2 x i16>, <2 x i1>, i32)
+
+define <2 x i16> @vssub_vv_v2i16(<2 x i16> %va, <2 x i16> %b, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_v2i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT:    vssub.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    ret
+  %v = call <2 x i16> @llvm.vp.ssub.sat.v2i16(<2 x i16> %va, <2 x i16> %b, <2 x i1> %m, i32 %evl)
+  ret <2 x i16> %v
+}
+
+define <2 x i16> @vssub_vv_v2i16_unmasked(<2 x i16> %va, <2 x i16> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_v2i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT:    vssub.vv v8, v8, v9
+; CHECK-NEXT:    ret
+  %head = insertelement <2 x i1> poison, i1 true, i32 0
+  %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
+  %v = call <2 x i16> @llvm.vp.ssub.sat.v2i16(<2 x i16> %va, <2 x i16> %b, <2 x i1> %m, i32 %evl)
+  ret <2 x i16> %v
+}
+
+define <2 x i16> @vssub_vx_v2i16(<2 x i16> %va, i16 %b, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_v2i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <2 x i16> poison, i16 %b, i32 0
+  %vb = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer
+  %v = call <2 x i16> @llvm.vp.ssub.sat.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> %m, i32 %evl)
+  ret <2 x i16> %v
+}
+
+define <2 x i16> @vssub_vx_v2i16_unmasked(<2 x i16> %va, i16 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_v2i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <2 x i16> poison, i16 %b, i32 0
+  %vb = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer
+  %head = insertelement <2 x i1> poison, i1 true, i32 0
+  %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
+  %v = call <2 x i16> @llvm.vp.ssub.sat.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> %m, i32 %evl)
+  ret <2 x i16> %v
+}
+
+define <2 x i16> @vssub_vi_v2i16(<2 x i16> %va, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_v2i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <2 x i16> poison, i16 -1, i32 0
+  %vb = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer
+  %v = call <2 x i16> @llvm.vp.ssub.sat.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> %m, i32 %evl)
+  ret <2 x i16> %v
+}
+
+define <2 x i16> @vssub_vi_v2i16_unmasked(<2 x i16> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_v2i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <2 x i16> poison, i16 -1, i32 0
+  %vb = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer
+  %head = insertelement <2 x i1> poison, i1 true, i32 0
+  %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
+  %v = call <2 x i16> @llvm.vp.ssub.sat.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> %m, i32 %evl)
+  ret <2 x i16> %v
+}
+
+declare <4 x i16> @llvm.vp.ssub.sat.v4i16(<4 x i16>, <4 x i16>, <4 x i1>, i32)
+
+define <4 x i16> @vssub_vv_v4i16(<4 x i16> %va, <4 x i16> %b, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_v4i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT:    vssub.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    ret
+  %v = call <4 x i16> @llvm.vp.ssub.sat.v4i16(<4 x i16> %va, <4 x i16> %b, <4 x i1> %m, i32 %evl)
+  ret <4 x i16> %v
+}
+
+define <4 x i16> @vssub_vv_v4i16_unmasked(<4 x i16> %va, <4 x i16> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_v4i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT:    vssub.vv v8, v8, v9
+; CHECK-NEXT:    ret
+  %head = insertelement <4 x i1> poison, i1 true, i32 0
+  %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
+  %v = call <4 x i16> @llvm.vp.ssub.sat.v4i16(<4 x i16> %va, <4 x i16> %b, <4 x i1> %m, i32 %evl)
+  ret <4 x i16> %v
+}
+
+define <4 x i16> @vssub_vx_v4i16(<4 x i16> %va, i16 %b, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_v4i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <4 x i16> poison, i16 %b, i32 0
+  %vb = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer
+  %v = call <4 x i16> @llvm.vp.ssub.sat.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 %evl)
+  ret <4 x i16> %v
+}
+
+define <4 x i16> @vssub_vx_v4i16_unmasked(<4 x i16> %va, i16 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_v4i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <4 x i16> poison, i16 %b, i32 0
+  %vb = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer
+  %head = insertelement <4 x i1> poison, i1 true, i32 0
+  %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
+  %v = call <4 x i16> @llvm.vp.ssub.sat.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 %evl)
+  ret <4 x i16> %v
+}
+
+define <4 x i16> @vssub_vi_v4i16(<4 x i16> %va, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_v4i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <4 x i16> poison, i16 -1, i32 0
+  %vb = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer
+  %v = call <4 x i16> @llvm.vp.ssub.sat.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 %evl)
+  ret <4 x i16> %v
+}
+
+define <4 x i16> @vssub_vi_v4i16_unmasked(<4 x i16> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_v4i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <4 x i16> poison, i16 -1, i32 0
+  %vb = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer
+  %head = insertelement <4 x i1> poison, i1 true, i32 0
+  %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
+  %v = call <4 x i16> @llvm.vp.ssub.sat.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 %evl)
+  ret <4 x i16> %v
+}
+
+declare <8 x i16> @llvm.vp.ssub.sat.v8i16(<8 x i16>, <8 x i16>, <8 x i1>, i32)
+
+define <8 x i16> @vssub_vv_v8i16(<8 x i16> %va, <8 x i16> %b, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_v8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT:    vssub.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    ret
+  %v = call <8 x i16> @llvm.vp.ssub.sat.v8i16(<8 x i16> %va, <8 x i16> %b, <8 x i1> %m, i32 %evl)
+  ret <8 x i16> %v
+}
+
+define <8 x i16> @vssub_vv_v8i16_unmasked(<8 x i16> %va, <8 x i16> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_v8i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT:    vssub.vv v8, v8, v9
+; CHECK-NEXT:    ret
+  %head = insertelement <8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+  %v = call <8 x i16> @llvm.vp.ssub.sat.v8i16(<8 x i16> %va, <8 x i16> %b, <8 x i1> %m, i32 %evl)
+  ret <8 x i16> %v
+}
+
+define <8 x i16> @vssub_vx_v8i16(<8 x i16> %va, i16 %b, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_v8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <8 x i16> poison, i16 %b, i32 0
+  %vb = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer
+  %v = call <8 x i16> @llvm.vp.ssub.sat.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %m, i32 %evl)
+  ret <8 x i16> %v
+}
+
+define <8 x i16> @vssub_vx_v8i16_unmasked(<8 x i16> %va, i16 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_v8i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <8 x i16> poison, i16 %b, i32 0
+  %vb = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer
+  %head = insertelement <8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+  %v = call <8 x i16> @llvm.vp.ssub.sat.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %m, i32 %evl)
+  ret <8 x i16> %v
+}
+
+define <8 x i16> @vssub_vi_v8i16(<8 x i16> %va, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_v8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <8 x i16> poison, i16 -1, i32 0
+  %vb = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer
+  %v = call <8 x i16> @llvm.vp.ssub.sat.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %m, i32 %evl)
+  ret <8 x i16> %v
+}
+
+define <8 x i16> @vssub_vi_v8i16_unmasked(<8 x i16> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_v8i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <8 x i16> poison, i16 -1, i32 0
+  %vb = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer
+  %head = insertelement <8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+  %v = call <8 x i16> @llvm.vp.ssub.sat.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %m, i32 %evl)
+  ret <8 x i16> %v
+}
+
+declare <16 x i16> @llvm.vp.ssub.sat.v16i16(<16 x i16>, <16 x i16>, <16 x i1>, i32)
+
+define <16 x i16> @vssub_vv_v16i16(<16 x i16> %va, <16 x i16> %b, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_v16i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT:    vssub.vv v8, v8, v10, v0.t
+; CHECK-NEXT:    ret
+  %v = call <16 x i16> @llvm.vp.ssub.sat.v16i16(<16 x i16> %va, <16 x i16> %b, <16 x i1> %m, i32 %evl)
+  ret <16 x i16> %v
+}
+
+define <16 x i16> @vssub_vv_v16i16_unmasked(<16 x i16> %va, <16 x i16> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_v16i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT:    vssub.vv v8, v8, v10
+; CHECK-NEXT:    ret
+  %head = insertelement <16 x i1> poison, i1 true, i32 0
+  %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
+  %v = call <16 x i16> @llvm.vp.ssub.sat.v16i16(<16 x i16> %va, <16 x i16> %b, <16 x i1> %m, i32 %evl)
+  ret <16 x i16> %v
+}
+
+define <16 x i16> @vssub_vx_v16i16(<16 x i16> %va, i16 %b, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_v16i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <16 x i16> poison, i16 %b, i32 0
+  %vb = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer
+  %v = call <16 x i16> @llvm.vp.ssub.sat.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 %evl)
+  ret <16 x i16> %v
+}
+
+define <16 x i16> @vssub_vx_v16i16_unmasked(<16 x i16> %va, i16 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_v16i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <16 x i16> poison, i16 %b, i32 0
+  %vb = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer
+  %head = insertelement <16 x i1> poison, i1 true, i32 0
+  %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
+  %v = call <16 x i16> @llvm.vp.ssub.sat.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 %evl)
+  ret <16 x i16> %v
+}
+
+define <16 x i16> @vssub_vi_v16i16(<16 x i16> %va, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_v16i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <16 x i16> poison, i16 -1, i32 0
+  %vb = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer
+  %v = call <16 x i16> @llvm.vp.ssub.sat.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 %evl)
+  ret <16 x i16> %v
+}
+
+define <16 x i16> @vssub_vi_v16i16_unmasked(<16 x i16> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_v16i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <16 x i16> poison, i16 -1, i32 0
+  %vb = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer
+  %head = insertelement <16 x i1> poison, i1 true, i32 0
+  %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
+  %v = call <16 x i16> @llvm.vp.ssub.sat.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 %evl)
+  ret <16 x i16> %v
+}
+
+declare <2 x i32> @llvm.vp.ssub.sat.v2i32(<2 x i32>, <2 x i32>, <2 x i1>, i32)
+
+define <2 x i32> @vssub_vv_v2i32(<2 x i32> %va, <2 x i32> %b, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_v2i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT:    vssub.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    ret
+  %v = call <2 x i32> @llvm.vp.ssub.sat.v2i32(<2 x i32> %va, <2 x i32> %b, <2 x i1> %m, i32 %evl)
+  ret <2 x i32> %v
+}
+
+define <2 x i32> @vssub_vv_v2i32_unmasked(<2 x i32> %va, <2 x i32> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_v2i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT:    vssub.vv v8, v8, v9
+; CHECK-NEXT:    ret
+  %head = insertelement <2 x i1> poison, i1 true, i32 0
+  %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
+  %v = call <2 x i32> @llvm.vp.ssub.sat.v2i32(<2 x i32> %va, <2 x i32> %b, <2 x i1> %m, i32 %evl)
+  ret <2 x i32> %v
+}
+
+define <2 x i32> @vssub_vx_v2i32(<2 x i32> %va, i32 %b, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_v2i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <2 x i32> poison, i32 %b, i32 0
+  %vb = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer
+  %v = call <2 x i32> @llvm.vp.ssub.sat.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> %m, i32 %evl)
+  ret <2 x i32> %v
+}
+
+define <2 x i32> @vssub_vx_v2i32_unmasked(<2 x i32> %va, i32 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_v2i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <2 x i32> poison, i32 %b, i32 0
+  %vb = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer
+  %head = insertelement <2 x i1> poison, i1 true, i32 0
+  %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
+  %v = call <2 x i32> @llvm.vp.ssub.sat.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> %m, i32 %evl)
+  ret <2 x i32> %v
+}
+
+define <2 x i32> @vssub_vi_v2i32(<2 x i32> %va, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_v2i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <2 x i32> poison, i32 -1, i32 0
+  %vb = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer
+  %v = call <2 x i32> @llvm.vp.ssub.sat.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> %m, i32 %evl)
+  ret <2 x i32> %v
+}
+
+define <2 x i32> @vssub_vi_v2i32_unmasked(<2 x i32> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_v2i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <2 x i32> poison, i32 -1, i32 0
+  %vb = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer
+  %head = insertelement <2 x i1> poison, i1 true, i32 0
+  %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
+  %v = call <2 x i32> @llvm.vp.ssub.sat.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> %m, i32 %evl)
+  ret <2 x i32> %v
+}
+
+declare <4 x i32> @llvm.vp.ssub.sat.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32)
+
+define <4 x i32> @vssub_vv_v4i32(<4 x i32> %va, <4 x i32> %b, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_v4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT:    vssub.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    ret
+  %v = call <4 x i32> @llvm.vp.ssub.sat.v4i32(<4 x i32> %va, <4 x i32> %b, <4 x i1> %m, i32 %evl)
+  ret <4 x i32> %v
+}
+
+define <4 x i32> @vssub_vv_v4i32_unmasked(<4 x i32> %va, <4 x i32> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_v4i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT:    vssub.vv v8, v8, v9
+; CHECK-NEXT:    ret
+  %head = insertelement <4 x i1> poison, i1 true, i32 0
+  %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
+  %v = call <4 x i32> @llvm.vp.ssub.sat.v4i32(<4 x i32> %va, <4 x i32> %b, <4 x i1> %m, i32 %evl)
+  ret <4 x i32> %v
+}
+
+define <4 x i32> @vssub_vx_v4i32(<4 x i32> %va, i32 %b, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_v4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <4 x i32> poison, i32 %b, i32 0
+  %vb = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer
+  %v = call <4 x i32> @llvm.vp.ssub.sat.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %m, i32 %evl)
+  ret <4 x i32> %v
+}
+
+define <4 x i32> @vssub_vx_v4i32_unmasked(<4 x i32> %va, i32 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_v4i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <4 x i32> poison, i32 %b, i32 0
+  %vb = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer
+  %head = insertelement <4 x i1> poison, i1 true, i32 0
+  %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
+  %v = call <4 x i32> @llvm.vp.ssub.sat.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %m, i32 %evl)
+  ret <4 x i32> %v
+}
+
+define <4 x i32> @vssub_vi_v4i32(<4 x i32> %va, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_v4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <4 x i32> poison, i32 -1, i32 0
+  %vb = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer
+  %v = call <4 x i32> @llvm.vp.ssub.sat.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %m, i32 %evl)
+  ret <4 x i32> %v
+}
+
+define <4 x i32> @vssub_vi_v4i32_unmasked(<4 x i32> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_v4i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <4 x i32> poison, i32 -1, i32 0
+  %vb = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer
+  %head = insertelement <4 x i1> poison, i1 true, i32 0
+  %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
+  %v = call <4 x i32> @llvm.vp.ssub.sat.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %m, i32 %evl)
+  ret <4 x i32> %v
+}
+
+declare <8 x i32> @llvm.vp.ssub.sat.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
+
+define <8 x i32> @vssub_vv_v8i32(<8 x i32> %va, <8 x i32> %b, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_v8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT:    vssub.vv v8, v8, v10, v0.t
+; CHECK-NEXT:    ret
+  %v = call <8 x i32> @llvm.vp.ssub.sat.v8i32(<8 x i32> %va, <8 x i32> %b, <8 x i1> %m, i32 %evl)
+  ret <8 x i32> %v
+}
+
+define <8 x i32> @vssub_vv_v8i32_unmasked(<8 x i32> %va, <8 x i32> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_v8i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT:    vssub.vv v8, v8, v10
+; CHECK-NEXT:    ret
+  %head = insertelement <8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+  %v = call <8 x i32> @llvm.vp.ssub.sat.v8i32(<8 x i32> %va, <8 x i32> %b, <8 x i1> %m, i32 %evl)
+  ret <8 x i32> %v
+}
+
+define <8 x i32> @vssub_vx_v8i32(<8 x i32> %va, i32 %b, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_v8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0
+  %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer
+  %v = call <8 x i32> @llvm.vp.ssub.sat.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 %evl)
+  ret <8 x i32> %v
+}
+
+define <8 x i32> @vssub_vx_v8i32_unmasked(<8 x i32> %va, i32 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_v8i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0
+  %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer
+  %head = insertelement <8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+  %v = call <8 x i32> @llvm.vp.ssub.sat.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 %evl)
+  ret <8 x i32> %v
+}
+
+define <8 x i32> @vssub_vi_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_v8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <8 x i32> poison, i32 -1, i32 0
+  %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer
+  %v = call <8 x i32> @llvm.vp.ssub.sat.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 %evl)
+  ret <8 x i32> %v
+}
+
+define <8 x i32> @vssub_vi_v8i32_unmasked(<8 x i32> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_v8i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <8 x i32> poison, i32 -1, i32 0
+  %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer
+  %head = insertelement <8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+  %v = call <8 x i32> @llvm.vp.ssub.sat.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 %evl)
+  ret <8 x i32> %v
+}
+
+declare <16 x i32> @llvm.vp.ssub.sat.v16i32(<16 x i32>, <16 x i32>, <16 x i1>, i32)
+
+define <16 x i32> @vssub_vv_v16i32(<16 x i32> %va, <16 x i32> %b, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_v16i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT:    vssub.vv v8, v8, v12, v0.t
+; CHECK-NEXT:    ret
+  %v = call <16 x i32> @llvm.vp.ssub.sat.v16i32(<16 x i32> %va, <16 x i32> %b, <16 x i1> %m, i32 %evl)
+  ret <16 x i32> %v
+}
+
+define <16 x i32> @vssub_vv_v16i32_unmasked(<16 x i32> %va, <16 x i32> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_v16i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT:    vssub.vv v8, v8, v12
+; CHECK-NEXT:    ret
+  %head = insertelement <16 x i1> poison, i1 true, i32 0
+  %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
+  %v = call <16 x i32> @llvm.vp.ssub.sat.v16i32(<16 x i32> %va, <16 x i32> %b, <16 x i1> %m, i32 %evl)
+  ret <16 x i32> %v
+}
+
+define <16 x i32> @vssub_vx_v16i32(<16 x i32> %va, i32 %b, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_v16i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <16 x i32> poison, i32 %b, i32 0
+  %vb = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer
+  %v = call <16 x i32> @llvm.vp.ssub.sat.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> %m, i32 %evl)
+  ret <16 x i32> %v
+}
+
+define <16 x i32> @vssub_vx_v16i32_unmasked(<16 x i32> %va, i32 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_v16i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <16 x i32> poison, i32 %b, i32 0
+  %vb = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer
+  %head = insertelement <16 x i1> poison, i1 true, i32 0
+  %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
+  %v = call <16 x i32> @llvm.vp.ssub.sat.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> %m, i32 %evl)
+  ret <16 x i32> %v
+}
+
+define <16 x i32> @vssub_vi_v16i32(<16 x i32> %va, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_v16i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <16 x i32> poison, i32 -1, i32 0
+  %vb = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer
+  %v = call <16 x i32> @llvm.vp.ssub.sat.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> %m, i32 %evl)
+  ret <16 x i32> %v
+}
+
+define <16 x i32> @vssub_vi_v16i32_unmasked(<16 x i32> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_v16i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <16 x i32> poison, i32 -1, i32 0
+  %vb = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer
+  %head = insertelement <16 x i1> poison, i1 true, i32 0
+  %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
+  %v = call <16 x i32> @llvm.vp.ssub.sat.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> %m, i32 %evl)
+  ret <16 x i32> %v
+}
+
+declare <2 x i64> @llvm.vp.ssub.sat.v2i64(<2 x i64>, <2 x i64>, <2 x i1>, i32)
+
+define <2 x i64> @vssub_vv_v2i64(<2 x i64> %va, <2 x i64> %b, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_v2i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT:    vssub.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    ret
+  %v = call <2 x i64> @llvm.vp.ssub.sat.v2i64(<2 x i64> %va, <2 x i64> %b, <2 x i1> %m, i32 %evl)
+  ret <2 x i64> %v
+}
+
+define <2 x i64> @vssub_vv_v2i64_unmasked(<2 x i64> %va, <2 x i64> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_v2i64_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT:    vssub.vv v8, v8, v9
+; CHECK-NEXT:    ret
+  %head = insertelement <2 x i1> poison, i1 true, i32 0
+  %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
+  %v = call <2 x i64> @llvm.vp.ssub.sat.v2i64(<2 x i64> %va, <2 x i64> %b, <2 x i1> %m, i32 %evl)
+  ret <2 x i64> %v
+}
+
+define <2 x i64> @vssub_vx_v2i64(<2 x i64> %va, i64 %b, <2 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vssub_vx_v2i64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    sw a1, 12(sp)
+; RV32-NEXT:    sw a0, 8(sp)
+; RV32-NEXT:    addi a0, sp, 8
+; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; RV32-NEXT:    vlse64.v v9, (a0), zero
+; RV32-NEXT:    vsetvli zero, a2, e64, m1, ta, ma
+; RV32-NEXT:    vssub.vv v8, v8, v9, v0.t
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vssub_vx_v2i64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli zero, a1, e64, m1, ta, ma
+; RV64-NEXT:    vssub.vx v8, v8, a0, v0.t
+; RV64-NEXT:    ret
+  %elt.head = insertelement <2 x i64> poison, i64 %b, i32 0
+  %vb = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer
+  %v = call <2 x i64> @llvm.vp.ssub.sat.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %m, i32 %evl)
+  ret <2 x i64> %v
+}
+
+define <2 x i64> @vssub_vx_v2i64_unmasked(<2 x i64> %va, i64 %b, i32 zeroext %evl) {
+; RV32-LABEL: vssub_vx_v2i64_unmasked:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    sw a1, 12(sp)
+; RV32-NEXT:    sw a0, 8(sp)
+; RV32-NEXT:    addi a0, sp, 8
+; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; RV32-NEXT:    vlse64.v v9, (a0), zero
+; RV32-NEXT:    vsetvli zero, a2, e64, m1, ta, ma
+; RV32-NEXT:    vssub.vv v8, v8, v9
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vssub_vx_v2i64_unmasked:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli zero, a1, e64, m1, ta, ma
+; RV64-NEXT:    vssub.vx v8, v8, a0
+; RV64-NEXT:    ret
+  %elt.head = insertelement <2 x i64> poison, i64 %b, i32 0
+  %vb = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer
+  %head = insertelement <2 x i1> poison, i1 true, i32 0
+  %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
+  %v = call <2 x i64> @llvm.vp.ssub.sat.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %m, i32 %evl)
+  ret <2 x i64> %v
+}
+
+define <2 x i64> @vssub_vi_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_v2i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <2 x i64> poison, i64 -1, i32 0
+  %vb = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer
+  %v = call <2 x i64> @llvm.vp.ssub.sat.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %m, i32 %evl)
+  ret <2 x i64> %v
+}
+
+define <2 x i64> @vssub_vi_v2i64_unmasked(<2 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_v2i64_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <2 x i64> poison, i64 -1, i32 0
+  %vb = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer
+  %head = insertelement <2 x i1> poison, i1 true, i32 0
+  %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
+  %v = call <2 x i64> @llvm.vp.ssub.sat.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %m, i32 %evl)
+  ret <2 x i64> %v
+}
+
+declare <4 x i64> @llvm.vp.ssub.sat.v4i64(<4 x i64>, <4 x i64>, <4 x i1>, i32)
+
+define <4 x i64> @vssub_vv_v4i64(<4 x i64> %va, <4 x i64> %b, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_v4i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-NEXT:    vssub.vv v8, v8, v10, v0.t
+; CHECK-NEXT:    ret
+  %v = call <4 x i64> @llvm.vp.ssub.sat.v4i64(<4 x i64> %va, <4 x i64> %b, <4 x i1> %m, i32 %evl)
+  ret <4 x i64> %v
+}
+
+define <4 x i64> @vssub_vv_v4i64_unmasked(<4 x i64> %va, <4 x i64> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_v4i64_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-NEXT:    vssub.vv v8, v8, v10
+; CHECK-NEXT:    ret
+  %head = insertelement <4 x i1> poison, i1 true, i32 0
+  %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
+  %v = call <4 x i64> @llvm.vp.ssub.sat.v4i64(<4 x i64> %va, <4 x i64> %b, <4 x i1> %m, i32 %evl)
+  ret <4 x i64> %v
+}
+
+define <4 x i64> @vssub_vx_v4i64(<4 x i64> %va, i64 %b, <4 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vssub_vx_v4i64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    sw a1, 12(sp)
+; RV32-NEXT:    sw a0, 8(sp)
+; RV32-NEXT:    addi a0, sp, 8
+; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV32-NEXT:    vlse64.v v10, (a0), zero
+; RV32-NEXT:    vsetvli zero, a2, e64, m2, ta, ma
+; RV32-NEXT:    vssub.vv v8, v8, v10, v0.t
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vssub_vx_v4i64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli zero, a1, e64, m2, ta, ma
+; RV64-NEXT:    vssub.vx v8, v8, a0, v0.t
+; RV64-NEXT:    ret
+  %elt.head = insertelement <4 x i64> poison, i64 %b, i32 0
+  %vb = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer
+  %v = call <4 x i64> @llvm.vp.ssub.sat.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> %m, i32 %evl)
+  ret <4 x i64> %v
+}
+
+define <4 x i64> @vssub_vx_v4i64_unmasked(<4 x i64> %va, i64 %b, i32 zeroext %evl) {
+; RV32-LABEL: vssub_vx_v4i64_unmasked:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    sw a1, 12(sp)
+; RV32-NEXT:    sw a0, 8(sp)
+; RV32-NEXT:    addi a0, sp, 8
+; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV32-NEXT:    vlse64.v v10, (a0), zero
+; RV32-NEXT:    vsetvli zero, a2, e64, m2, ta, ma
+; RV32-NEXT:    vssub.vv v8, v8, v10
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vssub_vx_v4i64_unmasked:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli zero, a1, e64, m2, ta, ma
+; RV64-NEXT:    vssub.vx v8, v8, a0
+; RV64-NEXT:    ret
+  %elt.head = insertelement <4 x i64> poison, i64 %b, i32 0
+  %vb = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer
+  %head = insertelement <4 x i1> poison, i1 true, i32 0
+  %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
+  %v = call <4 x i64> @llvm.vp.ssub.sat.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> %m, i32 %evl)
+  ret <4 x i64> %v
+}
+
+define <4 x i64> @vssub_vi_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_v4i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <4 x i64> poison, i64 -1, i32 0
+  %vb = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer
+  %v = call <4 x i64> @llvm.vp.ssub.sat.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> %m, i32 %evl)
+  ret <4 x i64> %v
+}
+
+define <4 x i64> @vssub_vi_v4i64_unmasked(<4 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_v4i64_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <4 x i64> poison, i64 -1, i32 0
+  %vb = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer
+  %head = insertelement <4 x i1> poison, i1 true, i32 0
+  %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
+  %v = call <4 x i64> @llvm.vp.ssub.sat.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> %m, i32 %evl)
+  ret <4 x i64> %v
+}
+
+declare <8 x i64> @llvm.vp.ssub.sat.v8i64(<8 x i64>, <8 x i64>, <8 x i1>, i32)
+
+define <8 x i64> @vssub_vv_v8i64(<8 x i64> %va, <8 x i64> %b, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_v8i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT:    vssub.vv v8, v8, v12, v0.t
+; CHECK-NEXT:    ret
+  %v = call <8 x i64> @llvm.vp.ssub.sat.v8i64(<8 x i64> %va, <8 x i64> %b, <8 x i1> %m, i32 %evl)
+  ret <8 x i64> %v
+}
+
+define <8 x i64> @vssub_vv_v8i64_unmasked(<8 x i64> %va, <8 x i64> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_v8i64_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT:    vssub.vv v8, v8, v12
+; CHECK-NEXT:    ret
+  %head = insertelement <8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+  %v = call <8 x i64> @llvm.vp.ssub.sat.v8i64(<8 x i64> %va, <8 x i64> %b, <8 x i1> %m, i32 %evl)
+  ret <8 x i64> %v
+}
+
+define <8 x i64> @vssub_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vssub_vx_v8i64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    sw a1, 12(sp)
+; RV32-NEXT:    sw a0, 8(sp)
+; RV32-NEXT:    addi a0, sp, 8
+; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
+; RV32-NEXT:    vlse64.v v12, (a0), zero
+; RV32-NEXT:    vsetvli zero, a2, e64, m4, ta, ma
+; RV32-NEXT:    vssub.vv v8, v8, v12, v0.t
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vssub_vx_v8i64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
+; RV64-NEXT:    vssub.vx v8, v8, a0, v0.t
+; RV64-NEXT:    ret
+  %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0
+  %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
+  %v = call <8 x i64> @llvm.vp.ssub.sat.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+  ret <8 x i64> %v
+}
+
+define <8 x i64> @vssub_vx_v8i64_unmasked(<8 x i64> %va, i64 %b, i32 zeroext %evl) {
+; RV32-LABEL: vssub_vx_v8i64_unmasked:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    sw a1, 12(sp)
+; RV32-NEXT:    sw a0, 8(sp)
+; RV32-NEXT:    addi a0, sp, 8
+; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
+; RV32-NEXT:    vlse64.v v12, (a0), zero
+; RV32-NEXT:    vsetvli zero, a2, e64, m4, ta, ma
+; RV32-NEXT:    vssub.vv v8, v8, v12
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vssub_vx_v8i64_unmasked:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
+; RV64-NEXT:    vssub.vx v8, v8, a0
+; RV64-NEXT:    ret
+  %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0
+  %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
+  %head = insertelement <8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+  %v = call <8 x i64> @llvm.vp.ssub.sat.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+  ret <8 x i64> %v
+}
+
+define <8 x i64> @vssub_vi_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_v8i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <8 x i64> poison, i64 -1, i32 0
+  %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
+  %v = call <8 x i64> @llvm.vp.ssub.sat.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+  ret <8 x i64> %v
+}
+
+define <8 x i64> @vssub_vi_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_v8i64_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <8 x i64> poison, i64 -1, i32 0
+  %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
+  %head = insertelement <8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+  %v = call <8 x i64> @llvm.vp.ssub.sat.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+  ret <8 x i64> %v
+}
+
+declare <16 x i64> @llvm.vp.ssub.sat.v16i64(<16 x i64>, <16 x i64>, <16 x i1>, i32)
+
+define <16 x i64> @vssub_vv_v16i64(<16 x i64> %va, <16 x i64> %b, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_v16i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT:    vssub.vv v8, v8, v16, v0.t
+; CHECK-NEXT:    ret
+  %v = call <16 x i64> @llvm.vp.ssub.sat.v16i64(<16 x i64> %va, <16 x i64> %b, <16 x i1> %m, i32 %evl)
+  ret <16 x i64> %v
+}
+
+define <16 x i64> @vssub_vv_v16i64_unmasked(<16 x i64> %va, <16 x i64> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_v16i64_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT:    vssub.vv v8, v8, v16
+; CHECK-NEXT:    ret
+  %head = insertelement <16 x i1> poison, i1 true, i32 0
+  %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
+  %v = call <16 x i64> @llvm.vp.ssub.sat.v16i64(<16 x i64> %va, <16 x i64> %b, <16 x i1> %m, i32 %evl)
+  ret <16 x i64> %v
+}
+
+define <16 x i64> @vssub_vx_v16i64(<16 x i64> %va, i64 %b, <16 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vssub_vx_v16i64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    sw a1, 12(sp)
+; RV32-NEXT:    sw a0, 8(sp)
+; RV32-NEXT:    addi a0, sp, 8
+; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
+; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
+; RV32-NEXT:    vssub.vv v8, v8, v16, v0.t
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vssub_vx_v16i64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vssub.vx v8, v8, a0, v0.t
+; RV64-NEXT:    ret
+  %elt.head = insertelement <16 x i64> poison, i64 %b, i32 0
+  %vb = shufflevector <16 x i64> %elt.head, <16 x i64> poison, <16 x i32> zeroinitializer
+  %v = call <16 x i64> @llvm.vp.ssub.sat.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl)
+  ret <16 x i64> %v
+}
+
+define <16 x i64> @vssub_vx_v16i64_unmasked(<16 x i64> %va, i64 %b, i32 zeroext %evl) {
+; RV32-LABEL: vssub_vx_v16i64_unmasked:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    sw a1, 12(sp)
+; RV32-NEXT:    sw a0, 8(sp)
+; RV32-NEXT:    addi a0, sp, 8
+; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
+; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
+; RV32-NEXT:    vssub.vv v8, v8, v16
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vssub_vx_v16i64_unmasked:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vssub.vx v8, v8, a0
+; RV64-NEXT:    ret
+  %elt.head = insertelement <16 x i64> poison, i64 %b, i32 0
+  %vb = shufflevector <16 x i64> %elt.head, <16 x i64> poison, <16 x i32> zeroinitializer
+  %head = insertelement <16 x i1> poison, i1 true, i32 0
+  %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
+  %v = call <16 x i64> @llvm.vp.ssub.sat.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl)
+  ret <16 x i64> %v
+}
+
+define <16 x i64> @vssub_vi_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_v16i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <16 x i64> poison, i64 -1, i32 0
+  %vb = shufflevector <16 x i64> %elt.head, <16 x i64> poison, <16 x i32> zeroinitializer
+  %v = call <16 x i64> @llvm.vp.ssub.sat.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl)
+  ret <16 x i64> %v
+}
+
+define <16 x i64> @vssub_vi_v16i64_unmasked(<16 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_v16i64_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <16 x i64> poison, i64 -1, i32 0
+  %vb = shufflevector <16 x i64> %elt.head, <16 x i64> poison, <16 x i32> zeroinitializer
+  %head = insertelement <16 x i1> poison, i1 true, i32 0
+  %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
+  %v = call <16 x i64> @llvm.vp.ssub.sat.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl)
+  ret <16 x i64> %v
+}
+
+; Test that split-legalization works as expected.
+
+declare <32 x i64> @llvm.vp.ssub.sat.v32i64(<32 x i64>, <32 x i64>, <32 x i1>, i32)
+
+define <32 x i64> @vssub_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vssub_vx_v32i64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
+; RV32-NEXT:    vslidedown.vi v1, v0, 2
+; RV32-NEXT:    li a1, 32
+; RV32-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
+; RV32-NEXT:    li a2, 16
+; RV32-NEXT:    vmv.v.i v24, -1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    bltu a0, a2, .LBB108_2
+; RV32-NEXT:  # %bb.1:
+; RV32-NEXT:    li a1, 16
+; RV32-NEXT:  .LBB108_2:
+; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV32-NEXT:    vssub.vv v8, v8, v24, v0.t
+; RV32-NEXT:    addi a1, a0, -16
+; RV32-NEXT:    sltu a0, a0, a1
+; RV32-NEXT:    addi a0, a0, -1
+; RV32-NEXT:    and a0, a0, a1
+; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT:    vmv1r.v v0, v1
+; RV32-NEXT:    vssub.vv v16, v16, v24, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vssub_vx_v32i64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
+; RV64-NEXT:    li a2, 16
+; RV64-NEXT:    vslidedown.vi v24, v0, 2
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    bltu a0, a2, .LBB108_2
+; RV64-NEXT:  # %bb.1:
+; RV64-NEXT:    li a1, 16
+; RV64-NEXT:  .LBB108_2:
+; RV64-NEXT:    li a2, -1
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vssub.vx v8, v8, a2, v0.t
+; RV64-NEXT:    addi a1, a0, -16
+; RV64-NEXT:    sltu a0, a0, a1
+; RV64-NEXT:    addi a0, a0, -1
+; RV64-NEXT:    and a0, a0, a1
+; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV64-NEXT:    vmv1r.v v0, v24
+; RV64-NEXT:    vssub.vx v16, v16, a2, v0.t
+; RV64-NEXT:    ret
+  %elt.head = insertelement <32 x i64> poison, i64 -1, i32 0
+  %vb = shufflevector <32 x i64> %elt.head, <32 x i64> poison, <32 x i32> zeroinitializer
+  %v = call <32 x i64> @llvm.vp.ssub.sat.v32i64(<32 x i64> %va, <32 x i64> %vb, <32 x i1> %m, i32 %evl)
+  ret <32 x i64> %v
+}
+
+define <32 x i64> @vssub_vi_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) {
+; RV32-LABEL: vssub_vi_v32i64_unmasked:
+; RV32:       # %bb.0:
+; RV32-NEXT:    li a1, 32
+; RV32-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
+; RV32-NEXT:    li a2, 16
+; RV32-NEXT:    vmv.v.i v24, -1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    bltu a0, a2, .LBB109_2
+; RV32-NEXT:  # %bb.1:
+; RV32-NEXT:    li a1, 16
+; RV32-NEXT:  .LBB109_2:
+; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV32-NEXT:    vssub.vv v8, v8, v24
+; RV32-NEXT:    addi a1, a0, -16
+; RV32-NEXT:    sltu a0, a0, a1
+; RV32-NEXT:    addi a0, a0, -1
+; RV32-NEXT:    and a0, a0, a1
+; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT:    vssub.vv v16, v16, v24
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vssub_vi_v32i64_unmasked:
+; RV64:       # %bb.0:
+; RV64-NEXT:    li a2, 16
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    bltu a0, a2, .LBB109_2
+; RV64-NEXT:  # %bb.1:
+; RV64-NEXT:    li a1, 16
+; RV64-NEXT:  .LBB109_2:
+; RV64-NEXT:    li a2, -1
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vssub.vx v8, v8, a2
+; RV64-NEXT:    addi a1, a0, -16
+; RV64-NEXT:    sltu a0, a0, a1
+; RV64-NEXT:    addi a0, a0, -1
+; RV64-NEXT:    and a0, a0, a1
+; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV64-NEXT:    vssub.vx v16, v16, a2
+; RV64-NEXT:    ret
+  %elt.head = insertelement <32 x i64> poison, i64 -1, i32 0
+  %vb = shufflevector <32 x i64> %elt.head, <32 x i64> poison, <32 x i32> zeroinitializer
+  %head = insertelement <32 x i1> poison, i1 true, i32 0
+  %m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer
+  %v = call <32 x i64> @llvm.vp.ssub.sat.v32i64(<32 x i64> %va, <32 x i64> %vb, <32 x i1> %m, i32 %evl)
+  ret <32 x i64> %v
+}
+
+; FIXME: We don't match vssub.vi on RV32.
+
+define <32 x i64> @vssub_vx_v32i64_evl12(<32 x i64> %va, <32 x i1> %m) {
+; RV32-LABEL: vssub_vx_v32i64_evl12:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
+; RV32-NEXT:    vslidedown.vi v1, v0, 2
+; RV32-NEXT:    li a0, 32
+; RV32-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
+; RV32-NEXT:    vmv.v.i v24, -1
+; RV32-NEXT:    vsetivli zero, 12, e64, m8, ta, ma
+; RV32-NEXT:    vssub.vv v8, v8, v24, v0.t
+; RV32-NEXT:    vsetivli zero, 0, e64, m8, ta, ma
+; RV32-NEXT:    vmv1r.v v0, v1
+; RV32-NEXT:    vssub.vv v16, v16, v24, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vssub_vx_v32i64_evl12:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
+; RV64-NEXT:    vslidedown.vi v24, v0, 2
+; RV64-NEXT:    li a0, -1
+; RV64-NEXT:    vsetivli zero, 12, e64, m8, ta, ma
+; RV64-NEXT:    vssub.vx v8, v8, a0, v0.t
+; RV64-NEXT:    vsetivli zero, 0, e64, m8, ta, ma
+; RV64-NEXT:    vmv1r.v v0, v24
+; RV64-NEXT:    vssub.vx v16, v16, a0, v0.t
+; RV64-NEXT:    ret
+  %elt.head = insertelement <32 x i64> poison, i64 -1, i32 0
+  %vb = shufflevector <32 x i64> %elt.head, <32 x i64> poison, <32 x i32> zeroinitializer
+  %v = call <32 x i64> @llvm.vp.ssub.sat.v32i64(<32 x i64> %va, <32 x i64> %vb, <32 x i1> %m, i32 12)
+  ret <32 x i64> %v
+}
+
+define <32 x i64> @vssub_vx_v32i64_evl27(<32 x i64> %va, <32 x i1> %m) {
+; RV32-LABEL: vssub_vx_v32i64_evl27:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
+; RV32-NEXT:    vslidedown.vi v1, v0, 2
+; RV32-NEXT:    li a0, 32
+; RV32-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
+; RV32-NEXT:    vmv.v.i v24, -1
+; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
+; RV32-NEXT:    vssub.vv v8, v8, v24, v0.t
+; RV32-NEXT:    vsetivli zero, 11, e64, m8, ta, ma
+; RV32-NEXT:    vmv1r.v v0, v1
+; RV32-NEXT:    vssub.vv v16, v16, v24, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vssub_vx_v32i64_evl27:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
+; RV64-NEXT:    vslidedown.vi v24, v0, 2
+; RV64-NEXT:    li a0, -1
+; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
+; RV64-NEXT:    vssub.vx v8, v8, a0, v0.t
+; RV64-NEXT:    vsetivli zero, 11, e64, m8, ta, ma
+; RV64-NEXT:    vmv1r.v v0, v24
+; RV64-NEXT:    vssub.vx v16, v16, a0, v0.t
+; RV64-NEXT:    ret
+  %elt.head = insertelement <32 x i64> poison, i64 -1, i32 0
+  %vb = shufflevector <32 x i64> %elt.head, <32 x i64> poison, <32 x i32> zeroinitializer
+  %v = call <32 x i64> @llvm.vp.ssub.sat.v32i64(<32 x i64> %va, <32 x i64> %vb, <32 x i1> %m, i32 27)
+  ret <32 x i64> %v
+}

diff  --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssubu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssubu-vp.ll
new file mode 100644
index 00000000000000..6ea97588712307
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssubu-vp.ll
@@ -0,0 +1,1740 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s --check-prefixes=CHECK,RV64
+
+declare <8 x i7> @llvm.vp.usub.sat.v8i7(<8 x i7>, <8 x i7>, <8 x i1>, i32)
+
+define <8 x i7> @vssubu_vv_v8i7(<8 x i7> %va, <8 x i7> %b, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_v8i7:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 127
+; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT:    vand.vx v9, v9, a1
+; CHECK-NEXT:    vand.vx v8, v8, a1
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT:    vssubu.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    ret
+  %v = call <8 x i7> @llvm.vp.usub.sat.v8i7(<8 x i7> %va, <8 x i7> %b, <8 x i1> %m, i32 %evl)
+  ret <8 x i7> %v
+}
+
+declare <2 x i8> @llvm.vp.usub.sat.v2i8(<2 x i8>, <2 x i8>, <2 x i1>, i32)
+
+define <2 x i8> @vssubu_vv_v2i8(<2 x i8> %va, <2 x i8> %b, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_v2i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-NEXT:    vssubu.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    ret
+  %v = call <2 x i8> @llvm.vp.usub.sat.v2i8(<2 x i8> %va, <2 x i8> %b, <2 x i1> %m, i32 %evl)
+  ret <2 x i8> %v
+}
+
+define <2 x i8> @vssubu_vv_v2i8_unmasked(<2 x i8> %va, <2 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_v2i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-NEXT:    vssubu.vv v8, v8, v9
+; CHECK-NEXT:    ret
+  %head = insertelement <2 x i1> poison, i1 true, i32 0
+  %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
+  %v = call <2 x i8> @llvm.vp.usub.sat.v2i8(<2 x i8> %va, <2 x i8> %b, <2 x i1> %m, i32 %evl)
+  ret <2 x i8> %v
+}
+
+define <2 x i8> @vssubu_vx_v2i8(<2 x i8> %va, i8 %b, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_v2i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <2 x i8> poison, i8 %b, i32 0
+  %vb = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer
+  %v = call <2 x i8> @llvm.vp.usub.sat.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 %evl)
+  ret <2 x i8> %v
+}
+
+define <2 x i8> @vssubu_vx_v2i8_unmasked(<2 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_v2i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <2 x i8> poison, i8 %b, i32 0
+  %vb = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer
+  %head = insertelement <2 x i1> poison, i1 true, i32 0
+  %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
+  %v = call <2 x i8> @llvm.vp.usub.sat.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 %evl)
+  ret <2 x i8> %v
+}
+
+define <2 x i8> @vssubu_vi_v2i8(<2 x i8> %va, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_v2i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <2 x i8> poison, i8 -1, i32 0
+  %vb = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer
+  %v = call <2 x i8> @llvm.vp.usub.sat.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 %evl)
+  ret <2 x i8> %v
+}
+
+define <2 x i8> @vssubu_vi_v2i8_unmasked(<2 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_v2i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <2 x i8> poison, i8 -1, i32 0
+  %vb = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer
+  %head = insertelement <2 x i1> poison, i1 true, i32 0
+  %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
+  %v = call <2 x i8> @llvm.vp.usub.sat.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 %evl)
+  ret <2 x i8> %v
+}
+
+declare <4 x i8> @llvm.vp.usub.sat.v4i8(<4 x i8>, <4 x i8>, <4 x i1>, i32)
+
+define <4 x i8> @vssubu_vv_v4i8(<4 x i8> %va, <4 x i8> %b, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_v4i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-NEXT:    vssubu.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    ret
+  %v = call <4 x i8> @llvm.vp.usub.sat.v4i8(<4 x i8> %va, <4 x i8> %b, <4 x i1> %m, i32 %evl)
+  ret <4 x i8> %v
+}
+
+define <4 x i8> @vssubu_vv_v4i8_unmasked(<4 x i8> %va, <4 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_v4i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-NEXT:    vssubu.vv v8, v8, v9
+; CHECK-NEXT:    ret
+  %head = insertelement <4 x i1> poison, i1 true, i32 0
+  %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
+  %v = call <4 x i8> @llvm.vp.usub.sat.v4i8(<4 x i8> %va, <4 x i8> %b, <4 x i1> %m, i32 %evl)
+  ret <4 x i8> %v
+}
+
+define <4 x i8> @vssubu_vx_v4i8(<4 x i8> %va, i8 %b, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_v4i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <4 x i8> poison, i8 %b, i32 0
+  %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer
+  %v = call <4 x i8> @llvm.vp.usub.sat.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> %m, i32 %evl)
+  ret <4 x i8> %v
+}
+
+define <4 x i8> @vssubu_vx_v4i8_commute(<4 x i8> %va, i8 %b, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_v4i8_commute:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
+; CHECK-NEXT:    vmv.v.x v9, a0
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, ta, ma
+; CHECK-NEXT:    vssubu.vv v8, v9, v8, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <4 x i8> poison, i8 %b, i32 0
+  %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer
+  %v = call <4 x i8> @llvm.vp.usub.sat.v4i8(<4 x i8> %vb, <4 x i8> %va, <4 x i1> %m, i32 %evl)
+  ret <4 x i8> %v
+}
+
+define <4 x i8> @vssubu_vx_v4i8_unmasked(<4 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_v4i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <4 x i8> poison, i8 %b, i32 0
+  %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer
+  %head = insertelement <4 x i1> poison, i1 true, i32 0
+  %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
+  %v = call <4 x i8> @llvm.vp.usub.sat.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> %m, i32 %evl)
+  ret <4 x i8> %v
+}
+
+define <4 x i8> @vssubu_vi_v4i8(<4 x i8> %va, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_v4i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <4 x i8> poison, i8 -1, i32 0
+  %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer
+  %v = call <4 x i8> @llvm.vp.usub.sat.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> %m, i32 %evl)
+  ret <4 x i8> %v
+}
+
+define <4 x i8> @vssubu_vi_v4i8_unmasked(<4 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_v4i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <4 x i8> poison, i8 -1, i32 0
+  %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer
+  %head = insertelement <4 x i1> poison, i1 true, i32 0
+  %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
+  %v = call <4 x i8> @llvm.vp.usub.sat.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> %m, i32 %evl)
+  ret <4 x i8> %v
+}
+
+declare <5 x i8> @llvm.vp.usub.sat.v5i8(<5 x i8>, <5 x i8>, <5 x i1>, i32)
+
+define <5 x i8> @vssubu_vv_v5i8(<5 x i8> %va, <5 x i8> %b, <5 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_v5i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT:    vssubu.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    ret
+  %v = call <5 x i8> @llvm.vp.usub.sat.v5i8(<5 x i8> %va, <5 x i8> %b, <5 x i1> %m, i32 %evl)
+  ret <5 x i8> %v
+}
+
+define <5 x i8> @vssubu_vv_v5i8_unmasked(<5 x i8> %va, <5 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_v5i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT:    vssubu.vv v8, v8, v9
+; CHECK-NEXT:    ret
+  %head = insertelement <5 x i1> poison, i1 true, i32 0
+  %m = shufflevector <5 x i1> %head, <5 x i1> poison, <5 x i32> zeroinitializer
+  %v = call <5 x i8> @llvm.vp.usub.sat.v5i8(<5 x i8> %va, <5 x i8> %b, <5 x i1> %m, i32 %evl)
+  ret <5 x i8> %v
+}
+
+define <5 x i8> @vssubu_vx_v5i8(<5 x i8> %va, i8 %b, <5 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_v5i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <5 x i8> poison, i8 %b, i32 0
+  %vb = shufflevector <5 x i8> %elt.head, <5 x i8> poison, <5 x i32> zeroinitializer
+  %v = call <5 x i8> @llvm.vp.usub.sat.v5i8(<5 x i8> %va, <5 x i8> %vb, <5 x i1> %m, i32 %evl)
+  ret <5 x i8> %v
+}
+
+define <5 x i8> @vssubu_vx_v5i8_unmasked(<5 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_v5i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <5 x i8> poison, i8 %b, i32 0
+  %vb = shufflevector <5 x i8> %elt.head, <5 x i8> poison, <5 x i32> zeroinitializer
+  %head = insertelement <5 x i1> poison, i1 true, i32 0
+  %m = shufflevector <5 x i1> %head, <5 x i1> poison, <5 x i32> zeroinitializer
+  %v = call <5 x i8> @llvm.vp.usub.sat.v5i8(<5 x i8> %va, <5 x i8> %vb, <5 x i1> %m, i32 %evl)
+  ret <5 x i8> %v
+}
+
+define <5 x i8> @vssubu_vi_v5i8(<5 x i8> %va, <5 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_v5i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <5 x i8> poison, i8 -1, i32 0
+  %vb = shufflevector <5 x i8> %elt.head, <5 x i8> poison, <5 x i32> zeroinitializer
+  %v = call <5 x i8> @llvm.vp.usub.sat.v5i8(<5 x i8> %va, <5 x i8> %vb, <5 x i1> %m, i32 %evl)
+  ret <5 x i8> %v
+}
+
+define <5 x i8> @vssubu_vi_v5i8_unmasked(<5 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_v5i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <5 x i8> poison, i8 -1, i32 0
+  %vb = shufflevector <5 x i8> %elt.head, <5 x i8> poison, <5 x i32> zeroinitializer
+  %head = insertelement <5 x i1> poison, i1 true, i32 0
+  %m = shufflevector <5 x i1> %head, <5 x i1> poison, <5 x i32> zeroinitializer
+  %v = call <5 x i8> @llvm.vp.usub.sat.v5i8(<5 x i8> %va, <5 x i8> %vb, <5 x i1> %m, i32 %evl)
+  ret <5 x i8> %v
+}
+
+declare <8 x i8> @llvm.vp.usub.sat.v8i8(<8 x i8>, <8 x i8>, <8 x i1>, i32)
+
+define <8 x i8> @vssubu_vv_v8i8(<8 x i8> %va, <8 x i8> %b, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_v8i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT:    vssubu.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    ret
+  %v = call <8 x i8> @llvm.vp.usub.sat.v8i8(<8 x i8> %va, <8 x i8> %b, <8 x i1> %m, i32 %evl)
+  ret <8 x i8> %v
+}
+
+define <8 x i8> @vssubu_vv_v8i8_unmasked(<8 x i8> %va, <8 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_v8i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT:    vssubu.vv v8, v8, v9
+; CHECK-NEXT:    ret
+  %head = insertelement <8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+  %v = call <8 x i8> @llvm.vp.usub.sat.v8i8(<8 x i8> %va, <8 x i8> %b, <8 x i1> %m, i32 %evl)
+  ret <8 x i8> %v
+}
+
+define <8 x i8> @vssubu_vx_v8i8(<8 x i8> %va, i8 %b, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_v8i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <8 x i8> poison, i8 %b, i32 0
+  %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer
+  %v = call <8 x i8> @llvm.vp.usub.sat.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 %evl)
+  ret <8 x i8> %v
+}
+
+define <8 x i8> @vssubu_vx_v8i8_unmasked(<8 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_v8i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <8 x i8> poison, i8 %b, i32 0
+  %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer
+  %head = insertelement <8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+  %v = call <8 x i8> @llvm.vp.usub.sat.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 %evl)
+  ret <8 x i8> %v
+}
+
+define <8 x i8> @vssubu_vi_v8i8(<8 x i8> %va, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_v8i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <8 x i8> poison, i8 -1, i32 0
+  %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer
+  %v = call <8 x i8> @llvm.vp.usub.sat.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 %evl)
+  ret <8 x i8> %v
+}
+
+define <8 x i8> @vssubu_vi_v8i8_unmasked(<8 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_v8i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <8 x i8> poison, i8 -1, i32 0
+  %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer
+  %head = insertelement <8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+  %v = call <8 x i8> @llvm.vp.usub.sat.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 %evl)
+  ret <8 x i8> %v
+}
+
+declare <16 x i8> @llvm.vp.usub.sat.v16i8(<16 x i8>, <16 x i8>, <16 x i1>, i32)
+
+define <16 x i8> @vssubu_vv_v16i8(<16 x i8> %va, <16 x i8> %b, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_v16i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-NEXT:    vssubu.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    ret
+  %v = call <16 x i8> @llvm.vp.usub.sat.v16i8(<16 x i8> %va, <16 x i8> %b, <16 x i1> %m, i32 %evl)
+  ret <16 x i8> %v
+}
+
+define <16 x i8> @vssubu_vv_v16i8_unmasked(<16 x i8> %va, <16 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_v16i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-NEXT:    vssubu.vv v8, v8, v9
+; CHECK-NEXT:    ret
+  %head = insertelement <16 x i1> poison, i1 true, i32 0
+  %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
+  %v = call <16 x i8> @llvm.vp.usub.sat.v16i8(<16 x i8> %va, <16 x i8> %b, <16 x i1> %m, i32 %evl)
+  ret <16 x i8> %v
+}
+
+define <16 x i8> @vssubu_vx_v16i8(<16 x i8> %va, i8 %b, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_v16i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <16 x i8> poison, i8 %b, i32 0
+  %vb = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer
+  %v = call <16 x i8> @llvm.vp.usub.sat.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %m, i32 %evl)
+  ret <16 x i8> %v
+}
+
+define <16 x i8> @vssubu_vx_v16i8_unmasked(<16 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_v16i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <16 x i8> poison, i8 %b, i32 0
+  %vb = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer
+  %head = insertelement <16 x i1> poison, i1 true, i32 0
+  %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
+  %v = call <16 x i8> @llvm.vp.usub.sat.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %m, i32 %evl)
+  ret <16 x i8> %v
+}
+
+define <16 x i8> @vssubu_vi_v16i8(<16 x i8> %va, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_v16i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <16 x i8> poison, i8 -1, i32 0
+  %vb = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer
+  %v = call <16 x i8> @llvm.vp.usub.sat.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %m, i32 %evl)
+  ret <16 x i8> %v
+}
+
+define <16 x i8> @vssubu_vi_v16i8_unmasked(<16 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_v16i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <16 x i8> poison, i8 -1, i32 0
+  %vb = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer
+  %head = insertelement <16 x i1> poison, i1 true, i32 0
+  %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
+  %v = call <16 x i8> @llvm.vp.usub.sat.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %m, i32 %evl)
+  ret <16 x i8> %v
+}
+
+declare <256 x i8> @llvm.vp.usub.sat.v258i8(<256 x i8>, <256 x i8>, <256 x i1>, i32)
+
+define <256 x i8> @vssubu_vi_v258i8(<256 x i8> %va, <256 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_v258i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vmv1r.v v24, v0
+; CHECK-NEXT:    li a2, 128
+; CHECK-NEXT:    vsetvli zero, a2, e8, m8, ta, ma
+; CHECK-NEXT:    vlm.v v0, (a0)
+; CHECK-NEXT:    addi a0, a1, -128
+; CHECK-NEXT:    sltu a3, a1, a0
+; CHECK-NEXT:    addi a3, a3, -1
+; CHECK-NEXT:    and a3, a3, a0
+; CHECK-NEXT:    li a0, -1
+; CHECK-NEXT:    vsetvli zero, a3, e8, m8, ta, ma
+; CHECK-NEXT:    vssubu.vx v16, v16, a0, v0.t
+; CHECK-NEXT:    bltu a1, a2, .LBB32_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    li a1, 128
+; CHECK-NEXT:  .LBB32_2:
+; CHECK-NEXT:    vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-NEXT:    vmv1r.v v0, v24
+; CHECK-NEXT:    vssubu.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <256 x i8> poison, i8 -1, i32 0
+  %vb = shufflevector <256 x i8> %elt.head, <256 x i8> poison, <256 x i32> zeroinitializer
+  %v = call <256 x i8> @llvm.vp.usub.sat.v258i8(<256 x i8> %va, <256 x i8> %vb, <256 x i1> %m, i32 %evl)
+  ret <256 x i8> %v
+}
+
+define <256 x i8> @vssubu_vi_v258i8_unmasked(<256 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_v258i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a2, 128
+; CHECK-NEXT:    mv a1, a0
+; CHECK-NEXT:    bltu a0, a2, .LBB33_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    li a1, 128
+; CHECK-NEXT:  .LBB33_2:
+; CHECK-NEXT:    li a2, -1
+; CHECK-NEXT:    vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a2
+; CHECK-NEXT:    addi a1, a0, -128
+; CHECK-NEXT:    sltu a0, a0, a1
+; CHECK-NEXT:    addi a0, a0, -1
+; CHECK-NEXT:    and a0, a0, a1
+; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-NEXT:    vssubu.vx v16, v16, a2
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <256 x i8> poison, i8 -1, i32 0
+  %vb = shufflevector <256 x i8> %elt.head, <256 x i8> poison, <256 x i32> zeroinitializer
+  %head = insertelement <256 x i1> poison, i1 true, i32 0
+  %m = shufflevector <256 x i1> %head, <256 x i1> poison, <256 x i32> zeroinitializer
+  %v = call <256 x i8> @llvm.vp.usub.sat.v258i8(<256 x i8> %va, <256 x i8> %vb, <256 x i1> %m, i32 %evl)
+  ret <256 x i8> %v
+}
+
+; Test splitting when the %evl is a known constant.
+
+define <256 x i8> @vssubu_vi_v258i8_evl129(<256 x i8> %va, <256 x i1> %m) {
+; CHECK-LABEL: vssubu_vi_v258i8_evl129:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 128
+; CHECK-NEXT:    vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-NEXT:    vlm.v v24, (a0)
+; CHECK-NEXT:    li a0, -1
+; CHECK-NEXT:    vssubu.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsetivli zero, 1, e8, m8, ta, ma
+; CHECK-NEXT:    vmv1r.v v0, v24
+; CHECK-NEXT:    vssubu.vx v16, v16, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <256 x i8> poison, i8 -1, i32 0
+  %vb = shufflevector <256 x i8> %elt.head, <256 x i8> poison, <256 x i32> zeroinitializer
+  %v = call <256 x i8> @llvm.vp.usub.sat.v258i8(<256 x i8> %va, <256 x i8> %vb, <256 x i1> %m, i32 129)
+  ret <256 x i8> %v
+}
+
+; FIXME: The upper half is doing nothing.
+
+define <256 x i8> @vssubu_vi_v258i8_evl128(<256 x i8> %va, <256 x i1> %m) {
+; CHECK-LABEL: vssubu_vi_v258i8_evl128:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 128
+; CHECK-NEXT:    vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-NEXT:    vlm.v v24, (a0)
+; CHECK-NEXT:    li a0, -1
+; CHECK-NEXT:    vssubu.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    vsetivli zero, 0, e8, m8, ta, ma
+; CHECK-NEXT:    vmv1r.v v0, v24
+; CHECK-NEXT:    vssubu.vx v16, v16, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <256 x i8> poison, i8 -1, i32 0
+  %vb = shufflevector <256 x i8> %elt.head, <256 x i8> poison, <256 x i32> zeroinitializer
+  %v = call <256 x i8> @llvm.vp.usub.sat.v258i8(<256 x i8> %va, <256 x i8> %vb, <256 x i1> %m, i32 128)
+  ret <256 x i8> %v
+}
+
+declare <2 x i16> @llvm.vp.usub.sat.v2i16(<2 x i16>, <2 x i16>, <2 x i1>, i32)
+
+define <2 x i16> @vssubu_vv_v2i16(<2 x i16> %va, <2 x i16> %b, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_v2i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT:    vssubu.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    ret
+  %v = call <2 x i16> @llvm.vp.usub.sat.v2i16(<2 x i16> %va, <2 x i16> %b, <2 x i1> %m, i32 %evl)
+  ret <2 x i16> %v
+}
+
+define <2 x i16> @vssubu_vv_v2i16_unmasked(<2 x i16> %va, <2 x i16> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_v2i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT:    vssubu.vv v8, v8, v9
+; CHECK-NEXT:    ret
+  %head = insertelement <2 x i1> poison, i1 true, i32 0
+  %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
+  %v = call <2 x i16> @llvm.vp.usub.sat.v2i16(<2 x i16> %va, <2 x i16> %b, <2 x i1> %m, i32 %evl)
+  ret <2 x i16> %v
+}
+
+define <2 x i16> @vssubu_vx_v2i16(<2 x i16> %va, i16 %b, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_v2i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <2 x i16> poison, i16 %b, i32 0
+  %vb = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer
+  %v = call <2 x i16> @llvm.vp.usub.sat.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> %m, i32 %evl)
+  ret <2 x i16> %v
+}
+
+define <2 x i16> @vssubu_vx_v2i16_unmasked(<2 x i16> %va, i16 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_v2i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <2 x i16> poison, i16 %b, i32 0
+  %vb = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer
+  %head = insertelement <2 x i1> poison, i1 true, i32 0
+  %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
+  %v = call <2 x i16> @llvm.vp.usub.sat.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> %m, i32 %evl)
+  ret <2 x i16> %v
+}
+
+define <2 x i16> @vssubu_vi_v2i16(<2 x i16> %va, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_v2i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <2 x i16> poison, i16 -1, i32 0
+  %vb = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer
+  %v = call <2 x i16> @llvm.vp.usub.sat.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> %m, i32 %evl)
+  ret <2 x i16> %v
+}
+
+define <2 x i16> @vssubu_vi_v2i16_unmasked(<2 x i16> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_v2i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <2 x i16> poison, i16 -1, i32 0
+  %vb = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer
+  %head = insertelement <2 x i1> poison, i1 true, i32 0
+  %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
+  %v = call <2 x i16> @llvm.vp.usub.sat.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> %m, i32 %evl)
+  ret <2 x i16> %v
+}
+
+declare <4 x i16> @llvm.vp.usub.sat.v4i16(<4 x i16>, <4 x i16>, <4 x i1>, i32)
+
+define <4 x i16> @vssubu_vv_v4i16(<4 x i16> %va, <4 x i16> %b, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_v4i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT:    vssubu.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    ret
+  %v = call <4 x i16> @llvm.vp.usub.sat.v4i16(<4 x i16> %va, <4 x i16> %b, <4 x i1> %m, i32 %evl)
+  ret <4 x i16> %v
+}
+
+define <4 x i16> @vssubu_vv_v4i16_unmasked(<4 x i16> %va, <4 x i16> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_v4i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT:    vssubu.vv v8, v8, v9
+; CHECK-NEXT:    ret
+  %head = insertelement <4 x i1> poison, i1 true, i32 0
+  %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
+  %v = call <4 x i16> @llvm.vp.usub.sat.v4i16(<4 x i16> %va, <4 x i16> %b, <4 x i1> %m, i32 %evl)
+  ret <4 x i16> %v
+}
+
+define <4 x i16> @vssubu_vx_v4i16(<4 x i16> %va, i16 %b, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_v4i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <4 x i16> poison, i16 %b, i32 0
+  %vb = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer
+  %v = call <4 x i16> @llvm.vp.usub.sat.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 %evl)
+  ret <4 x i16> %v
+}
+
+define <4 x i16> @vssubu_vx_v4i16_unmasked(<4 x i16> %va, i16 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_v4i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <4 x i16> poison, i16 %b, i32 0
+  %vb = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer
+  %head = insertelement <4 x i1> poison, i1 true, i32 0
+  %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
+  %v = call <4 x i16> @llvm.vp.usub.sat.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 %evl)
+  ret <4 x i16> %v
+}
+
+define <4 x i16> @vssubu_vi_v4i16(<4 x i16> %va, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_v4i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <4 x i16> poison, i16 -1, i32 0
+  %vb = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer
+  %v = call <4 x i16> @llvm.vp.usub.sat.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 %evl)
+  ret <4 x i16> %v
+}
+
+define <4 x i16> @vssubu_vi_v4i16_unmasked(<4 x i16> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_v4i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <4 x i16> poison, i16 -1, i32 0
+  %vb = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer
+  %head = insertelement <4 x i1> poison, i1 true, i32 0
+  %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
+  %v = call <4 x i16> @llvm.vp.usub.sat.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 %evl)
+  ret <4 x i16> %v
+}
+
+declare <8 x i16> @llvm.vp.usub.sat.v8i16(<8 x i16>, <8 x i16>, <8 x i1>, i32)
+
+define <8 x i16> @vssubu_vv_v8i16(<8 x i16> %va, <8 x i16> %b, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_v8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT:    vssubu.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    ret
+  %v = call <8 x i16> @llvm.vp.usub.sat.v8i16(<8 x i16> %va, <8 x i16> %b, <8 x i1> %m, i32 %evl)
+  ret <8 x i16> %v
+}
+
+define <8 x i16> @vssubu_vv_v8i16_unmasked(<8 x i16> %va, <8 x i16> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_v8i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT:    vssubu.vv v8, v8, v9
+; CHECK-NEXT:    ret
+  %head = insertelement <8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+  %v = call <8 x i16> @llvm.vp.usub.sat.v8i16(<8 x i16> %va, <8 x i16> %b, <8 x i1> %m, i32 %evl)
+  ret <8 x i16> %v
+}
+
+define <8 x i16> @vssubu_vx_v8i16(<8 x i16> %va, i16 %b, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_v8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <8 x i16> poison, i16 %b, i32 0
+  %vb = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer
+  %v = call <8 x i16> @llvm.vp.usub.sat.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %m, i32 %evl)
+  ret <8 x i16> %v
+}
+
+define <8 x i16> @vssubu_vx_v8i16_unmasked(<8 x i16> %va, i16 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_v8i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <8 x i16> poison, i16 %b, i32 0
+  %vb = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer
+  %head = insertelement <8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+  %v = call <8 x i16> @llvm.vp.usub.sat.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %m, i32 %evl)
+  ret <8 x i16> %v
+}
+
+define <8 x i16> @vssubu_vi_v8i16(<8 x i16> %va, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_v8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <8 x i16> poison, i16 -1, i32 0
+  %vb = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer
+  %v = call <8 x i16> @llvm.vp.usub.sat.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %m, i32 %evl)
+  ret <8 x i16> %v
+}
+
+define <8 x i16> @vssubu_vi_v8i16_unmasked(<8 x i16> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_v8i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <8 x i16> poison, i16 -1, i32 0
+  %vb = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer
+  %head = insertelement <8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+  %v = call <8 x i16> @llvm.vp.usub.sat.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %m, i32 %evl)
+  ret <8 x i16> %v
+}
+
+declare <16 x i16> @llvm.vp.usub.sat.v16i16(<16 x i16>, <16 x i16>, <16 x i1>, i32)
+
+define <16 x i16> @vssubu_vv_v16i16(<16 x i16> %va, <16 x i16> %b, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_v16i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT:    vssubu.vv v8, v8, v10, v0.t
+; CHECK-NEXT:    ret
+  %v = call <16 x i16> @llvm.vp.usub.sat.v16i16(<16 x i16> %va, <16 x i16> %b, <16 x i1> %m, i32 %evl)
+  ret <16 x i16> %v
+}
+
+define <16 x i16> @vssubu_vv_v16i16_unmasked(<16 x i16> %va, <16 x i16> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_v16i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT:    vssubu.vv v8, v8, v10
+; CHECK-NEXT:    ret
+  %head = insertelement <16 x i1> poison, i1 true, i32 0
+  %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
+  %v = call <16 x i16> @llvm.vp.usub.sat.v16i16(<16 x i16> %va, <16 x i16> %b, <16 x i1> %m, i32 %evl)
+  ret <16 x i16> %v
+}
+
+define <16 x i16> @vssubu_vx_v16i16(<16 x i16> %va, i16 %b, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_v16i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <16 x i16> poison, i16 %b, i32 0
+  %vb = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer
+  %v = call <16 x i16> @llvm.vp.usub.sat.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 %evl)
+  ret <16 x i16> %v
+}
+
+define <16 x i16> @vssubu_vx_v16i16_unmasked(<16 x i16> %va, i16 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_v16i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <16 x i16> poison, i16 %b, i32 0
+  %vb = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer
+  %head = insertelement <16 x i1> poison, i1 true, i32 0
+  %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
+  %v = call <16 x i16> @llvm.vp.usub.sat.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 %evl)
+  ret <16 x i16> %v
+}
+
+define <16 x i16> @vssubu_vi_v16i16(<16 x i16> %va, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_v16i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <16 x i16> poison, i16 -1, i32 0
+  %vb = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer
+  %v = call <16 x i16> @llvm.vp.usub.sat.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 %evl)
+  ret <16 x i16> %v
+}
+
+define <16 x i16> @vssubu_vi_v16i16_unmasked(<16 x i16> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_v16i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <16 x i16> poison, i16 -1, i32 0
+  %vb = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer
+  %head = insertelement <16 x i1> poison, i1 true, i32 0
+  %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
+  %v = call <16 x i16> @llvm.vp.usub.sat.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 %evl)
+  ret <16 x i16> %v
+}
+
+declare <2 x i32> @llvm.vp.usub.sat.v2i32(<2 x i32>, <2 x i32>, <2 x i1>, i32)
+
+define <2 x i32> @vssubu_vv_v2i32(<2 x i32> %va, <2 x i32> %b, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_v2i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT:    vssubu.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    ret
+  %v = call <2 x i32> @llvm.vp.usub.sat.v2i32(<2 x i32> %va, <2 x i32> %b, <2 x i1> %m, i32 %evl)
+  ret <2 x i32> %v
+}
+
+define <2 x i32> @vssubu_vv_v2i32_unmasked(<2 x i32> %va, <2 x i32> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_v2i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT:    vssubu.vv v8, v8, v9
+; CHECK-NEXT:    ret
+  %head = insertelement <2 x i1> poison, i1 true, i32 0
+  %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
+  %v = call <2 x i32> @llvm.vp.usub.sat.v2i32(<2 x i32> %va, <2 x i32> %b, <2 x i1> %m, i32 %evl)
+  ret <2 x i32> %v
+}
+
+define <2 x i32> @vssubu_vx_v2i32(<2 x i32> %va, i32 %b, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_v2i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <2 x i32> poison, i32 %b, i32 0
+  %vb = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer
+  %v = call <2 x i32> @llvm.vp.usub.sat.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> %m, i32 %evl)
+  ret <2 x i32> %v
+}
+
+define <2 x i32> @vssubu_vx_v2i32_unmasked(<2 x i32> %va, i32 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_v2i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <2 x i32> poison, i32 %b, i32 0
+  %vb = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer
+  %head = insertelement <2 x i1> poison, i1 true, i32 0
+  %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
+  %v = call <2 x i32> @llvm.vp.usub.sat.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> %m, i32 %evl)
+  ret <2 x i32> %v
+}
+
+define <2 x i32> @vssubu_vi_v2i32(<2 x i32> %va, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_v2i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <2 x i32> poison, i32 -1, i32 0
+  %vb = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer
+  %v = call <2 x i32> @llvm.vp.usub.sat.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> %m, i32 %evl)
+  ret <2 x i32> %v
+}
+
+define <2 x i32> @vssubu_vi_v2i32_unmasked(<2 x i32> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_v2i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <2 x i32> poison, i32 -1, i32 0
+  %vb = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer
+  %head = insertelement <2 x i1> poison, i1 true, i32 0
+  %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
+  %v = call <2 x i32> @llvm.vp.usub.sat.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> %m, i32 %evl)
+  ret <2 x i32> %v
+}
+
+declare <4 x i32> @llvm.vp.usub.sat.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32)
+
+define <4 x i32> @vssubu_vv_v4i32(<4 x i32> %va, <4 x i32> %b, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_v4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT:    vssubu.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    ret
+  %v = call <4 x i32> @llvm.vp.usub.sat.v4i32(<4 x i32> %va, <4 x i32> %b, <4 x i1> %m, i32 %evl)
+  ret <4 x i32> %v
+}
+
+define <4 x i32> @vssubu_vv_v4i32_unmasked(<4 x i32> %va, <4 x i32> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_v4i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT:    vssubu.vv v8, v8, v9
+; CHECK-NEXT:    ret
+  %head = insertelement <4 x i1> poison, i1 true, i32 0
+  %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
+  %v = call <4 x i32> @llvm.vp.usub.sat.v4i32(<4 x i32> %va, <4 x i32> %b, <4 x i1> %m, i32 %evl)
+  ret <4 x i32> %v
+}
+
+define <4 x i32> @vssubu_vx_v4i32(<4 x i32> %va, i32 %b, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_v4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <4 x i32> poison, i32 %b, i32 0
+  %vb = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer
+  %v = call <4 x i32> @llvm.vp.usub.sat.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %m, i32 %evl)
+  ret <4 x i32> %v
+}
+
+define <4 x i32> @vssubu_vx_v4i32_unmasked(<4 x i32> %va, i32 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_v4i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <4 x i32> poison, i32 %b, i32 0
+  %vb = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer
+  %head = insertelement <4 x i1> poison, i1 true, i32 0
+  %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
+  %v = call <4 x i32> @llvm.vp.usub.sat.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %m, i32 %evl)
+  ret <4 x i32> %v
+}
+
+define <4 x i32> @vssubu_vi_v4i32(<4 x i32> %va, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_v4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <4 x i32> poison, i32 -1, i32 0
+  %vb = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer
+  %v = call <4 x i32> @llvm.vp.usub.sat.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %m, i32 %evl)
+  ret <4 x i32> %v
+}
+
+define <4 x i32> @vssubu_vi_v4i32_unmasked(<4 x i32> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_v4i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <4 x i32> poison, i32 -1, i32 0
+  %vb = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer
+  %head = insertelement <4 x i1> poison, i1 true, i32 0
+  %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
+  %v = call <4 x i32> @llvm.vp.usub.sat.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %m, i32 %evl)
+  ret <4 x i32> %v
+}
+
+declare <8 x i32> @llvm.vp.usub.sat.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
+
+define <8 x i32> @vssubu_vv_v8i32(<8 x i32> %va, <8 x i32> %b, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_v8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT:    vssubu.vv v8, v8, v10, v0.t
+; CHECK-NEXT:    ret
+  %v = call <8 x i32> @llvm.vp.usub.sat.v8i32(<8 x i32> %va, <8 x i32> %b, <8 x i1> %m, i32 %evl)
+  ret <8 x i32> %v
+}
+
+define <8 x i32> @vssubu_vv_v8i32_unmasked(<8 x i32> %va, <8 x i32> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_v8i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT:    vssubu.vv v8, v8, v10
+; CHECK-NEXT:    ret
+  %head = insertelement <8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+  %v = call <8 x i32> @llvm.vp.usub.sat.v8i32(<8 x i32> %va, <8 x i32> %b, <8 x i1> %m, i32 %evl)
+  ret <8 x i32> %v
+}
+
+define <8 x i32> @vssubu_vx_v8i32(<8 x i32> %va, i32 %b, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_v8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0
+  %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer
+  %v = call <8 x i32> @llvm.vp.usub.sat.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 %evl)
+  ret <8 x i32> %v
+}
+
+define <8 x i32> @vssubu_vx_v8i32_unmasked(<8 x i32> %va, i32 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_v8i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0
+  %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer
+  %head = insertelement <8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+  %v = call <8 x i32> @llvm.vp.usub.sat.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 %evl)
+  ret <8 x i32> %v
+}
+
+define <8 x i32> @vssubu_vi_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_v8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <8 x i32> poison, i32 -1, i32 0
+  %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer
+  %v = call <8 x i32> @llvm.vp.usub.sat.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 %evl)
+  ret <8 x i32> %v
+}
+
+define <8 x i32> @vssubu_vi_v8i32_unmasked(<8 x i32> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_v8i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <8 x i32> poison, i32 -1, i32 0
+  %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer
+  %head = insertelement <8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+  %v = call <8 x i32> @llvm.vp.usub.sat.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 %evl)
+  ret <8 x i32> %v
+}
+
+declare <16 x i32> @llvm.vp.usub.sat.v16i32(<16 x i32>, <16 x i32>, <16 x i1>, i32)
+
+define <16 x i32> @vssubu_vv_v16i32(<16 x i32> %va, <16 x i32> %b, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_v16i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT:    vssubu.vv v8, v8, v12, v0.t
+; CHECK-NEXT:    ret
+  %v = call <16 x i32> @llvm.vp.usub.sat.v16i32(<16 x i32> %va, <16 x i32> %b, <16 x i1> %m, i32 %evl)
+  ret <16 x i32> %v
+}
+
+define <16 x i32> @vssubu_vv_v16i32_unmasked(<16 x i32> %va, <16 x i32> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_v16i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT:    vssubu.vv v8, v8, v12
+; CHECK-NEXT:    ret
+  %head = insertelement <16 x i1> poison, i1 true, i32 0
+  %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
+  %v = call <16 x i32> @llvm.vp.usub.sat.v16i32(<16 x i32> %va, <16 x i32> %b, <16 x i1> %m, i32 %evl)
+  ret <16 x i32> %v
+}
+
+define <16 x i32> @vssubu_vx_v16i32(<16 x i32> %va, i32 %b, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_v16i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <16 x i32> poison, i32 %b, i32 0
+  %vb = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer
+  %v = call <16 x i32> @llvm.vp.usub.sat.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> %m, i32 %evl)
+  ret <16 x i32> %v
+}
+
+define <16 x i32> @vssubu_vx_v16i32_unmasked(<16 x i32> %va, i32 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_v16i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <16 x i32> poison, i32 %b, i32 0
+  %vb = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer
+  %head = insertelement <16 x i1> poison, i1 true, i32 0
+  %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
+  %v = call <16 x i32> @llvm.vp.usub.sat.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> %m, i32 %evl)
+  ret <16 x i32> %v
+}
+
+define <16 x i32> @vssubu_vi_v16i32(<16 x i32> %va, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_v16i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <16 x i32> poison, i32 -1, i32 0
+  %vb = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer
+  %v = call <16 x i32> @llvm.vp.usub.sat.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> %m, i32 %evl)
+  ret <16 x i32> %v
+}
+
+define <16 x i32> @vssubu_vi_v16i32_unmasked(<16 x i32> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_v16i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <16 x i32> poison, i32 -1, i32 0
+  %vb = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer
+  %head = insertelement <16 x i1> poison, i1 true, i32 0
+  %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
+  %v = call <16 x i32> @llvm.vp.usub.sat.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> %m, i32 %evl)
+  ret <16 x i32> %v
+}
+
+declare <2 x i64> @llvm.vp.usub.sat.v2i64(<2 x i64>, <2 x i64>, <2 x i1>, i32)
+
+define <2 x i64> @vssubu_vv_v2i64(<2 x i64> %va, <2 x i64> %b, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_v2i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT:    vssubu.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    ret
+  %v = call <2 x i64> @llvm.vp.usub.sat.v2i64(<2 x i64> %va, <2 x i64> %b, <2 x i1> %m, i32 %evl)
+  ret <2 x i64> %v
+}
+
+define <2 x i64> @vssubu_vv_v2i64_unmasked(<2 x i64> %va, <2 x i64> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_v2i64_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT:    vssubu.vv v8, v8, v9
+; CHECK-NEXT:    ret
+  %head = insertelement <2 x i1> poison, i1 true, i32 0
+  %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
+  %v = call <2 x i64> @llvm.vp.usub.sat.v2i64(<2 x i64> %va, <2 x i64> %b, <2 x i1> %m, i32 %evl)
+  ret <2 x i64> %v
+}
+
+define <2 x i64> @vssubu_vx_v2i64(<2 x i64> %va, i64 %b, <2 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vssubu_vx_v2i64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    sw a1, 12(sp)
+; RV32-NEXT:    sw a0, 8(sp)
+; RV32-NEXT:    addi a0, sp, 8
+; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; RV32-NEXT:    vlse64.v v9, (a0), zero
+; RV32-NEXT:    vsetvli zero, a2, e64, m1, ta, ma
+; RV32-NEXT:    vssubu.vv v8, v8, v9, v0.t
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vssubu_vx_v2i64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli zero, a1, e64, m1, ta, ma
+; RV64-NEXT:    vssubu.vx v8, v8, a0, v0.t
+; RV64-NEXT:    ret
+  %elt.head = insertelement <2 x i64> poison, i64 %b, i32 0
+  %vb = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer
+  %v = call <2 x i64> @llvm.vp.usub.sat.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %m, i32 %evl)
+  ret <2 x i64> %v
+}
+
+define <2 x i64> @vssubu_vx_v2i64_unmasked(<2 x i64> %va, i64 %b, i32 zeroext %evl) {
+; RV32-LABEL: vssubu_vx_v2i64_unmasked:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    sw a1, 12(sp)
+; RV32-NEXT:    sw a0, 8(sp)
+; RV32-NEXT:    addi a0, sp, 8
+; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; RV32-NEXT:    vlse64.v v9, (a0), zero
+; RV32-NEXT:    vsetvli zero, a2, e64, m1, ta, ma
+; RV32-NEXT:    vssubu.vv v8, v8, v9
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vssubu_vx_v2i64_unmasked:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli zero, a1, e64, m1, ta, ma
+; RV64-NEXT:    vssubu.vx v8, v8, a0
+; RV64-NEXT:    ret
+  %elt.head = insertelement <2 x i64> poison, i64 %b, i32 0
+  %vb = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer
+  %head = insertelement <2 x i1> poison, i1 true, i32 0
+  %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
+  %v = call <2 x i64> @llvm.vp.usub.sat.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %m, i32 %evl)
+  ret <2 x i64> %v
+}
+
+define <2 x i64> @vssubu_vi_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_v2i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <2 x i64> poison, i64 -1, i32 0
+  %vb = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer
+  %v = call <2 x i64> @llvm.vp.usub.sat.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %m, i32 %evl)
+  ret <2 x i64> %v
+}
+
+define <2 x i64> @vssubu_vi_v2i64_unmasked(<2 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_v2i64_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <2 x i64> poison, i64 -1, i32 0
+  %vb = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer
+  %head = insertelement <2 x i1> poison, i1 true, i32 0
+  %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
+  %v = call <2 x i64> @llvm.vp.usub.sat.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %m, i32 %evl)
+  ret <2 x i64> %v
+}
+
+declare <4 x i64> @llvm.vp.usub.sat.v4i64(<4 x i64>, <4 x i64>, <4 x i1>, i32)
+
+define <4 x i64> @vssubu_vv_v4i64(<4 x i64> %va, <4 x i64> %b, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_v4i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-NEXT:    vssubu.vv v8, v8, v10, v0.t
+; CHECK-NEXT:    ret
+  %v = call <4 x i64> @llvm.vp.usub.sat.v4i64(<4 x i64> %va, <4 x i64> %b, <4 x i1> %m, i32 %evl)
+  ret <4 x i64> %v
+}
+
+define <4 x i64> @vssubu_vv_v4i64_unmasked(<4 x i64> %va, <4 x i64> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_v4i64_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-NEXT:    vssubu.vv v8, v8, v10
+; CHECK-NEXT:    ret
+  %head = insertelement <4 x i1> poison, i1 true, i32 0
+  %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
+  %v = call <4 x i64> @llvm.vp.usub.sat.v4i64(<4 x i64> %va, <4 x i64> %b, <4 x i1> %m, i32 %evl)
+  ret <4 x i64> %v
+}
+
+define <4 x i64> @vssubu_vx_v4i64(<4 x i64> %va, i64 %b, <4 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vssubu_vx_v4i64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    sw a1, 12(sp)
+; RV32-NEXT:    sw a0, 8(sp)
+; RV32-NEXT:    addi a0, sp, 8
+; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV32-NEXT:    vlse64.v v10, (a0), zero
+; RV32-NEXT:    vsetvli zero, a2, e64, m2, ta, ma
+; RV32-NEXT:    vssubu.vv v8, v8, v10, v0.t
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vssubu_vx_v4i64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli zero, a1, e64, m2, ta, ma
+; RV64-NEXT:    vssubu.vx v8, v8, a0, v0.t
+; RV64-NEXT:    ret
+  %elt.head = insertelement <4 x i64> poison, i64 %b, i32 0
+  %vb = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer
+  %v = call <4 x i64> @llvm.vp.usub.sat.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> %m, i32 %evl)
+  ret <4 x i64> %v
+}
+
+define <4 x i64> @vssubu_vx_v4i64_unmasked(<4 x i64> %va, i64 %b, i32 zeroext %evl) {
+; RV32-LABEL: vssubu_vx_v4i64_unmasked:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    sw a1, 12(sp)
+; RV32-NEXT:    sw a0, 8(sp)
+; RV32-NEXT:    addi a0, sp, 8
+; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV32-NEXT:    vlse64.v v10, (a0), zero
+; RV32-NEXT:    vsetvli zero, a2, e64, m2, ta, ma
+; RV32-NEXT:    vssubu.vv v8, v8, v10
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vssubu_vx_v4i64_unmasked:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli zero, a1, e64, m2, ta, ma
+; RV64-NEXT:    vssubu.vx v8, v8, a0
+; RV64-NEXT:    ret
+  %elt.head = insertelement <4 x i64> poison, i64 %b, i32 0
+  %vb = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer
+  %head = insertelement <4 x i1> poison, i1 true, i32 0
+  %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
+  %v = call <4 x i64> @llvm.vp.usub.sat.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> %m, i32 %evl)
+  ret <4 x i64> %v
+}
+
+define <4 x i64> @vssubu_vi_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_v4i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <4 x i64> poison, i64 -1, i32 0
+  %vb = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer
+  %v = call <4 x i64> @llvm.vp.usub.sat.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> %m, i32 %evl)
+  ret <4 x i64> %v
+}
+
+define <4 x i64> @vssubu_vi_v4i64_unmasked(<4 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_v4i64_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <4 x i64> poison, i64 -1, i32 0
+  %vb = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer
+  %head = insertelement <4 x i1> poison, i1 true, i32 0
+  %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
+  %v = call <4 x i64> @llvm.vp.usub.sat.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> %m, i32 %evl)
+  ret <4 x i64> %v
+}
+
+declare <8 x i64> @llvm.vp.usub.sat.v8i64(<8 x i64>, <8 x i64>, <8 x i1>, i32)
+
+define <8 x i64> @vssubu_vv_v8i64(<8 x i64> %va, <8 x i64> %b, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_v8i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT:    vssubu.vv v8, v8, v12, v0.t
+; CHECK-NEXT:    ret
+  %v = call <8 x i64> @llvm.vp.usub.sat.v8i64(<8 x i64> %va, <8 x i64> %b, <8 x i1> %m, i32 %evl)
+  ret <8 x i64> %v
+}
+
+define <8 x i64> @vssubu_vv_v8i64_unmasked(<8 x i64> %va, <8 x i64> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_v8i64_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT:    vssubu.vv v8, v8, v12
+; CHECK-NEXT:    ret
+  %head = insertelement <8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+  %v = call <8 x i64> @llvm.vp.usub.sat.v8i64(<8 x i64> %va, <8 x i64> %b, <8 x i1> %m, i32 %evl)
+  ret <8 x i64> %v
+}
+
+define <8 x i64> @vssubu_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vssubu_vx_v8i64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    sw a1, 12(sp)
+; RV32-NEXT:    sw a0, 8(sp)
+; RV32-NEXT:    addi a0, sp, 8
+; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
+; RV32-NEXT:    vlse64.v v12, (a0), zero
+; RV32-NEXT:    vsetvli zero, a2, e64, m4, ta, ma
+; RV32-NEXT:    vssubu.vv v8, v8, v12, v0.t
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vssubu_vx_v8i64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
+; RV64-NEXT:    vssubu.vx v8, v8, a0, v0.t
+; RV64-NEXT:    ret
+  %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0
+  %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
+  %v = call <8 x i64> @llvm.vp.usub.sat.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+  ret <8 x i64> %v
+}
+
+define <8 x i64> @vssubu_vx_v8i64_unmasked(<8 x i64> %va, i64 %b, i32 zeroext %evl) {
+; RV32-LABEL: vssubu_vx_v8i64_unmasked:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    sw a1, 12(sp)
+; RV32-NEXT:    sw a0, 8(sp)
+; RV32-NEXT:    addi a0, sp, 8
+; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
+; RV32-NEXT:    vlse64.v v12, (a0), zero
+; RV32-NEXT:    vsetvli zero, a2, e64, m4, ta, ma
+; RV32-NEXT:    vssubu.vv v8, v8, v12
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vssubu_vx_v8i64_unmasked:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
+; RV64-NEXT:    vssubu.vx v8, v8, a0
+; RV64-NEXT:    ret
+  %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0
+  %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
+  %head = insertelement <8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+  %v = call <8 x i64> @llvm.vp.usub.sat.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+  ret <8 x i64> %v
+}
+
+define <8 x i64> @vssubu_vi_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_v8i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <8 x i64> poison, i64 -1, i32 0
+  %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
+  %v = call <8 x i64> @llvm.vp.usub.sat.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+  ret <8 x i64> %v
+}
+
+define <8 x i64> @vssubu_vi_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_v8i64_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <8 x i64> poison, i64 -1, i32 0
+  %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
+  %head = insertelement <8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+  %v = call <8 x i64> @llvm.vp.usub.sat.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+  ret <8 x i64> %v
+}
+
+declare <16 x i64> @llvm.vp.usub.sat.v16i64(<16 x i64>, <16 x i64>, <16 x i1>, i32)
+
+define <16 x i64> @vssubu_vv_v16i64(<16 x i64> %va, <16 x i64> %b, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_v16i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT:    vssubu.vv v8, v8, v16, v0.t
+; CHECK-NEXT:    ret
+  %v = call <16 x i64> @llvm.vp.usub.sat.v16i64(<16 x i64> %va, <16 x i64> %b, <16 x i1> %m, i32 %evl)
+  ret <16 x i64> %v
+}
+
+define <16 x i64> @vssubu_vv_v16i64_unmasked(<16 x i64> %va, <16 x i64> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_v16i64_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT:    vssubu.vv v8, v8, v16
+; CHECK-NEXT:    ret
+  %head = insertelement <16 x i1> poison, i1 true, i32 0
+  %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
+  %v = call <16 x i64> @llvm.vp.usub.sat.v16i64(<16 x i64> %va, <16 x i64> %b, <16 x i1> %m, i32 %evl)
+  ret <16 x i64> %v
+}
+
+define <16 x i64> @vssubu_vx_v16i64(<16 x i64> %va, i64 %b, <16 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vssubu_vx_v16i64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    sw a1, 12(sp)
+; RV32-NEXT:    sw a0, 8(sp)
+; RV32-NEXT:    addi a0, sp, 8
+; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
+; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
+; RV32-NEXT:    vssubu.vv v8, v8, v16, v0.t
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vssubu_vx_v16i64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vssubu.vx v8, v8, a0, v0.t
+; RV64-NEXT:    ret
+  %elt.head = insertelement <16 x i64> poison, i64 %b, i32 0
+  %vb = shufflevector <16 x i64> %elt.head, <16 x i64> poison, <16 x i32> zeroinitializer
+  %v = call <16 x i64> @llvm.vp.usub.sat.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl)
+  ret <16 x i64> %v
+}
+
+define <16 x i64> @vssubu_vx_v16i64_unmasked(<16 x i64> %va, i64 %b, i32 zeroext %evl) {
+; RV32-LABEL: vssubu_vx_v16i64_unmasked:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    sw a1, 12(sp)
+; RV32-NEXT:    sw a0, 8(sp)
+; RV32-NEXT:    addi a0, sp, 8
+; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
+; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
+; RV32-NEXT:    vssubu.vv v8, v8, v16
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vssubu_vx_v16i64_unmasked:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vssubu.vx v8, v8, a0
+; RV64-NEXT:    ret
+  %elt.head = insertelement <16 x i64> poison, i64 %b, i32 0
+  %vb = shufflevector <16 x i64> %elt.head, <16 x i64> poison, <16 x i32> zeroinitializer
+  %head = insertelement <16 x i1> poison, i1 true, i32 0
+  %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
+  %v = call <16 x i64> @llvm.vp.usub.sat.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl)
+  ret <16 x i64> %v
+}
+
+define <16 x i64> @vssubu_vi_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_v16i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <16 x i64> poison, i64 -1, i32 0
+  %vb = shufflevector <16 x i64> %elt.head, <16 x i64> poison, <16 x i32> zeroinitializer
+  %v = call <16 x i64> @llvm.vp.usub.sat.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl)
+  ret <16 x i64> %v
+}
+
+define <16 x i64> @vssubu_vi_v16i64_unmasked(<16 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_v16i64_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <16 x i64> poison, i64 -1, i32 0
+  %vb = shufflevector <16 x i64> %elt.head, <16 x i64> poison, <16 x i32> zeroinitializer
+  %head = insertelement <16 x i1> poison, i1 true, i32 0
+  %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
+  %v = call <16 x i64> @llvm.vp.usub.sat.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl)
+  ret <16 x i64> %v
+}
+
+; Test that split-legalization works as expected.
+
+declare <32 x i64> @llvm.vp.usub.sat.v32i64(<32 x i64>, <32 x i64>, <32 x i1>, i32)
+
+define <32 x i64> @vssubu_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vssubu_vx_v32i64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
+; RV32-NEXT:    vslidedown.vi v1, v0, 2
+; RV32-NEXT:    li a1, 32
+; RV32-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
+; RV32-NEXT:    li a2, 16
+; RV32-NEXT:    vmv.v.i v24, -1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    bltu a0, a2, .LBB108_2
+; RV32-NEXT:  # %bb.1:
+; RV32-NEXT:    li a1, 16
+; RV32-NEXT:  .LBB108_2:
+; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV32-NEXT:    vssubu.vv v8, v8, v24, v0.t
+; RV32-NEXT:    addi a1, a0, -16
+; RV32-NEXT:    sltu a0, a0, a1
+; RV32-NEXT:    addi a0, a0, -1
+; RV32-NEXT:    and a0, a0, a1
+; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT:    vmv1r.v v0, v1
+; RV32-NEXT:    vssubu.vv v16, v16, v24, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vssubu_vx_v32i64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
+; RV64-NEXT:    li a2, 16
+; RV64-NEXT:    vslidedown.vi v24, v0, 2
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    bltu a0, a2, .LBB108_2
+; RV64-NEXT:  # %bb.1:
+; RV64-NEXT:    li a1, 16
+; RV64-NEXT:  .LBB108_2:
+; RV64-NEXT:    li a2, -1
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vssubu.vx v8, v8, a2, v0.t
+; RV64-NEXT:    addi a1, a0, -16
+; RV64-NEXT:    sltu a0, a0, a1
+; RV64-NEXT:    addi a0, a0, -1
+; RV64-NEXT:    and a0, a0, a1
+; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV64-NEXT:    vmv1r.v v0, v24
+; RV64-NEXT:    vssubu.vx v16, v16, a2, v0.t
+; RV64-NEXT:    ret
+  %elt.head = insertelement <32 x i64> poison, i64 -1, i32 0
+  %vb = shufflevector <32 x i64> %elt.head, <32 x i64> poison, <32 x i32> zeroinitializer
+  %v = call <32 x i64> @llvm.vp.usub.sat.v32i64(<32 x i64> %va, <32 x i64> %vb, <32 x i1> %m, i32 %evl)
+  ret <32 x i64> %v
+}
+
+define <32 x i64> @vssubu_vi_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) {
+; RV32-LABEL: vssubu_vi_v32i64_unmasked:
+; RV32:       # %bb.0:
+; RV32-NEXT:    li a1, 32
+; RV32-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
+; RV32-NEXT:    li a2, 16
+; RV32-NEXT:    vmv.v.i v24, -1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    bltu a0, a2, .LBB109_2
+; RV32-NEXT:  # %bb.1:
+; RV32-NEXT:    li a1, 16
+; RV32-NEXT:  .LBB109_2:
+; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV32-NEXT:    vssubu.vv v8, v8, v24
+; RV32-NEXT:    addi a1, a0, -16
+; RV32-NEXT:    sltu a0, a0, a1
+; RV32-NEXT:    addi a0, a0, -1
+; RV32-NEXT:    and a0, a0, a1
+; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT:    vssubu.vv v16, v16, v24
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vssubu_vi_v32i64_unmasked:
+; RV64:       # %bb.0:
+; RV64-NEXT:    li a2, 16
+; RV64-NEXT:    mv a1, a0
+; RV64-NEXT:    bltu a0, a2, .LBB109_2
+; RV64-NEXT:  # %bb.1:
+; RV64-NEXT:    li a1, 16
+; RV64-NEXT:  .LBB109_2:
+; RV64-NEXT:    li a2, -1
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vssubu.vx v8, v8, a2
+; RV64-NEXT:    addi a1, a0, -16
+; RV64-NEXT:    sltu a0, a0, a1
+; RV64-NEXT:    addi a0, a0, -1
+; RV64-NEXT:    and a0, a0, a1
+; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV64-NEXT:    vssubu.vx v16, v16, a2
+; RV64-NEXT:    ret
+  %elt.head = insertelement <32 x i64> poison, i64 -1, i32 0
+  %vb = shufflevector <32 x i64> %elt.head, <32 x i64> poison, <32 x i32> zeroinitializer
+  %head = insertelement <32 x i1> poison, i1 true, i32 0
+  %m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer
+  %v = call <32 x i64> @llvm.vp.usub.sat.v32i64(<32 x i64> %va, <32 x i64> %vb, <32 x i1> %m, i32 %evl)
+  ret <32 x i64> %v
+}
+
+; FIXME: We don't match vssubu.vi on RV32.
+
+define <32 x i64> @vssubu_vx_v32i64_evl12(<32 x i64> %va, <32 x i1> %m) {
+; RV32-LABEL: vssubu_vx_v32i64_evl12:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
+; RV32-NEXT:    vslidedown.vi v1, v0, 2
+; RV32-NEXT:    li a0, 32
+; RV32-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
+; RV32-NEXT:    vmv.v.i v24, -1
+; RV32-NEXT:    vsetivli zero, 12, e64, m8, ta, ma
+; RV32-NEXT:    vssubu.vv v8, v8, v24, v0.t
+; RV32-NEXT:    vsetivli zero, 0, e64, m8, ta, ma
+; RV32-NEXT:    vmv1r.v v0, v1
+; RV32-NEXT:    vssubu.vv v16, v16, v24, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vssubu_vx_v32i64_evl12:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
+; RV64-NEXT:    vslidedown.vi v24, v0, 2
+; RV64-NEXT:    li a0, -1
+; RV64-NEXT:    vsetivli zero, 12, e64, m8, ta, ma
+; RV64-NEXT:    vssubu.vx v8, v8, a0, v0.t
+; RV64-NEXT:    vsetivli zero, 0, e64, m8, ta, ma
+; RV64-NEXT:    vmv1r.v v0, v24
+; RV64-NEXT:    vssubu.vx v16, v16, a0, v0.t
+; RV64-NEXT:    ret
+  %elt.head = insertelement <32 x i64> poison, i64 -1, i32 0
+  %vb = shufflevector <32 x i64> %elt.head, <32 x i64> poison, <32 x i32> zeroinitializer
+  %v = call <32 x i64> @llvm.vp.usub.sat.v32i64(<32 x i64> %va, <32 x i64> %vb, <32 x i1> %m, i32 12)
+  ret <32 x i64> %v
+}
+
+define <32 x i64> @vssubu_vx_v32i64_evl27(<32 x i64> %va, <32 x i1> %m) {
+; RV32-LABEL: vssubu_vx_v32i64_evl27:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
+; RV32-NEXT:    vslidedown.vi v1, v0, 2
+; RV32-NEXT:    li a0, 32
+; RV32-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
+; RV32-NEXT:    vmv.v.i v24, -1
+; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
+; RV32-NEXT:    vssubu.vv v8, v8, v24, v0.t
+; RV32-NEXT:    vsetivli zero, 11, e64, m8, ta, ma
+; RV32-NEXT:    vmv1r.v v0, v1
+; RV32-NEXT:    vssubu.vv v16, v16, v24, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vssubu_vx_v32i64_evl27:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
+; RV64-NEXT:    vslidedown.vi v24, v0, 2
+; RV64-NEXT:    li a0, -1
+; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
+; RV64-NEXT:    vssubu.vx v8, v8, a0, v0.t
+; RV64-NEXT:    vsetivli zero, 11, e64, m8, ta, ma
+; RV64-NEXT:    vmv1r.v v0, v24
+; RV64-NEXT:    vssubu.vx v16, v16, a0, v0.t
+; RV64-NEXT:    ret
+  %elt.head = insertelement <32 x i64> poison, i64 -1, i32 0
+  %vb = shufflevector <32 x i64> %elt.head, <32 x i64> poison, <32 x i32> zeroinitializer
+  %v = call <32 x i64> @llvm.vp.usub.sat.v32i64(<32 x i64> %va, <32 x i64> %vb, <32 x i1> %m, i32 27)
+  ret <32 x i64> %v
+}

diff  --git a/llvm/test/CodeGen/RISCV/rvv/vsadd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vsadd-vp.ll
new file mode 100644
index 00000000000000..caaeae55ed78ed
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vsadd-vp.ll
@@ -0,0 +1,2015 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s --check-prefixes=CHECK,RV64
+
+declare <vscale x 8 x i7> @llvm.vp.sadd.sat.nxv8i7(<vscale x 8 x i7>, <vscale x 8 x i7>, <vscale x 8 x i1>, i32)
+
+define <vscale x 8 x i7> @vsadd_vx_nxv8i7(<vscale x 8 x i7> %a, i7 signext %b, <vscale x 8 x i1> %mask, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_nxv8i7:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a2, zero, e8, m1, ta, ma
+; CHECK-NEXT:    vadd.vv v8, v8, v8
+; CHECK-NEXT:    vsra.vi v8, v8, 1
+; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-NEXT:    vadd.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    li a0, 63
+; CHECK-NEXT:    vmin.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    li a0, 192
+; CHECK-NEXT:    vmax.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 8 x i7> poison, i7 %b, i32 0
+  %vb = shufflevector <vscale x 8 x i7> %elt.head, <vscale x 8 x i7> poison, <vscale x 8 x i32> zeroinitializer
+  %v = call <vscale x 8 x i7> @llvm.vp.sadd.sat.nxv8i7(<vscale x 8 x i7> %a, <vscale x 8 x i7> %vb, <vscale x 8 x i1> %mask, i32 %evl)
+  ret <vscale x 8 x i7> %v
+}
+
+declare <vscale x 1 x i8> @llvm.vp.sadd.sat.nxv1i8(<vscale x 1 x i8>, <vscale x 1 x i8>, <vscale x 1 x i1>, i32)
+
+define <vscale x 1 x i8> @vsadd_vv_nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_nxv1i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-NEXT:    vsadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    ret
+  %v = call <vscale x 1 x i8> @llvm.vp.sadd.sat.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %b, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i8> %v
+}
+
+define <vscale x 1 x i8> @vsadd_vv_nxv1i8_unmasked(<vscale x 1 x i8> %va, <vscale x 1 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_nxv1i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-NEXT:    vsadd.vv v8, v8, v9
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x i8> @llvm.vp.sadd.sat.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %b, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i8> %v
+}
+
+define <vscale x 1 x i8> @vsadd_vx_nxv1i8(<vscale x 1 x i8> %va, i8 %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_nxv1i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT:    vsadd.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 1 x i8> poison, i8 %b, i32 0
+  %vb = shufflevector <vscale x 1 x i8> %elt.head, <vscale x 1 x i8> poison, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x i8> @llvm.vp.sadd.sat.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i8> %v
+}
+
+define <vscale x 1 x i8> @vsadd_vx_nxv1i8_commute(<vscale x 1 x i8> %va, i8 %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_nxv1i8_commute:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT:    vsadd.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 1 x i8> poison, i8 %b, i32 0
+  %vb = shufflevector <vscale x 1 x i8> %elt.head, <vscale x 1 x i8> poison, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x i8> @llvm.vp.sadd.sat.nxv1i8(<vscale x 1 x i8> %vb, <vscale x 1 x i8> %va, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i8> %v
+}
+
+define <vscale x 1 x i8> @vsadd_vx_nxv1i8_unmasked(<vscale x 1 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_nxv1i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT:    vsadd.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 1 x i8> poison, i8 %b, i32 0
+  %vb = shufflevector <vscale x 1 x i8> %elt.head, <vscale x 1 x i8> poison, <vscale x 1 x i32> zeroinitializer
+  %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x i8> @llvm.vp.sadd.sat.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i8> %v
+}
+
+define <vscale x 1 x i8> @vsadd_vi_nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_nxv1i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-NEXT:    vsadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 1 x i8> poison, i8 -1, i32 0
+  %vb = shufflevector <vscale x 1 x i8> %elt.head, <vscale x 1 x i8> poison, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x i8> @llvm.vp.sadd.sat.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i8> %v
+}
+
+define <vscale x 1 x i8> @vsadd_vi_nxv1i8_unmasked(<vscale x 1 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_nxv1i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-NEXT:    vsadd.vi v8, v8, -1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 1 x i8> poison, i8 -1, i32 0
+  %vb = shufflevector <vscale x 1 x i8> %elt.head, <vscale x 1 x i8> poison, <vscale x 1 x i32> zeroinitializer
+  %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x i8> @llvm.vp.sadd.sat.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i8> %v
+}
+
+declare <vscale x 2 x i8> @llvm.vp.sadd.sat.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i8>, <vscale x 2 x i1>, i32)
+
+define <vscale x 2 x i8> @vsadd_vv_nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %b, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_nxv2i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-NEXT:    vsadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    ret
+  %v = call <vscale x 2 x i8> @llvm.vp.sadd.sat.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %b, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x i8> %v
+}
+
+define <vscale x 2 x i8> @vsadd_vv_nxv2i8_unmasked(<vscale x 2 x i8> %va, <vscale x 2 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_nxv2i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-NEXT:    vsadd.vv v8, v8, v9
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+  %v = call <vscale x 2 x i8> @llvm.vp.sadd.sat.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %b, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x i8> %v
+}
+
+define <vscale x 2 x i8> @vsadd_vx_nxv2i8(<vscale x 2 x i8> %va, i8 %b, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_nxv2i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, ta, ma
+; CHECK-NEXT:    vsadd.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 2 x i8> poison, i8 %b, i32 0
+  %vb = shufflevector <vscale x 2 x i8> %elt.head, <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer
+  %v = call <vscale x 2 x i8> @llvm.vp.sadd.sat.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %vb, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x i8> %v
+}
+
+define <vscale x 2 x i8> @vsadd_vx_nxv2i8_unmasked(<vscale x 2 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_nxv2i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, ta, ma
+; CHECK-NEXT:    vsadd.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 2 x i8> poison, i8 %b, i32 0
+  %vb = shufflevector <vscale x 2 x i8> %elt.head, <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer
+  %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+  %v = call <vscale x 2 x i8> @llvm.vp.sadd.sat.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %vb, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x i8> %v
+}
+
+define <vscale x 2 x i8> @vsadd_vi_nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_nxv2i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-NEXT:    vsadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 2 x i8> poison, i8 -1, i32 0
+  %vb = shufflevector <vscale x 2 x i8> %elt.head, <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer
+  %v = call <vscale x 2 x i8> @llvm.vp.sadd.sat.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %vb, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x i8> %v
+}
+
+define <vscale x 2 x i8> @vsadd_vi_nxv2i8_unmasked(<vscale x 2 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_nxv2i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-NEXT:    vsadd.vi v8, v8, -1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 2 x i8> poison, i8 -1, i32 0
+  %vb = shufflevector <vscale x 2 x i8> %elt.head, <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer
+  %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+  %v = call <vscale x 2 x i8> @llvm.vp.sadd.sat.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %vb, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x i8> %v
+}
+
+declare <vscale x 3 x i8> @llvm.vp.sadd.sat.nxv3i8(<vscale x 3 x i8>, <vscale x 3 x i8>, <vscale x 3 x i1>, i32)
+
+define <vscale x 3 x i8> @vsadd_vv_nxv3i8(<vscale x 3 x i8> %va, <vscale x 3 x i8> %b, <vscale x 3 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_nxv3i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT:    vsadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    ret
+  %v = call <vscale x 3 x i8> @llvm.vp.sadd.sat.nxv3i8(<vscale x 3 x i8> %va, <vscale x 3 x i8> %b, <vscale x 3 x i1> %m, i32 %evl)
+  ret <vscale x 3 x i8> %v
+}
+
+define <vscale x 3 x i8> @vsadd_vv_nxv3i8_unmasked(<vscale x 3 x i8> %va, <vscale x 3 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_nxv3i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT:    vsadd.vv v8, v8, v9
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 3 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 3 x i1> %head, <vscale x 3 x i1> poison, <vscale x 3 x i32> zeroinitializer
+  %v = call <vscale x 3 x i8> @llvm.vp.sadd.sat.nxv3i8(<vscale x 3 x i8> %va, <vscale x 3 x i8> %b, <vscale x 3 x i1> %m, i32 %evl)
+  ret <vscale x 3 x i8> %v
+}
+
+define <vscale x 3 x i8> @vsadd_vx_nxv3i8(<vscale x 3 x i8> %va, i8 %b, <vscale x 3 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_nxv3i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-NEXT:    vsadd.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 3 x i8> poison, i8 %b, i32 0
+  %vb = shufflevector <vscale x 3 x i8> %elt.head, <vscale x 3 x i8> poison, <vscale x 3 x i32> zeroinitializer
+  %v = call <vscale x 3 x i8> @llvm.vp.sadd.sat.nxv3i8(<vscale x 3 x i8> %va, <vscale x 3 x i8> %vb, <vscale x 3 x i1> %m, i32 %evl)
+  ret <vscale x 3 x i8> %v
+}
+
+define <vscale x 3 x i8> @vsadd_vx_nxv3i8_unmasked(<vscale x 3 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_nxv3i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-NEXT:    vsadd.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 3 x i8> poison, i8 %b, i32 0
+  %vb = shufflevector <vscale x 3 x i8> %elt.head, <vscale x 3 x i8> poison, <vscale x 3 x i32> zeroinitializer
+  %head = insertelement <vscale x 3 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 3 x i1> %head, <vscale x 3 x i1> poison, <vscale x 3 x i32> zeroinitializer
+  %v = call <vscale x 3 x i8> @llvm.vp.sadd.sat.nxv3i8(<vscale x 3 x i8> %va, <vscale x 3 x i8> %vb, <vscale x 3 x i1> %m, i32 %evl)
+  ret <vscale x 3 x i8> %v
+}
+
+define <vscale x 3 x i8> @vsadd_vi_nxv3i8(<vscale x 3 x i8> %va, <vscale x 3 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_nxv3i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT:    vsadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 3 x i8> poison, i8 -1, i32 0
+  %vb = shufflevector <vscale x 3 x i8> %elt.head, <vscale x 3 x i8> poison, <vscale x 3 x i32> zeroinitializer
+  %v = call <vscale x 3 x i8> @llvm.vp.sadd.sat.nxv3i8(<vscale x 3 x i8> %va, <vscale x 3 x i8> %vb, <vscale x 3 x i1> %m, i32 %evl)
+  ret <vscale x 3 x i8> %v
+}
+
+define <vscale x 3 x i8> @vsadd_vi_nxv3i8_unmasked(<vscale x 3 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_nxv3i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT:    vsadd.vi v8, v8, -1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 3 x i8> poison, i8 -1, i32 0
+  %vb = shufflevector <vscale x 3 x i8> %elt.head, <vscale x 3 x i8> poison, <vscale x 3 x i32> zeroinitializer
+  %head = insertelement <vscale x 3 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 3 x i1> %head, <vscale x 3 x i1> poison, <vscale x 3 x i32> zeroinitializer
+  %v = call <vscale x 3 x i8> @llvm.vp.sadd.sat.nxv3i8(<vscale x 3 x i8> %va, <vscale x 3 x i8> %vb, <vscale x 3 x i1> %m, i32 %evl)
+  ret <vscale x 3 x i8> %v
+}
+
+declare <vscale x 4 x i8> @llvm.vp.sadd.sat.nxv4i8(<vscale x 4 x i8>, <vscale x 4 x i8>, <vscale x 4 x i1>, i32)
+
+define <vscale x 4 x i8> @vsadd_vv_nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %b, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_nxv4i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT:    vsadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    ret
+  %v = call <vscale x 4 x i8> @llvm.vp.sadd.sat.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %b, <vscale x 4 x i1> %m, i32 %evl)
+  ret <vscale x 4 x i8> %v
+}
+
+define <vscale x 4 x i8> @vsadd_vv_nxv4i8_unmasked(<vscale x 4 x i8> %va, <vscale x 4 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_nxv4i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT:    vsadd.vv v8, v8, v9
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
+  %v = call <vscale x 4 x i8> @llvm.vp.sadd.sat.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %b, <vscale x 4 x i1> %m, i32 %evl)
+  ret <vscale x 4 x i8> %v
+}
+
+define <vscale x 4 x i8> @vsadd_vx_nxv4i8(<vscale x 4 x i8> %va, i8 %b, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_nxv4i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-NEXT:    vsadd.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 4 x i8> poison, i8 %b, i32 0
+  %vb = shufflevector <vscale x 4 x i8> %elt.head, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer
+  %v = call <vscale x 4 x i8> @llvm.vp.sadd.sat.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %vb, <vscale x 4 x i1> %m, i32 %evl)
+  ret <vscale x 4 x i8> %v
+}
+
+define <vscale x 4 x i8> @vsadd_vx_nxv4i8_unmasked(<vscale x 4 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_nxv4i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-NEXT:    vsadd.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 4 x i8> poison, i8 %b, i32 0
+  %vb = shufflevector <vscale x 4 x i8> %elt.head, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer
+  %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
+  %v = call <vscale x 4 x i8> @llvm.vp.sadd.sat.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %vb, <vscale x 4 x i1> %m, i32 %evl)
+  ret <vscale x 4 x i8> %v
+}
+
+define <vscale x 4 x i8> @vsadd_vi_nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_nxv4i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT:    vsadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 4 x i8> poison, i8 -1, i32 0
+  %vb = shufflevector <vscale x 4 x i8> %elt.head, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer
+  %v = call <vscale x 4 x i8> @llvm.vp.sadd.sat.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %vb, <vscale x 4 x i1> %m, i32 %evl)
+  ret <vscale x 4 x i8> %v
+}
+
+define <vscale x 4 x i8> @vsadd_vi_nxv4i8_unmasked(<vscale x 4 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_nxv4i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT:    vsadd.vi v8, v8, -1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 4 x i8> poison, i8 -1, i32 0
+  %vb = shufflevector <vscale x 4 x i8> %elt.head, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer
+  %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
+  %v = call <vscale x 4 x i8> @llvm.vp.sadd.sat.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %vb, <vscale x 4 x i1> %m, i32 %evl)
+  ret <vscale x 4 x i8> %v
+}
+
+declare <vscale x 8 x i8> @llvm.vp.sadd.sat.nxv8i8(<vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i1>, i32)
+
+define <vscale x 8 x i8> @vsadd_vv_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_nxv8i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-NEXT:    vsadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    ret
+  %v = call <vscale x 8 x i8> @llvm.vp.sadd.sat.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %b, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i8> %v
+}
+
+define <vscale x 8 x i8> @vsadd_vv_nxv8i8_unmasked(<vscale x 8 x i8> %va, <vscale x 8 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_nxv8i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-NEXT:    vsadd.vv v8, v8, v9
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+  %v = call <vscale x 8 x i8> @llvm.vp.sadd.sat.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %b, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i8> %v
+}
+
+define <vscale x 8 x i8> @vsadd_vx_nxv8i8(<vscale x 8 x i8> %va, i8 %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_nxv8i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-NEXT:    vsadd.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 8 x i8> poison, i8 %b, i32 0
+  %vb = shufflevector <vscale x 8 x i8> %elt.head, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
+  %v = call <vscale x 8 x i8> @llvm.vp.sadd.sat.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %vb, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i8> %v
+}
+
+define <vscale x 8 x i8> @vsadd_vx_nxv8i8_unmasked(<vscale x 8 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_nxv8i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-NEXT:    vsadd.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 8 x i8> poison, i8 %b, i32 0
+  %vb = shufflevector <vscale x 8 x i8> %elt.head, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
+  %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+  %v = call <vscale x 8 x i8> @llvm.vp.sadd.sat.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %vb, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i8> %v
+}
+
+define <vscale x 8 x i8> @vsadd_vi_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_nxv8i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-NEXT:    vsadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 8 x i8> poison, i8 -1, i32 0
+  %vb = shufflevector <vscale x 8 x i8> %elt.head, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
+  %v = call <vscale x 8 x i8> @llvm.vp.sadd.sat.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %vb, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i8> %v
+}
+
+define <vscale x 8 x i8> @vsadd_vi_nxv8i8_unmasked(<vscale x 8 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_nxv8i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-NEXT:    vsadd.vi v8, v8, -1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 8 x i8> poison, i8 -1, i32 0
+  %vb = shufflevector <vscale x 8 x i8> %elt.head, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
+  %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+  %v = call <vscale x 8 x i8> @llvm.vp.sadd.sat.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %vb, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i8> %v
+}
+
+declare <vscale x 16 x i8> @llvm.vp.sadd.sat.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i1>, i32)
+
+define <vscale x 16 x i8> @vsadd_vv_nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i8> %b, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_nxv16i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
+; CHECK-NEXT:    vsadd.vv v8, v8, v10, v0.t
+; CHECK-NEXT:    ret
+  %v = call <vscale x 16 x i8> @llvm.vp.sadd.sat.nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i8> %b, <vscale x 16 x i1> %m, i32 %evl)
+  ret <vscale x 16 x i8> %v
+}
+
+define <vscale x 16 x i8> @vsadd_vv_nxv16i8_unmasked(<vscale x 16 x i8> %va, <vscale x 16 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_nxv16i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
+; CHECK-NEXT:    vsadd.vv v8, v8, v10
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
+  %v = call <vscale x 16 x i8> @llvm.vp.sadd.sat.nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i8> %b, <vscale x 16 x i1> %m, i32 %evl)
+  ret <vscale x 16 x i8> %v
+}
+
+define <vscale x 16 x i8> @vsadd_vx_nxv16i8(<vscale x 16 x i8> %va, i8 %b, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_nxv16i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
+; CHECK-NEXT:    vsadd.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 16 x i8> poison, i8 %b, i32 0
+  %vb = shufflevector <vscale x 16 x i8> %elt.head, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
+  %v = call <vscale x 16 x i8> @llvm.vp.sadd.sat.nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i8> %vb, <vscale x 16 x i1> %m, i32 %evl)
+  ret <vscale x 16 x i8> %v
+}
+
+define <vscale x 16 x i8> @vsadd_vx_nxv16i8_unmasked(<vscale x 16 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_nxv16i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
+; CHECK-NEXT:    vsadd.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 16 x i8> poison, i8 %b, i32 0
+  %vb = shufflevector <vscale x 16 x i8> %elt.head, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
+  %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
+  %v = call <vscale x 16 x i8> @llvm.vp.sadd.sat.nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i8> %vb, <vscale x 16 x i1> %m, i32 %evl)
+  ret <vscale x 16 x i8> %v
+}
+
+define <vscale x 16 x i8> @vsadd_vi_nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_nxv16i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
+; CHECK-NEXT:    vsadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 16 x i8> poison, i8 -1, i32 0
+  %vb = shufflevector <vscale x 16 x i8> %elt.head, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
+  %v = call <vscale x 16 x i8> @llvm.vp.sadd.sat.nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i8> %vb, <vscale x 16 x i1> %m, i32 %evl)
+  ret <vscale x 16 x i8> %v
+}
+
+define <vscale x 16 x i8> @vsadd_vi_nxv16i8_unmasked(<vscale x 16 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_nxv16i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
+; CHECK-NEXT:    vsadd.vi v8, v8, -1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 16 x i8> poison, i8 -1, i32 0
+  %vb = shufflevector <vscale x 16 x i8> %elt.head, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
+  %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
+  %v = call <vscale x 16 x i8> @llvm.vp.sadd.sat.nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i8> %vb, <vscale x 16 x i1> %m, i32 %evl)
+  ret <vscale x 16 x i8> %v
+}
+
+declare <vscale x 32 x i8> @llvm.vp.sadd.sat.nxv32i8(<vscale x 32 x i8>, <vscale x 32 x i8>, <vscale x 32 x i1>, i32)
+
+define <vscale x 32 x i8> @vsadd_vv_nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i8> %b, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_nxv32i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
+; CHECK-NEXT:    vsadd.vv v8, v8, v12, v0.t
+; CHECK-NEXT:    ret
+  %v = call <vscale x 32 x i8> @llvm.vp.sadd.sat.nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i8> %b, <vscale x 32 x i1> %m, i32 %evl)
+  ret <vscale x 32 x i8> %v
+}
+
+define <vscale x 32 x i8> @vsadd_vv_nxv32i8_unmasked(<vscale x 32 x i8> %va, <vscale x 32 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_nxv32i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
+; CHECK-NEXT:    vsadd.vv v8, v8, v12
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
+  %v = call <vscale x 32 x i8> @llvm.vp.sadd.sat.nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i8> %b, <vscale x 32 x i1> %m, i32 %evl)
+  ret <vscale x 32 x i8> %v
+}
+
+define <vscale x 32 x i8> @vsadd_vx_nxv32i8(<vscale x 32 x i8> %va, i8 %b, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_nxv32i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, m4, ta, ma
+; CHECK-NEXT:    vsadd.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 32 x i8> poison, i8 %b, i32 0
+  %vb = shufflevector <vscale x 32 x i8> %elt.head, <vscale x 32 x i8> poison, <vscale x 32 x i32> zeroinitializer
+  %v = call <vscale x 32 x i8> @llvm.vp.sadd.sat.nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i8> %vb, <vscale x 32 x i1> %m, i32 %evl)
+  ret <vscale x 32 x i8> %v
+}
+
+define <vscale x 32 x i8> @vsadd_vx_nxv32i8_unmasked(<vscale x 32 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_nxv32i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, m4, ta, ma
+; CHECK-NEXT:    vsadd.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 32 x i8> poison, i8 %b, i32 0
+  %vb = shufflevector <vscale x 32 x i8> %elt.head, <vscale x 32 x i8> poison, <vscale x 32 x i32> zeroinitializer
+  %head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
+  %v = call <vscale x 32 x i8> @llvm.vp.sadd.sat.nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i8> %vb, <vscale x 32 x i1> %m, i32 %evl)
+  ret <vscale x 32 x i8> %v
+}
+
+define <vscale x 32 x i8> @vsadd_vi_nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_nxv32i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
+; CHECK-NEXT:    vsadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 32 x i8> poison, i8 -1, i32 0
+  %vb = shufflevector <vscale x 32 x i8> %elt.head, <vscale x 32 x i8> poison, <vscale x 32 x i32> zeroinitializer
+  %v = call <vscale x 32 x i8> @llvm.vp.sadd.sat.nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i8> %vb, <vscale x 32 x i1> %m, i32 %evl)
+  ret <vscale x 32 x i8> %v
+}
+
+define <vscale x 32 x i8> @vsadd_vi_nxv32i8_unmasked(<vscale x 32 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_nxv32i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
+; CHECK-NEXT:    vsadd.vi v8, v8, -1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 32 x i8> poison, i8 -1, i32 0
+  %vb = shufflevector <vscale x 32 x i8> %elt.head, <vscale x 32 x i8> poison, <vscale x 32 x i32> zeroinitializer
+  %head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
+  %v = call <vscale x 32 x i8> @llvm.vp.sadd.sat.nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i8> %vb, <vscale x 32 x i1> %m, i32 %evl)
+  ret <vscale x 32 x i8> %v
+}
+
+declare <vscale x 64 x i8> @llvm.vp.sadd.sat.nxv64i8(<vscale x 64 x i8>, <vscale x 64 x i8>, <vscale x 64 x i1>, i32)
+
+define <vscale x 64 x i8> @vsadd_vv_nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i8> %b, <vscale x 64 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_nxv64i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-NEXT:    vsadd.vv v8, v8, v16, v0.t
+; CHECK-NEXT:    ret
+  %v = call <vscale x 64 x i8> @llvm.vp.sadd.sat.nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i8> %b, <vscale x 64 x i1> %m, i32 %evl)
+  ret <vscale x 64 x i8> %v
+}
+
+define <vscale x 64 x i8> @vsadd_vv_nxv64i8_unmasked(<vscale x 64 x i8> %va, <vscale x 64 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_nxv64i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-NEXT:    vsadd.vv v8, v8, v16
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 64 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 64 x i1> %head, <vscale x 64 x i1> poison, <vscale x 64 x i32> zeroinitializer
+  %v = call <vscale x 64 x i8> @llvm.vp.sadd.sat.nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i8> %b, <vscale x 64 x i1> %m, i32 %evl)
+  ret <vscale x 64 x i8> %v
+}
+
+define <vscale x 64 x i8> @vsadd_vx_nxv64i8(<vscale x 64 x i8> %va, i8 %b, <vscale x 64 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_nxv64i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-NEXT:    vsadd.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 64 x i8> poison, i8 %b, i32 0
+  %vb = shufflevector <vscale x 64 x i8> %elt.head, <vscale x 64 x i8> poison, <vscale x 64 x i32> zeroinitializer
+  %v = call <vscale x 64 x i8> @llvm.vp.sadd.sat.nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i8> %vb, <vscale x 64 x i1> %m, i32 %evl)
+  ret <vscale x 64 x i8> %v
+}
+
+define <vscale x 64 x i8> @vsadd_vx_nxv64i8_unmasked(<vscale x 64 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_nxv64i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-NEXT:    vsadd.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 64 x i8> poison, i8 %b, i32 0
+  %vb = shufflevector <vscale x 64 x i8> %elt.head, <vscale x 64 x i8> poison, <vscale x 64 x i32> zeroinitializer
+  %head = insertelement <vscale x 64 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 64 x i1> %head, <vscale x 64 x i1> poison, <vscale x 64 x i32> zeroinitializer
+  %v = call <vscale x 64 x i8> @llvm.vp.sadd.sat.nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i8> %vb, <vscale x 64 x i1> %m, i32 %evl)
+  ret <vscale x 64 x i8> %v
+}
+
+define <vscale x 64 x i8> @vsadd_vi_nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_nxv64i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-NEXT:    vsadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 64 x i8> poison, i8 -1, i32 0
+  %vb = shufflevector <vscale x 64 x i8> %elt.head, <vscale x 64 x i8> poison, <vscale x 64 x i32> zeroinitializer
+  %v = call <vscale x 64 x i8> @llvm.vp.sadd.sat.nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i8> %vb, <vscale x 64 x i1> %m, i32 %evl)
+  ret <vscale x 64 x i8> %v
+}
+
+define <vscale x 64 x i8> @vsadd_vi_nxv64i8_unmasked(<vscale x 64 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_nxv64i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-NEXT:    vsadd.vi v8, v8, -1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 64 x i8> poison, i8 -1, i32 0
+  %vb = shufflevector <vscale x 64 x i8> %elt.head, <vscale x 64 x i8> poison, <vscale x 64 x i32> zeroinitializer
+  %head = insertelement <vscale x 64 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 64 x i1> %head, <vscale x 64 x i1> poison, <vscale x 64 x i32> zeroinitializer
+  %v = call <vscale x 64 x i8> @llvm.vp.sadd.sat.nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i8> %vb, <vscale x 64 x i1> %m, i32 %evl)
+  ret <vscale x 64 x i8> %v
+}
+
+; Test that split-legalization works when the mask itself needs splitting.
+
+declare <vscale x 128 x i8> @llvm.vp.sadd.sat.nxv128i8(<vscale x 128 x i8>, <vscale x 128 x i8>, <vscale x 128 x i1>, i32)
+
+define <vscale x 128 x i8> @vsadd_vi_nxv128i8(<vscale x 128 x i8> %va, <vscale x 128 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_nxv128i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vmv1r.v v24, v0
+; CHECK-NEXT:    vsetvli a2, zero, e8, m8, ta, ma
+; CHECK-NEXT:    vlm.v v0, (a0)
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 3
+; CHECK-NEXT:    sub a2, a1, a0
+; CHECK-NEXT:    sltu a3, a1, a2
+; CHECK-NEXT:    addi a3, a3, -1
+; CHECK-NEXT:    and a2, a3, a2
+; CHECK-NEXT:    vsetvli zero, a2, e8, m8, ta, ma
+; CHECK-NEXT:    vsadd.vi v16, v16, -1, v0.t
+; CHECK-NEXT:    bltu a1, a0, .LBB50_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    mv a1, a0
+; CHECK-NEXT:  .LBB50_2:
+; CHECK-NEXT:    vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-NEXT:    vmv1r.v v0, v24
+; CHECK-NEXT:    vsadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 128 x i8> poison, i8 -1, i32 0
+  %vb = shufflevector <vscale x 128 x i8> %elt.head, <vscale x 128 x i8> poison, <vscale x 128 x i32> zeroinitializer
+  %v = call <vscale x 128 x i8> @llvm.vp.sadd.sat.nxv128i8(<vscale x 128 x i8> %va, <vscale x 128 x i8> %vb, <vscale x 128 x i1> %m, i32 %evl)
+  ret <vscale x 128 x i8> %v
+}
+
+define <vscale x 128 x i8> @vsadd_vi_nxv128i8_unmasked(<vscale x 128 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_nxv128i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    slli a1, a1, 3
+; CHECK-NEXT:    sub a2, a0, a1
+; CHECK-NEXT:    sltu a3, a0, a2
+; CHECK-NEXT:    addi a3, a3, -1
+; CHECK-NEXT:    and a2, a3, a2
+; CHECK-NEXT:    vsetvli zero, a2, e8, m8, ta, ma
+; CHECK-NEXT:    vsadd.vi v16, v16, -1
+; CHECK-NEXT:    bltu a0, a1, .LBB51_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    mv a0, a1
+; CHECK-NEXT:  .LBB51_2:
+; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-NEXT:    vsadd.vi v8, v8, -1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 128 x i8> poison, i8 -1, i32 0
+  %vb = shufflevector <vscale x 128 x i8> %elt.head, <vscale x 128 x i8> poison, <vscale x 128 x i32> zeroinitializer
+  %head = insertelement <vscale x 128 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 128 x i1> %head, <vscale x 128 x i1> poison, <vscale x 128 x i32> zeroinitializer
+  %v = call <vscale x 128 x i8> @llvm.vp.sadd.sat.nxv128i8(<vscale x 128 x i8> %va, <vscale x 128 x i8> %vb, <vscale x 128 x i1> %m, i32 %evl)
+  ret <vscale x 128 x i8> %v
+}
+
+declare <vscale x 1 x i16> @llvm.vp.sadd.sat.nxv1i16(<vscale x 1 x i16>, <vscale x 1 x i16>, <vscale x 1 x i1>, i32)
+
+define <vscale x 1 x i16> @vsadd_vv_nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_nxv1i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT:    vsadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    ret
+  %v = call <vscale x 1 x i16> @llvm.vp.sadd.sat.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %b, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i16> %v
+}
+
+define <vscale x 1 x i16> @vsadd_vv_nxv1i16_unmasked(<vscale x 1 x i16> %va, <vscale x 1 x i16> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_nxv1i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT:    vsadd.vv v8, v8, v9
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x i16> @llvm.vp.sadd.sat.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %b, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i16> %v
+}
+
+define <vscale x 1 x i16> @vsadd_vx_nxv1i16(<vscale x 1 x i16> %va, i16 %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_nxv1i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-NEXT:    vsadd.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 1 x i16> poison, i16 %b, i32 0
+  %vb = shufflevector <vscale x 1 x i16> %elt.head, <vscale x 1 x i16> poison, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x i16> @llvm.vp.sadd.sat.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %vb, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i16> %v
+}
+
+define <vscale x 1 x i16> @vsadd_vx_nxv1i16_unmasked(<vscale x 1 x i16> %va, i16 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_nxv1i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-NEXT:    vsadd.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 1 x i16> poison, i16 %b, i32 0
+  %vb = shufflevector <vscale x 1 x i16> %elt.head, <vscale x 1 x i16> poison, <vscale x 1 x i32> zeroinitializer
+  %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x i16> @llvm.vp.sadd.sat.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %vb, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i16> %v
+}
+
+define <vscale x 1 x i16> @vsadd_vi_nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_nxv1i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT:    vsadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 1 x i16> poison, i16 -1, i32 0
+  %vb = shufflevector <vscale x 1 x i16> %elt.head, <vscale x 1 x i16> poison, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x i16> @llvm.vp.sadd.sat.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %vb, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i16> %v
+}
+
+define <vscale x 1 x i16> @vsadd_vi_nxv1i16_unmasked(<vscale x 1 x i16> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_nxv1i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT:    vsadd.vi v8, v8, -1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 1 x i16> poison, i16 -1, i32 0
+  %vb = shufflevector <vscale x 1 x i16> %elt.head, <vscale x 1 x i16> poison, <vscale x 1 x i32> zeroinitializer
+  %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x i16> @llvm.vp.sadd.sat.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %vb, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i16> %v
+}
+
+declare <vscale x 2 x i16> @llvm.vp.sadd.sat.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i16>, <vscale x 2 x i1>, i32)
+
+define <vscale x 2 x i16> @vsadd_vv_nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %b, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_nxv2i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT:    vsadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    ret
+  %v = call <vscale x 2 x i16> @llvm.vp.sadd.sat.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %b, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x i16> %v
+}
+
+define <vscale x 2 x i16> @vsadd_vv_nxv2i16_unmasked(<vscale x 2 x i16> %va, <vscale x 2 x i16> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_nxv2i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT:    vsadd.vv v8, v8, v9
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+  %v = call <vscale x 2 x i16> @llvm.vp.sadd.sat.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %b, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x i16> %v
+}
+
+define <vscale x 2 x i16> @vsadd_vx_nxv2i16(<vscale x 2 x i16> %va, i16 %b, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_nxv2i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-NEXT:    vsadd.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 2 x i16> poison, i16 %b, i32 0
+  %vb = shufflevector <vscale x 2 x i16> %elt.head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
+  %v = call <vscale x 2 x i16> @llvm.vp.sadd.sat.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %vb, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x i16> %v
+}
+
+define <vscale x 2 x i16> @vsadd_vx_nxv2i16_unmasked(<vscale x 2 x i16> %va, i16 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_nxv2i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-NEXT:    vsadd.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 2 x i16> poison, i16 %b, i32 0
+  %vb = shufflevector <vscale x 2 x i16> %elt.head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
+  %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+  %v = call <vscale x 2 x i16> @llvm.vp.sadd.sat.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %vb, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x i16> %v
+}
+
+define <vscale x 2 x i16> @vsadd_vi_nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_nxv2i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT:    vsadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 2 x i16> poison, i16 -1, i32 0
+  %vb = shufflevector <vscale x 2 x i16> %elt.head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
+  %v = call <vscale x 2 x i16> @llvm.vp.sadd.sat.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %vb, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x i16> %v
+}
+
+define <vscale x 2 x i16> @vsadd_vi_nxv2i16_unmasked(<vscale x 2 x i16> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_nxv2i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT:    vsadd.vi v8, v8, -1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 2 x i16> poison, i16 -1, i32 0
+  %vb = shufflevector <vscale x 2 x i16> %elt.head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
+  %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+  %v = call <vscale x 2 x i16> @llvm.vp.sadd.sat.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %vb, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x i16> %v
+}
+
+declare <vscale x 4 x i16> @llvm.vp.sadd.sat.nxv4i16(<vscale x 4 x i16>, <vscale x 4 x i16>, <vscale x 4 x i1>, i32)
+
+define <vscale x 4 x i16> @vsadd_vv_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %b, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_nxv4i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT:    vsadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    ret
+  %v = call <vscale x 4 x i16> @llvm.vp.sadd.sat.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %b, <vscale x 4 x i1> %m, i32 %evl)
+  ret <vscale x 4 x i16> %v
+}
+
+define <vscale x 4 x i16> @vsadd_vv_nxv4i16_unmasked(<vscale x 4 x i16> %va, <vscale x 4 x i16> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_nxv4i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT:    vsadd.vv v8, v8, v9
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
+  %v = call <vscale x 4 x i16> @llvm.vp.sadd.sat.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %b, <vscale x 4 x i1> %m, i32 %evl)
+  ret <vscale x 4 x i16> %v
+}
+
+define <vscale x 4 x i16> @vsadd_vx_nxv4i16(<vscale x 4 x i16> %va, i16 %b, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_nxv4i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT:    vsadd.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 4 x i16> poison, i16 %b, i32 0
+  %vb = shufflevector <vscale x 4 x i16> %elt.head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
+  %v = call <vscale x 4 x i16> @llvm.vp.sadd.sat.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %vb, <vscale x 4 x i1> %m, i32 %evl)
+  ret <vscale x 4 x i16> %v
+}
+
+define <vscale x 4 x i16> @vsadd_vx_nxv4i16_unmasked(<vscale x 4 x i16> %va, i16 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_nxv4i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT:    vsadd.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 4 x i16> poison, i16 %b, i32 0
+  %vb = shufflevector <vscale x 4 x i16> %elt.head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
+  %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
+  %v = call <vscale x 4 x i16> @llvm.vp.sadd.sat.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %vb, <vscale x 4 x i1> %m, i32 %evl)
+  ret <vscale x 4 x i16> %v
+}
+
+define <vscale x 4 x i16> @vsadd_vi_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_nxv4i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT:    vsadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 4 x i16> poison, i16 -1, i32 0
+  %vb = shufflevector <vscale x 4 x i16> %elt.head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
+  %v = call <vscale x 4 x i16> @llvm.vp.sadd.sat.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %vb, <vscale x 4 x i1> %m, i32 %evl)
+  ret <vscale x 4 x i16> %v
+}
+
+define <vscale x 4 x i16> @vsadd_vi_nxv4i16_unmasked(<vscale x 4 x i16> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_nxv4i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT:    vsadd.vi v8, v8, -1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 4 x i16> poison, i16 -1, i32 0
+  %vb = shufflevector <vscale x 4 x i16> %elt.head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
+  %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
+  %v = call <vscale x 4 x i16> @llvm.vp.sadd.sat.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %vb, <vscale x 4 x i1> %m, i32 %evl)
+  ret <vscale x 4 x i16> %v
+}
+
+declare <vscale x 8 x i16> @llvm.vp.sadd.sat.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i1>, i32)
+
+define <vscale x 8 x i16> @vsadd_vv_nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_nxv8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT:    vsadd.vv v8, v8, v10, v0.t
+; CHECK-NEXT:    ret
+  %v = call <vscale x 8 x i16> @llvm.vp.sadd.sat.nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %b, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i16> %v
+}
+
+define <vscale x 8 x i16> @vsadd_vv_nxv8i16_unmasked(<vscale x 8 x i16> %va, <vscale x 8 x i16> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_nxv8i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT:    vsadd.vv v8, v8, v10
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+  %v = call <vscale x 8 x i16> @llvm.vp.sadd.sat.nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %b, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i16> %v
+}
+
+define <vscale x 8 x i16> @vsadd_vx_nxv8i16(<vscale x 8 x i16> %va, i16 %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_nxv8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
+; CHECK-NEXT:    vsadd.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 8 x i16> poison, i16 %b, i32 0
+  %vb = shufflevector <vscale x 8 x i16> %elt.head, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
+  %v = call <vscale x 8 x i16> @llvm.vp.sadd.sat.nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %vb, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i16> %v
+}
+
+define <vscale x 8 x i16> @vsadd_vx_nxv8i16_unmasked(<vscale x 8 x i16> %va, i16 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_nxv8i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
+; CHECK-NEXT:    vsadd.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 8 x i16> poison, i16 %b, i32 0
+  %vb = shufflevector <vscale x 8 x i16> %elt.head, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
+  %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+  %v = call <vscale x 8 x i16> @llvm.vp.sadd.sat.nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %vb, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i16> %v
+}
+
+define <vscale x 8 x i16> @vsadd_vi_nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_nxv8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT:    vsadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 8 x i16> poison, i16 -1, i32 0
+  %vb = shufflevector <vscale x 8 x i16> %elt.head, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
+  %v = call <vscale x 8 x i16> @llvm.vp.sadd.sat.nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %vb, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i16> %v
+}
+
+define <vscale x 8 x i16> @vsadd_vi_nxv8i16_unmasked(<vscale x 8 x i16> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_nxv8i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT:    vsadd.vi v8, v8, -1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 8 x i16> poison, i16 -1, i32 0
+  %vb = shufflevector <vscale x 8 x i16> %elt.head, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
+  %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+  %v = call <vscale x 8 x i16> @llvm.vp.sadd.sat.nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %vb, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i16> %v
+}
+
+declare <vscale x 16 x i16> @llvm.vp.sadd.sat.nxv16i16(<vscale x 16 x i16>, <vscale x 16 x i16>, <vscale x 16 x i1>, i32)
+
+define <vscale x 16 x i16> @vsadd_vv_nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i16> %b, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_nxv16i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-NEXT:    vsadd.vv v8, v8, v12, v0.t
+; CHECK-NEXT:    ret
+  %v = call <vscale x 16 x i16> @llvm.vp.sadd.sat.nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i16> %b, <vscale x 16 x i1> %m, i32 %evl)
+  ret <vscale x 16 x i16> %v
+}
+
+define <vscale x 16 x i16> @vsadd_vv_nxv16i16_unmasked(<vscale x 16 x i16> %va, <vscale x 16 x i16> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_nxv16i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-NEXT:    vsadd.vv v8, v8, v12
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
+  %v = call <vscale x 16 x i16> @llvm.vp.sadd.sat.nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i16> %b, <vscale x 16 x i1> %m, i32 %evl)
+  ret <vscale x 16 x i16> %v
+}
+
+define <vscale x 16 x i16> @vsadd_vx_nxv16i16(<vscale x 16 x i16> %va, i16 %b, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_nxv16i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
+; CHECK-NEXT:    vsadd.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 16 x i16> poison, i16 %b, i32 0
+  %vb = shufflevector <vscale x 16 x i16> %elt.head, <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer
+  %v = call <vscale x 16 x i16> @llvm.vp.sadd.sat.nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i16> %vb, <vscale x 16 x i1> %m, i32 %evl)
+  ret <vscale x 16 x i16> %v
+}
+
+define <vscale x 16 x i16> @vsadd_vx_nxv16i16_unmasked(<vscale x 16 x i16> %va, i16 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_nxv16i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
+; CHECK-NEXT:    vsadd.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 16 x i16> poison, i16 %b, i32 0
+  %vb = shufflevector <vscale x 16 x i16> %elt.head, <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer
+  %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
+  %v = call <vscale x 16 x i16> @llvm.vp.sadd.sat.nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i16> %vb, <vscale x 16 x i1> %m, i32 %evl)
+  ret <vscale x 16 x i16> %v
+}
+
+define <vscale x 16 x i16> @vsadd_vi_nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_nxv16i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-NEXT:    vsadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 16 x i16> poison, i16 -1, i32 0
+  %vb = shufflevector <vscale x 16 x i16> %elt.head, <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer
+  %v = call <vscale x 16 x i16> @llvm.vp.sadd.sat.nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i16> %vb, <vscale x 16 x i1> %m, i32 %evl)
+  ret <vscale x 16 x i16> %v
+}
+
+define <vscale x 16 x i16> @vsadd_vi_nxv16i16_unmasked(<vscale x 16 x i16> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_nxv16i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-NEXT:    vsadd.vi v8, v8, -1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 16 x i16> poison, i16 -1, i32 0
+  %vb = shufflevector <vscale x 16 x i16> %elt.head, <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer
+  %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
+  %v = call <vscale x 16 x i16> @llvm.vp.sadd.sat.nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i16> %vb, <vscale x 16 x i1> %m, i32 %evl)
+  ret <vscale x 16 x i16> %v
+}
+
+declare <vscale x 32 x i16> @llvm.vp.sadd.sat.nxv32i16(<vscale x 32 x i16>, <vscale x 32 x i16>, <vscale x 32 x i1>, i32)
+
+define <vscale x 32 x i16> @vsadd_vv_nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i16> %b, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_nxv32i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
+; CHECK-NEXT:    vsadd.vv v8, v8, v16, v0.t
+; CHECK-NEXT:    ret
+  %v = call <vscale x 32 x i16> @llvm.vp.sadd.sat.nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i16> %b, <vscale x 32 x i1> %m, i32 %evl)
+  ret <vscale x 32 x i16> %v
+}
+
+define <vscale x 32 x i16> @vsadd_vv_nxv32i16_unmasked(<vscale x 32 x i16> %va, <vscale x 32 x i16> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_nxv32i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
+; CHECK-NEXT:    vsadd.vv v8, v8, v16
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
+  %v = call <vscale x 32 x i16> @llvm.vp.sadd.sat.nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i16> %b, <vscale x 32 x i1> %m, i32 %evl)
+  ret <vscale x 32 x i16> %v
+}
+
+define <vscale x 32 x i16> @vsadd_vx_nxv32i16(<vscale x 32 x i16> %va, i16 %b, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_nxv32i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; CHECK-NEXT:    vsadd.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 32 x i16> poison, i16 %b, i32 0
+  %vb = shufflevector <vscale x 32 x i16> %elt.head, <vscale x 32 x i16> poison, <vscale x 32 x i32> zeroinitializer
+  %v = call <vscale x 32 x i16> @llvm.vp.sadd.sat.nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i16> %vb, <vscale x 32 x i1> %m, i32 %evl)
+  ret <vscale x 32 x i16> %v
+}
+
+define <vscale x 32 x i16> @vsadd_vx_nxv32i16_unmasked(<vscale x 32 x i16> %va, i16 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_nxv32i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; CHECK-NEXT:    vsadd.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 32 x i16> poison, i16 %b, i32 0
+  %vb = shufflevector <vscale x 32 x i16> %elt.head, <vscale x 32 x i16> poison, <vscale x 32 x i32> zeroinitializer
+  %head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
+  %v = call <vscale x 32 x i16> @llvm.vp.sadd.sat.nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i16> %vb, <vscale x 32 x i1> %m, i32 %evl)
+  ret <vscale x 32 x i16> %v
+}
+
+define <vscale x 32 x i16> @vsadd_vi_nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_nxv32i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
+; CHECK-NEXT:    vsadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 32 x i16> poison, i16 -1, i32 0
+  %vb = shufflevector <vscale x 32 x i16> %elt.head, <vscale x 32 x i16> poison, <vscale x 32 x i32> zeroinitializer
+  %v = call <vscale x 32 x i16> @llvm.vp.sadd.sat.nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i16> %vb, <vscale x 32 x i1> %m, i32 %evl)
+  ret <vscale x 32 x i16> %v
+}
+
+define <vscale x 32 x i16> @vsadd_vi_nxv32i16_unmasked(<vscale x 32 x i16> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_nxv32i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
+; CHECK-NEXT:    vsadd.vi v8, v8, -1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 32 x i16> poison, i16 -1, i32 0
+  %vb = shufflevector <vscale x 32 x i16> %elt.head, <vscale x 32 x i16> poison, <vscale x 32 x i32> zeroinitializer
+  %head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
+  %v = call <vscale x 32 x i16> @llvm.vp.sadd.sat.nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i16> %vb, <vscale x 32 x i1> %m, i32 %evl)
+  ret <vscale x 32 x i16> %v
+}
+
+declare <vscale x 1 x i32> @llvm.vp.sadd.sat.nxv1i32(<vscale x 1 x i32>, <vscale x 1 x i32>, <vscale x 1 x i1>, i32)
+
+define <vscale x 1 x i32> @vsadd_vv_nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_nxv1i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT:    vsadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    ret
+  %v = call <vscale x 1 x i32> @llvm.vp.sadd.sat.nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %b, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i32> %v
+}
+
+define <vscale x 1 x i32> @vsadd_vv_nxv1i32_unmasked(<vscale x 1 x i32> %va, <vscale x 1 x i32> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_nxv1i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT:    vsadd.vv v8, v8, v9
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x i32> @llvm.vp.sadd.sat.nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %b, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i32> %v
+}
+
+define <vscale x 1 x i32> @vsadd_vx_nxv1i32(<vscale x 1 x i32> %va, i32 %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_nxv1i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-NEXT:    vsadd.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 1 x i32> poison, i32 %b, i32 0
+  %vb = shufflevector <vscale x 1 x i32> %elt.head, <vscale x 1 x i32> poison, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x i32> @llvm.vp.sadd.sat.nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %vb, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i32> %v
+}
+
+define <vscale x 1 x i32> @vsadd_vx_nxv1i32_unmasked(<vscale x 1 x i32> %va, i32 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_nxv1i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-NEXT:    vsadd.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 1 x i32> poison, i32 %b, i32 0
+  %vb = shufflevector <vscale x 1 x i32> %elt.head, <vscale x 1 x i32> poison, <vscale x 1 x i32> zeroinitializer
+  %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x i32> @llvm.vp.sadd.sat.nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %vb, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i32> %v
+}
+
+define <vscale x 1 x i32> @vsadd_vi_nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_nxv1i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT:    vsadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 1 x i32> poison, i32 -1, i32 0
+  %vb = shufflevector <vscale x 1 x i32> %elt.head, <vscale x 1 x i32> poison, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x i32> @llvm.vp.sadd.sat.nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %vb, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i32> %v
+}
+
+define <vscale x 1 x i32> @vsadd_vi_nxv1i32_unmasked(<vscale x 1 x i32> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_nxv1i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT:    vsadd.vi v8, v8, -1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 1 x i32> poison, i32 -1, i32 0
+  %vb = shufflevector <vscale x 1 x i32> %elt.head, <vscale x 1 x i32> poison, <vscale x 1 x i32> zeroinitializer
+  %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x i32> @llvm.vp.sadd.sat.nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %vb, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i32> %v
+}
+
+declare <vscale x 2 x i32> @llvm.vp.sadd.sat.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i1>, i32)
+
+define <vscale x 2 x i32> @vsadd_vv_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %b, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_nxv2i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT:    vsadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    ret
+  %v = call <vscale x 2 x i32> @llvm.vp.sadd.sat.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %b, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x i32> %v
+}
+
+define <vscale x 2 x i32> @vsadd_vv_nxv2i32_unmasked(<vscale x 2 x i32> %va, <vscale x 2 x i32> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_nxv2i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT:    vsadd.vv v8, v8, v9
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+  %v = call <vscale x 2 x i32> @llvm.vp.sadd.sat.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %b, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x i32> %v
+}
+
+define <vscale x 2 x i32> @vsadd_vx_nxv2i32(<vscale x 2 x i32> %va, i32 %b, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_nxv2i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-NEXT:    vsadd.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 2 x i32> poison, i32 %b, i32 0
+  %vb = shufflevector <vscale x 2 x i32> %elt.head, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
+  %v = call <vscale x 2 x i32> @llvm.vp.sadd.sat.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %vb, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x i32> %v
+}
+
+define <vscale x 2 x i32> @vsadd_vx_nxv2i32_unmasked(<vscale x 2 x i32> %va, i32 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_nxv2i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-NEXT:    vsadd.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 2 x i32> poison, i32 %b, i32 0
+  %vb = shufflevector <vscale x 2 x i32> %elt.head, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
+  %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+  %v = call <vscale x 2 x i32> @llvm.vp.sadd.sat.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %vb, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x i32> %v
+}
+
+define <vscale x 2 x i32> @vsadd_vi_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_nxv2i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT:    vsadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 2 x i32> poison, i32 -1, i32 0
+  %vb = shufflevector <vscale x 2 x i32> %elt.head, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
+  %v = call <vscale x 2 x i32> @llvm.vp.sadd.sat.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %vb, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x i32> %v
+}
+
+define <vscale x 2 x i32> @vsadd_vi_nxv2i32_unmasked(<vscale x 2 x i32> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_nxv2i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT:    vsadd.vi v8, v8, -1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 2 x i32> poison, i32 -1, i32 0
+  %vb = shufflevector <vscale x 2 x i32> %elt.head, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
+  %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+  %v = call <vscale x 2 x i32> @llvm.vp.sadd.sat.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %vb, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x i32> %v
+}
+
+declare <vscale x 4 x i32> @llvm.vp.sadd.sat.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, i32)
+
+define <vscale x 4 x i32> @vsadd_vv_nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i32> %b, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_nxv4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT:    vsadd.vv v8, v8, v10, v0.t
+; CHECK-NEXT:    ret
+  %v = call <vscale x 4 x i32> @llvm.vp.sadd.sat.nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i32> %b, <vscale x 4 x i1> %m, i32 %evl)
+  ret <vscale x 4 x i32> %v
+}
+
+define <vscale x 4 x i32> @vsadd_vv_nxv4i32_unmasked(<vscale x 4 x i32> %va, <vscale x 4 x i32> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_nxv4i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT:    vsadd.vv v8, v8, v10
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
+  %v = call <vscale x 4 x i32> @llvm.vp.sadd.sat.nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i32> %b, <vscale x 4 x i1> %m, i32 %evl)
+  ret <vscale x 4 x i32> %v
+}
+
+define <vscale x 4 x i32> @vsadd_vx_nxv4i32(<vscale x 4 x i32> %va, i32 %b, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_nxv4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT:    vsadd.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 4 x i32> poison, i32 %b, i32 0
+  %vb = shufflevector <vscale x 4 x i32> %elt.head, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+  %v = call <vscale x 4 x i32> @llvm.vp.sadd.sat.nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i32> %vb, <vscale x 4 x i1> %m, i32 %evl)
+  ret <vscale x 4 x i32> %v
+}
+
+define <vscale x 4 x i32> @vsadd_vx_nxv4i32_unmasked(<vscale x 4 x i32> %va, i32 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_nxv4i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT:    vsadd.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 4 x i32> poison, i32 %b, i32 0
+  %vb = shufflevector <vscale x 4 x i32> %elt.head, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+  %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
+  %v = call <vscale x 4 x i32> @llvm.vp.sadd.sat.nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i32> %vb, <vscale x 4 x i1> %m, i32 %evl)
+  ret <vscale x 4 x i32> %v
+}
+
+define <vscale x 4 x i32> @vsadd_vi_nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_nxv4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT:    vsadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 4 x i32> poison, i32 -1, i32 0
+  %vb = shufflevector <vscale x 4 x i32> %elt.head, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+  %v = call <vscale x 4 x i32> @llvm.vp.sadd.sat.nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i32> %vb, <vscale x 4 x i1> %m, i32 %evl)
+  ret <vscale x 4 x i32> %v
+}
+
+define <vscale x 4 x i32> @vsadd_vi_nxv4i32_unmasked(<vscale x 4 x i32> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_nxv4i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT:    vsadd.vi v8, v8, -1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 4 x i32> poison, i32 -1, i32 0
+  %vb = shufflevector <vscale x 4 x i32> %elt.head, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+  %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
+  %v = call <vscale x 4 x i32> @llvm.vp.sadd.sat.nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i32> %vb, <vscale x 4 x i1> %m, i32 %evl)
+  ret <vscale x 4 x i32> %v
+}
+
+declare <vscale x 8 x i32> @llvm.vp.sadd.sat.nxv8i32(<vscale x 8 x i32>, <vscale x 8 x i32>, <vscale x 8 x i1>, i32)
+
+define <vscale x 8 x i32> @vsadd_vv_nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_nxv8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT:    vsadd.vv v8, v8, v12, v0.t
+; CHECK-NEXT:    ret
+  %v = call <vscale x 8 x i32> @llvm.vp.sadd.sat.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %b, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i32> %v
+}
+
+define <vscale x 8 x i32> @vsadd_vv_nxv8i32_unmasked(<vscale x 8 x i32> %va, <vscale x 8 x i32> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_nxv8i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT:    vsadd.vv v8, v8, v12
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+  %v = call <vscale x 8 x i32> @llvm.vp.sadd.sat.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %b, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i32> %v
+}
+
+define <vscale x 8 x i32> @vsadd_vx_nxv8i32(<vscale x 8 x i32> %va, i32 %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_nxv8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-NEXT:    vsadd.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0
+  %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
+  %v = call <vscale x 8 x i32> @llvm.vp.sadd.sat.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %vb, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i32> %v
+}
+
+define <vscale x 8 x i32> @vsadd_vx_nxv8i32_unmasked(<vscale x 8 x i32> %va, i32 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_nxv8i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-NEXT:    vsadd.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0
+  %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
+  %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+  %v = call <vscale x 8 x i32> @llvm.vp.sadd.sat.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %vb, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i32> %v
+}
+
+define <vscale x 8 x i32> @vsadd_vi_nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_nxv8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT:    vsadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 8 x i32> poison, i32 -1, i32 0
+  %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
+  %v = call <vscale x 8 x i32> @llvm.vp.sadd.sat.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %vb, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i32> %v
+}
+
+define <vscale x 8 x i32> @vsadd_vi_nxv8i32_unmasked(<vscale x 8 x i32> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_nxv8i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT:    vsadd.vi v8, v8, -1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 8 x i32> poison, i32 -1, i32 0
+  %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
+  %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+  %v = call <vscale x 8 x i32> @llvm.vp.sadd.sat.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %vb, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i32> %v
+}
+
+declare <vscale x 16 x i32> @llvm.vp.sadd.sat.nxv16i32(<vscale x 16 x i32>, <vscale x 16 x i32>, <vscale x 16 x i1>, i32)
+
+define <vscale x 16 x i32> @vsadd_vv_nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i32> %b, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_nxv16i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT:    vsadd.vv v8, v8, v16, v0.t
+; CHECK-NEXT:    ret
+  %v = call <vscale x 16 x i32> @llvm.vp.sadd.sat.nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i32> %b, <vscale x 16 x i1> %m, i32 %evl)
+  ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @vsadd_vv_nxv16i32_unmasked(<vscale x 16 x i32> %va, <vscale x 16 x i32> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_nxv16i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT:    vsadd.vv v8, v8, v16
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
+  %v = call <vscale x 16 x i32> @llvm.vp.sadd.sat.nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i32> %b, <vscale x 16 x i1> %m, i32 %evl)
+  ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @vsadd_vx_nxv16i32(<vscale x 16 x i32> %va, i32 %b, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_nxv16i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-NEXT:    vsadd.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 16 x i32> poison, i32 %b, i32 0
+  %vb = shufflevector <vscale x 16 x i32> %elt.head, <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
+  %v = call <vscale x 16 x i32> @llvm.vp.sadd.sat.nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i32> %vb, <vscale x 16 x i1> %m, i32 %evl)
+  ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @vsadd_vx_nxv16i32_unmasked(<vscale x 16 x i32> %va, i32 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_nxv16i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-NEXT:    vsadd.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 16 x i32> poison, i32 %b, i32 0
+  %vb = shufflevector <vscale x 16 x i32> %elt.head, <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
+  %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
+  %v = call <vscale x 16 x i32> @llvm.vp.sadd.sat.nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i32> %vb, <vscale x 16 x i1> %m, i32 %evl)
+  ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @vsadd_vi_nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_nxv16i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT:    vsadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 16 x i32> poison, i32 -1, i32 0
+  %vb = shufflevector <vscale x 16 x i32> %elt.head, <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
+  %v = call <vscale x 16 x i32> @llvm.vp.sadd.sat.nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i32> %vb, <vscale x 16 x i1> %m, i32 %evl)
+  ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @vsadd_vi_nxv16i32_unmasked(<vscale x 16 x i32> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_nxv16i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT:    vsadd.vi v8, v8, -1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 16 x i32> poison, i32 -1, i32 0
+  %vb = shufflevector <vscale x 16 x i32> %elt.head, <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
+  %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
+  %v = call <vscale x 16 x i32> @llvm.vp.sadd.sat.nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i32> %vb, <vscale x 16 x i1> %m, i32 %evl)
+  ret <vscale x 16 x i32> %v
+}
+
+; Test that split-legalization works then the mask needs manual splitting.
+
+declare <vscale x 32 x i32> @llvm.vp.sadd.sat.nxv32i32(<vscale x 32 x i32>, <vscale x 32 x i32>, <vscale x 32 x i1>, i32)
+
+define <vscale x 32 x i32> @vsadd_vi_nxv32i32(<vscale x 32 x i32> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_nxv32i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vmv1r.v v24, v0
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    srli a2, a1, 2
+; CHECK-NEXT:    vsetvli a3, zero, e8, mf2, ta, ma
+; CHECK-NEXT:    vslidedown.vx v0, v0, a2
+; CHECK-NEXT:    slli a1, a1, 1
+; CHECK-NEXT:    sub a2, a0, a1
+; CHECK-NEXT:    sltu a3, a0, a2
+; CHECK-NEXT:    addi a3, a3, -1
+; CHECK-NEXT:    and a2, a3, a2
+; CHECK-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
+; CHECK-NEXT:    vsadd.vi v16, v16, -1, v0.t
+; CHECK-NEXT:    bltu a0, a1, .LBB118_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    mv a0, a1
+; CHECK-NEXT:  .LBB118_2:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT:    vmv1r.v v0, v24
+; CHECK-NEXT:    vsadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 32 x i32> poison, i32 -1, i32 0
+  %vb = shufflevector <vscale x 32 x i32> %elt.head, <vscale x 32 x i32> poison, <vscale x 32 x i32> zeroinitializer
+  %v = call <vscale x 32 x i32> @llvm.vp.sadd.sat.nxv32i32(<vscale x 32 x i32> %va, <vscale x 32 x i32> %vb, <vscale x 32 x i1> %m, i32 %evl)
+  ret <vscale x 32 x i32> %v
+}
+
+define <vscale x 32 x i32> @vsadd_vi_nxv32i32_unmasked(<vscale x 32 x i32> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_nxv32i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    slli a1, a1, 1
+; CHECK-NEXT:    sub a2, a0, a1
+; CHECK-NEXT:    sltu a3, a0, a2
+; CHECK-NEXT:    addi a3, a3, -1
+; CHECK-NEXT:    and a2, a3, a2
+; CHECK-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
+; CHECK-NEXT:    vsadd.vi v16, v16, -1
+; CHECK-NEXT:    bltu a0, a1, .LBB119_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    mv a0, a1
+; CHECK-NEXT:  .LBB119_2:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT:    vsadd.vi v8, v8, -1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 32 x i32> poison, i32 -1, i32 0
+  %vb = shufflevector <vscale x 32 x i32> %elt.head, <vscale x 32 x i32> poison, <vscale x 32 x i32> zeroinitializer
+  %head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
+  %v = call <vscale x 32 x i32> @llvm.vp.sadd.sat.nxv32i32(<vscale x 32 x i32> %va, <vscale x 32 x i32> %vb, <vscale x 32 x i1> %m, i32 %evl)
+  ret <vscale x 32 x i32> %v
+}
+
+declare <vscale x 1 x i64> @llvm.vp.sadd.sat.nxv1i64(<vscale x 1 x i64>, <vscale x 1 x i64>, <vscale x 1 x i1>, i32)
+
+define <vscale x 1 x i64> @vsadd_vv_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_nxv1i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT:    vsadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    ret
+  %v = call <vscale x 1 x i64> @llvm.vp.sadd.sat.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %b, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i64> %v
+}
+
+define <vscale x 1 x i64> @vsadd_vv_nxv1i64_unmasked(<vscale x 1 x i64> %va, <vscale x 1 x i64> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_nxv1i64_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT:    vsadd.vv v8, v8, v9
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x i64> @llvm.vp.sadd.sat.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %b, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i64> %v
+}
+
+define <vscale x 1 x i64> @vsadd_vx_nxv1i64(<vscale x 1 x i64> %va, i64 %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vsadd_vx_nxv1i64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    sw a1, 12(sp)
+; RV32-NEXT:    sw a0, 8(sp)
+; RV32-NEXT:    addi a0, sp, 8
+; RV32-NEXT:    vsetvli a1, zero, e64, m1, ta, ma
+; RV32-NEXT:    vlse64.v v9, (a0), zero
+; RV32-NEXT:    vsetvli zero, a2, e64, m1, ta, ma
+; RV32-NEXT:    vsadd.vv v8, v8, v9, v0.t
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vsadd_vx_nxv1i64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli zero, a1, e64, m1, ta, ma
+; RV64-NEXT:    vsadd.vx v8, v8, a0, v0.t
+; RV64-NEXT:    ret
+  %elt.head = insertelement <vscale x 1 x i64> poison, i64 %b, i32 0
+  %vb = shufflevector <vscale x 1 x i64> %elt.head, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x i64> @llvm.vp.sadd.sat.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %vb, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i64> %v
+}
+
+define <vscale x 1 x i64> @vsadd_vx_nxv1i64_unmasked(<vscale x 1 x i64> %va, i64 %b, i32 zeroext %evl) {
+; RV32-LABEL: vsadd_vx_nxv1i64_unmasked:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    sw a1, 12(sp)
+; RV32-NEXT:    sw a0, 8(sp)
+; RV32-NEXT:    addi a0, sp, 8
+; RV32-NEXT:    vsetvli a1, zero, e64, m1, ta, ma
+; RV32-NEXT:    vlse64.v v9, (a0), zero
+; RV32-NEXT:    vsetvli zero, a2, e64, m1, ta, ma
+; RV32-NEXT:    vsadd.vv v8, v8, v9
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vsadd_vx_nxv1i64_unmasked:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli zero, a1, e64, m1, ta, ma
+; RV64-NEXT:    vsadd.vx v8, v8, a0
+; RV64-NEXT:    ret
+  %elt.head = insertelement <vscale x 1 x i64> poison, i64 %b, i32 0
+  %vb = shufflevector <vscale x 1 x i64> %elt.head, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+  %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x i64> @llvm.vp.sadd.sat.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %vb, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i64> %v
+}
+
+define <vscale x 1 x i64> @vsadd_vi_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_nxv1i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT:    vsadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 1 x i64> poison, i64 -1, i32 0
+  %vb = shufflevector <vscale x 1 x i64> %elt.head, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x i64> @llvm.vp.sadd.sat.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %vb, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i64> %v
+}
+
+define <vscale x 1 x i64> @vsadd_vi_nxv1i64_unmasked(<vscale x 1 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_nxv1i64_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT:    vsadd.vi v8, v8, -1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 1 x i64> poison, i64 -1, i32 0
+  %vb = shufflevector <vscale x 1 x i64> %elt.head, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+  %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x i64> @llvm.vp.sadd.sat.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %vb, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i64> %v
+}
+
+declare <vscale x 2 x i64> @llvm.vp.sadd.sat.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i1>, i32)
+
+define <vscale x 2 x i64> @vsadd_vv_nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i64> %b, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_nxv2i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-NEXT:    vsadd.vv v8, v8, v10, v0.t
+; CHECK-NEXT:    ret
+  %v = call <vscale x 2 x i64> @llvm.vp.sadd.sat.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i64> %b, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x i64> %v
+}
+
+define <vscale x 2 x i64> @vsadd_vv_nxv2i64_unmasked(<vscale x 2 x i64> %va, <vscale x 2 x i64> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_nxv2i64_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-NEXT:    vsadd.vv v8, v8, v10
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+  %v = call <vscale x 2 x i64> @llvm.vp.sadd.sat.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i64> %b, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x i64> %v
+}
+
+define <vscale x 2 x i64> @vsadd_vx_nxv2i64(<vscale x 2 x i64> %va, i64 %b, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vsadd_vx_nxv2i64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    sw a1, 12(sp)
+; RV32-NEXT:    sw a0, 8(sp)
+; RV32-NEXT:    addi a0, sp, 8
+; RV32-NEXT:    vsetvli a1, zero, e64, m2, ta, ma
+; RV32-NEXT:    vlse64.v v10, (a0), zero
+; RV32-NEXT:    vsetvli zero, a2, e64, m2, ta, ma
+; RV32-NEXT:    vsadd.vv v8, v8, v10, v0.t
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vsadd_vx_nxv2i64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli zero, a1, e64, m2, ta, ma
+; RV64-NEXT:    vsadd.vx v8, v8, a0, v0.t
+; RV64-NEXT:    ret
+  %elt.head = insertelement <vscale x 2 x i64> poison, i64 %b, i32 0
+  %vb = shufflevector <vscale x 2 x i64> %elt.head, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+  %v = call <vscale x 2 x i64> @llvm.vp.sadd.sat.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i64> %vb, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x i64> %v
+}
+
+define <vscale x 2 x i64> @vsadd_vx_nxv2i64_unmasked(<vscale x 2 x i64> %va, i64 %b, i32 zeroext %evl) {
+; RV32-LABEL: vsadd_vx_nxv2i64_unmasked:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    sw a1, 12(sp)
+; RV32-NEXT:    sw a0, 8(sp)
+; RV32-NEXT:    addi a0, sp, 8
+; RV32-NEXT:    vsetvli a1, zero, e64, m2, ta, ma
+; RV32-NEXT:    vlse64.v v10, (a0), zero
+; RV32-NEXT:    vsetvli zero, a2, e64, m2, ta, ma
+; RV32-NEXT:    vsadd.vv v8, v8, v10
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vsadd_vx_nxv2i64_unmasked:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli zero, a1, e64, m2, ta, ma
+; RV64-NEXT:    vsadd.vx v8, v8, a0
+; RV64-NEXT:    ret
+  %elt.head = insertelement <vscale x 2 x i64> poison, i64 %b, i32 0
+  %vb = shufflevector <vscale x 2 x i64> %elt.head, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+  %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+  %v = call <vscale x 2 x i64> @llvm.vp.sadd.sat.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i64> %vb, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x i64> %v
+}
+
+define <vscale x 2 x i64> @vsadd_vi_nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_nxv2i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-NEXT:    vsadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 2 x i64> poison, i64 -1, i32 0
+  %vb = shufflevector <vscale x 2 x i64> %elt.head, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+  %v = call <vscale x 2 x i64> @llvm.vp.sadd.sat.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i64> %vb, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x i64> %v
+}
+
+define <vscale x 2 x i64> @vsadd_vi_nxv2i64_unmasked(<vscale x 2 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_nxv2i64_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-NEXT:    vsadd.vi v8, v8, -1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 2 x i64> poison, i64 -1, i32 0
+  %vb = shufflevector <vscale x 2 x i64> %elt.head, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+  %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+  %v = call <vscale x 2 x i64> @llvm.vp.sadd.sat.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i64> %vb, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x i64> %v
+}
+
+declare <vscale x 4 x i64> @llvm.vp.sadd.sat.nxv4i64(<vscale x 4 x i64>, <vscale x 4 x i64>, <vscale x 4 x i1>, i32)
+
+define <vscale x 4 x i64> @vsadd_vv_nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i64> %b, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_nxv4i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT:    vsadd.vv v8, v8, v12, v0.t
+; CHECK-NEXT:    ret
+  %v = call <vscale x 4 x i64> @llvm.vp.sadd.sat.nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i64> %b, <vscale x 4 x i1> %m, i32 %evl)
+  ret <vscale x 4 x i64> %v
+}
+
+define <vscale x 4 x i64> @vsadd_vv_nxv4i64_unmasked(<vscale x 4 x i64> %va, <vscale x 4 x i64> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_nxv4i64_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT:    vsadd.vv v8, v8, v12
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
+  %v = call <vscale x 4 x i64> @llvm.vp.sadd.sat.nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i64> %b, <vscale x 4 x i1> %m, i32 %evl)
+  ret <vscale x 4 x i64> %v
+}
+
+define <vscale x 4 x i64> @vsadd_vx_nxv4i64(<vscale x 4 x i64> %va, i64 %b, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vsadd_vx_nxv4i64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    sw a1, 12(sp)
+; RV32-NEXT:    sw a0, 8(sp)
+; RV32-NEXT:    addi a0, sp, 8
+; RV32-NEXT:    vsetvli a1, zero, e64, m4, ta, ma
+; RV32-NEXT:    vlse64.v v12, (a0), zero
+; RV32-NEXT:    vsetvli zero, a2, e64, m4, ta, ma
+; RV32-NEXT:    vsadd.vv v8, v8, v12, v0.t
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vsadd_vx_nxv4i64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
+; RV64-NEXT:    vsadd.vx v8, v8, a0, v0.t
+; RV64-NEXT:    ret
+  %elt.head = insertelement <vscale x 4 x i64> poison, i64 %b, i32 0
+  %vb = shufflevector <vscale x 4 x i64> %elt.head, <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
+  %v = call <vscale x 4 x i64> @llvm.vp.sadd.sat.nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i64> %vb, <vscale x 4 x i1> %m, i32 %evl)
+  ret <vscale x 4 x i64> %v
+}
+
+define <vscale x 4 x i64> @vsadd_vx_nxv4i64_unmasked(<vscale x 4 x i64> %va, i64 %b, i32 zeroext %evl) {
+; RV32-LABEL: vsadd_vx_nxv4i64_unmasked:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    sw a1, 12(sp)
+; RV32-NEXT:    sw a0, 8(sp)
+; RV32-NEXT:    addi a0, sp, 8
+; RV32-NEXT:    vsetvli a1, zero, e64, m4, ta, ma
+; RV32-NEXT:    vlse64.v v12, (a0), zero
+; RV32-NEXT:    vsetvli zero, a2, e64, m4, ta, ma
+; RV32-NEXT:    vsadd.vv v8, v8, v12
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vsadd_vx_nxv4i64_unmasked:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
+; RV64-NEXT:    vsadd.vx v8, v8, a0
+; RV64-NEXT:    ret
+  %elt.head = insertelement <vscale x 4 x i64> poison, i64 %b, i32 0
+  %vb = shufflevector <vscale x 4 x i64> %elt.head, <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
+  %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
+  %v = call <vscale x 4 x i64> @llvm.vp.sadd.sat.nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i64> %vb, <vscale x 4 x i1> %m, i32 %evl)
+  ret <vscale x 4 x i64> %v
+}
+
+define <vscale x 4 x i64> @vsadd_vi_nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_nxv4i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT:    vsadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 4 x i64> poison, i64 -1, i32 0
+  %vb = shufflevector <vscale x 4 x i64> %elt.head, <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
+  %v = call <vscale x 4 x i64> @llvm.vp.sadd.sat.nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i64> %vb, <vscale x 4 x i1> %m, i32 %evl)
+  ret <vscale x 4 x i64> %v
+}
+
+define <vscale x 4 x i64> @vsadd_vi_nxv4i64_unmasked(<vscale x 4 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_nxv4i64_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT:    vsadd.vi v8, v8, -1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 4 x i64> poison, i64 -1, i32 0
+  %vb = shufflevector <vscale x 4 x i64> %elt.head, <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
+  %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
+  %v = call <vscale x 4 x i64> @llvm.vp.sadd.sat.nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i64> %vb, <vscale x 4 x i1> %m, i32 %evl)
+  ret <vscale x 4 x i64> %v
+}
+
+declare <vscale x 8 x i64> @llvm.vp.sadd.sat.nxv8i64(<vscale x 8 x i64>, <vscale x 8 x i64>, <vscale x 8 x i1>, i32)
+
+define <vscale x 8 x i64> @vsadd_vv_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_nxv8i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT:    vsadd.vv v8, v8, v16, v0.t
+; CHECK-NEXT:    ret
+  %v = call <vscale x 8 x i64> @llvm.vp.sadd.sat.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %b, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i64> %v
+}
+
+define <vscale x 8 x i64> @vsadd_vv_nxv8i64_unmasked(<vscale x 8 x i64> %va, <vscale x 8 x i64> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_nxv8i64_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT:    vsadd.vv v8, v8, v16
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+  %v = call <vscale x 8 x i64> @llvm.vp.sadd.sat.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %b, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i64> %v
+}
+
+define <vscale x 8 x i64> @vsadd_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vsadd_vx_nxv8i64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    sw a1, 12(sp)
+; RV32-NEXT:    sw a0, 8(sp)
+; RV32-NEXT:    addi a0, sp, 8
+; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
+; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
+; RV32-NEXT:    vsadd.vv v8, v8, v16, v0.t
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vsadd_vx_nxv8i64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vsadd.vx v8, v8, a0, v0.t
+; RV64-NEXT:    ret
+  %elt.head = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0
+  %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+  %v = call <vscale x 8 x i64> @llvm.vp.sadd.sat.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i64> %v
+}
+
+define <vscale x 8 x i64> @vsadd_vx_nxv8i64_unmasked(<vscale x 8 x i64> %va, i64 %b, i32 zeroext %evl) {
+; RV32-LABEL: vsadd_vx_nxv8i64_unmasked:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    sw a1, 12(sp)
+; RV32-NEXT:    sw a0, 8(sp)
+; RV32-NEXT:    addi a0, sp, 8
+; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
+; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
+; RV32-NEXT:    vsadd.vv v8, v8, v16
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vsadd_vx_nxv8i64_unmasked:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vsadd.vx v8, v8, a0
+; RV64-NEXT:    ret
+  %elt.head = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0
+  %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+  %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+  %v = call <vscale x 8 x i64> @llvm.vp.sadd.sat.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i64> %v
+}
+
+define <vscale x 8 x i64> @vsadd_vi_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_nxv8i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT:    vsadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 8 x i64> poison, i64 -1, i32 0
+  %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+  %v = call <vscale x 8 x i64> @llvm.vp.sadd.sat.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i64> %v
+}
+
+define <vscale x 8 x i64> @vsadd_vi_nxv8i64_unmasked(<vscale x 8 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_nxv8i64_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT:    vsadd.vi v8, v8, -1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 8 x i64> poison, i64 -1, i32 0
+  %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+  %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+  %v = call <vscale x 8 x i64> @llvm.vp.sadd.sat.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i64> %v
+}

diff  --git a/llvm/test/CodeGen/RISCV/rvv/vsaddu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vsaddu-vp.ll
new file mode 100644
index 00000000000000..c0779e508c0a91
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vsaddu-vp.ll
@@ -0,0 +1,2014 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s --check-prefixes=CHECK,RV64
+
+declare <vscale x 8 x i7> @llvm.vp.uadd.sat.nxv8i7(<vscale x 8 x i7>, <vscale x 8 x i7>, <vscale x 8 x i1>, i32)
+
+define <vscale x 8 x i7> @vsaddu_vx_nxv8i7(<vscale x 8 x i7> %a, i7 signext %b, <vscale x 8 x i1> %mask, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_nxv8i7:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a2, 127
+; CHECK-NEXT:    vsetvli a3, zero, e8, m1, ta, ma
+; CHECK-NEXT:    vand.vx v8, v8, a2
+; CHECK-NEXT:    vmv.v.x v9, a0
+; CHECK-NEXT:    vand.vx v9, v9, a2
+; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    vminu.vx v8, v8, a2, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 8 x i7> poison, i7 %b, i32 0
+  %vb = shufflevector <vscale x 8 x i7> %elt.head, <vscale x 8 x i7> poison, <vscale x 8 x i32> zeroinitializer
+  %v = call <vscale x 8 x i7> @llvm.vp.uadd.sat.nxv8i7(<vscale x 8 x i7> %a, <vscale x 8 x i7> %vb, <vscale x 8 x i1> %mask, i32 %evl)
+  ret <vscale x 8 x i7> %v
+}
+
+declare <vscale x 1 x i8> @llvm.vp.uadd.sat.nxv1i8(<vscale x 1 x i8>, <vscale x 1 x i8>, <vscale x 1 x i1>, i32)
+
+define <vscale x 1 x i8> @vsaddu_vv_nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_nxv1i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-NEXT:    vsaddu.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    ret
+  %v = call <vscale x 1 x i8> @llvm.vp.uadd.sat.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %b, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i8> %v
+}
+
+define <vscale x 1 x i8> @vsaddu_vv_nxv1i8_unmasked(<vscale x 1 x i8> %va, <vscale x 1 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_nxv1i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-NEXT:    vsaddu.vv v8, v8, v9
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x i8> @llvm.vp.uadd.sat.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %b, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i8> %v
+}
+
+define <vscale x 1 x i8> @vsaddu_vx_nxv1i8(<vscale x 1 x i8> %va, i8 %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_nxv1i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT:    vsaddu.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 1 x i8> poison, i8 %b, i32 0
+  %vb = shufflevector <vscale x 1 x i8> %elt.head, <vscale x 1 x i8> poison, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x i8> @llvm.vp.uadd.sat.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i8> %v
+}
+
+define <vscale x 1 x i8> @vsaddu_vx_nxv1i8_commute(<vscale x 1 x i8> %va, i8 %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_nxv1i8_commute:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT:    vsaddu.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 1 x i8> poison, i8 %b, i32 0
+  %vb = shufflevector <vscale x 1 x i8> %elt.head, <vscale x 1 x i8> poison, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x i8> @llvm.vp.uadd.sat.nxv1i8(<vscale x 1 x i8> %vb, <vscale x 1 x i8> %va, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i8> %v
+}
+
+define <vscale x 1 x i8> @vsaddu_vx_nxv1i8_unmasked(<vscale x 1 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_nxv1i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT:    vsaddu.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 1 x i8> poison, i8 %b, i32 0
+  %vb = shufflevector <vscale x 1 x i8> %elt.head, <vscale x 1 x i8> poison, <vscale x 1 x i32> zeroinitializer
+  %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x i8> @llvm.vp.uadd.sat.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i8> %v
+}
+
+define <vscale x 1 x i8> @vsaddu_vi_nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_nxv1i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-NEXT:    vsaddu.vi v8, v8, -1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 1 x i8> poison, i8 -1, i32 0
+  %vb = shufflevector <vscale x 1 x i8> %elt.head, <vscale x 1 x i8> poison, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x i8> @llvm.vp.uadd.sat.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i8> %v
+}
+
+define <vscale x 1 x i8> @vsaddu_vi_nxv1i8_unmasked(<vscale x 1 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_nxv1i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-NEXT:    vsaddu.vi v8, v8, -1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 1 x i8> poison, i8 -1, i32 0
+  %vb = shufflevector <vscale x 1 x i8> %elt.head, <vscale x 1 x i8> poison, <vscale x 1 x i32> zeroinitializer
+  %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x i8> @llvm.vp.uadd.sat.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i8> %v
+}
+
+declare <vscale x 2 x i8> @llvm.vp.uadd.sat.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i8>, <vscale x 2 x i1>, i32)
+
+define <vscale x 2 x i8> @vsaddu_vv_nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %b, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_nxv2i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-NEXT:    vsaddu.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    ret
+  %v = call <vscale x 2 x i8> @llvm.vp.uadd.sat.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %b, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x i8> %v
+}
+
+define <vscale x 2 x i8> @vsaddu_vv_nxv2i8_unmasked(<vscale x 2 x i8> %va, <vscale x 2 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_nxv2i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-NEXT:    vsaddu.vv v8, v8, v9
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+  %v = call <vscale x 2 x i8> @llvm.vp.uadd.sat.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %b, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x i8> %v
+}
+
+define <vscale x 2 x i8> @vsaddu_vx_nxv2i8(<vscale x 2 x i8> %va, i8 %b, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_nxv2i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, ta, ma
+; CHECK-NEXT:    vsaddu.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 2 x i8> poison, i8 %b, i32 0
+  %vb = shufflevector <vscale x 2 x i8> %elt.head, <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer
+  %v = call <vscale x 2 x i8> @llvm.vp.uadd.sat.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %vb, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x i8> %v
+}
+
+define <vscale x 2 x i8> @vsaddu_vx_nxv2i8_unmasked(<vscale x 2 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_nxv2i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, ta, ma
+; CHECK-NEXT:    vsaddu.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 2 x i8> poison, i8 %b, i32 0
+  %vb = shufflevector <vscale x 2 x i8> %elt.head, <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer
+  %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+  %v = call <vscale x 2 x i8> @llvm.vp.uadd.sat.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %vb, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x i8> %v
+}
+
+define <vscale x 2 x i8> @vsaddu_vi_nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_nxv2i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-NEXT:    vsaddu.vi v8, v8, -1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 2 x i8> poison, i8 -1, i32 0
+  %vb = shufflevector <vscale x 2 x i8> %elt.head, <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer
+  %v = call <vscale x 2 x i8> @llvm.vp.uadd.sat.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %vb, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x i8> %v
+}
+
+define <vscale x 2 x i8> @vsaddu_vi_nxv2i8_unmasked(<vscale x 2 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_nxv2i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-NEXT:    vsaddu.vi v8, v8, -1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 2 x i8> poison, i8 -1, i32 0
+  %vb = shufflevector <vscale x 2 x i8> %elt.head, <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer
+  %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+  %v = call <vscale x 2 x i8> @llvm.vp.uadd.sat.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %vb, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x i8> %v
+}
+
+declare <vscale x 3 x i8> @llvm.vp.uadd.sat.nxv3i8(<vscale x 3 x i8>, <vscale x 3 x i8>, <vscale x 3 x i1>, i32)
+
+define <vscale x 3 x i8> @vsaddu_vv_nxv3i8(<vscale x 3 x i8> %va, <vscale x 3 x i8> %b, <vscale x 3 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_nxv3i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT:    vsaddu.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    ret
+  %v = call <vscale x 3 x i8> @llvm.vp.uadd.sat.nxv3i8(<vscale x 3 x i8> %va, <vscale x 3 x i8> %b, <vscale x 3 x i1> %m, i32 %evl)
+  ret <vscale x 3 x i8> %v
+}
+
+define <vscale x 3 x i8> @vsaddu_vv_nxv3i8_unmasked(<vscale x 3 x i8> %va, <vscale x 3 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_nxv3i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT:    vsaddu.vv v8, v8, v9
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 3 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 3 x i1> %head, <vscale x 3 x i1> poison, <vscale x 3 x i32> zeroinitializer
+  %v = call <vscale x 3 x i8> @llvm.vp.uadd.sat.nxv3i8(<vscale x 3 x i8> %va, <vscale x 3 x i8> %b, <vscale x 3 x i1> %m, i32 %evl)
+  ret <vscale x 3 x i8> %v
+}
+
+define <vscale x 3 x i8> @vsaddu_vx_nxv3i8(<vscale x 3 x i8> %va, i8 %b, <vscale x 3 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_nxv3i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-NEXT:    vsaddu.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 3 x i8> poison, i8 %b, i32 0
+  %vb = shufflevector <vscale x 3 x i8> %elt.head, <vscale x 3 x i8> poison, <vscale x 3 x i32> zeroinitializer
+  %v = call <vscale x 3 x i8> @llvm.vp.uadd.sat.nxv3i8(<vscale x 3 x i8> %va, <vscale x 3 x i8> %vb, <vscale x 3 x i1> %m, i32 %evl)
+  ret <vscale x 3 x i8> %v
+}
+
+define <vscale x 3 x i8> @vsaddu_vx_nxv3i8_unmasked(<vscale x 3 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_nxv3i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-NEXT:    vsaddu.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 3 x i8> poison, i8 %b, i32 0
+  %vb = shufflevector <vscale x 3 x i8> %elt.head, <vscale x 3 x i8> poison, <vscale x 3 x i32> zeroinitializer
+  %head = insertelement <vscale x 3 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 3 x i1> %head, <vscale x 3 x i1> poison, <vscale x 3 x i32> zeroinitializer
+  %v = call <vscale x 3 x i8> @llvm.vp.uadd.sat.nxv3i8(<vscale x 3 x i8> %va, <vscale x 3 x i8> %vb, <vscale x 3 x i1> %m, i32 %evl)
+  ret <vscale x 3 x i8> %v
+}
+
+define <vscale x 3 x i8> @vsaddu_vi_nxv3i8(<vscale x 3 x i8> %va, <vscale x 3 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_nxv3i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT:    vsaddu.vi v8, v8, -1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 3 x i8> poison, i8 -1, i32 0
+  %vb = shufflevector <vscale x 3 x i8> %elt.head, <vscale x 3 x i8> poison, <vscale x 3 x i32> zeroinitializer
+  %v = call <vscale x 3 x i8> @llvm.vp.uadd.sat.nxv3i8(<vscale x 3 x i8> %va, <vscale x 3 x i8> %vb, <vscale x 3 x i1> %m, i32 %evl)
+  ret <vscale x 3 x i8> %v
+}
+
+define <vscale x 3 x i8> @vsaddu_vi_nxv3i8_unmasked(<vscale x 3 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_nxv3i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT:    vsaddu.vi v8, v8, -1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 3 x i8> poison, i8 -1, i32 0
+  %vb = shufflevector <vscale x 3 x i8> %elt.head, <vscale x 3 x i8> poison, <vscale x 3 x i32> zeroinitializer
+  %head = insertelement <vscale x 3 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 3 x i1> %head, <vscale x 3 x i1> poison, <vscale x 3 x i32> zeroinitializer
+  %v = call <vscale x 3 x i8> @llvm.vp.uadd.sat.nxv3i8(<vscale x 3 x i8> %va, <vscale x 3 x i8> %vb, <vscale x 3 x i1> %m, i32 %evl)
+  ret <vscale x 3 x i8> %v
+}
+
+declare <vscale x 4 x i8> @llvm.vp.uadd.sat.nxv4i8(<vscale x 4 x i8>, <vscale x 4 x i8>, <vscale x 4 x i1>, i32)
+
+define <vscale x 4 x i8> @vsaddu_vv_nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %b, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_nxv4i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT:    vsaddu.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    ret
+  %v = call <vscale x 4 x i8> @llvm.vp.uadd.sat.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %b, <vscale x 4 x i1> %m, i32 %evl)
+  ret <vscale x 4 x i8> %v
+}
+
+define <vscale x 4 x i8> @vsaddu_vv_nxv4i8_unmasked(<vscale x 4 x i8> %va, <vscale x 4 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_nxv4i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT:    vsaddu.vv v8, v8, v9
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
+  %v = call <vscale x 4 x i8> @llvm.vp.uadd.sat.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %b, <vscale x 4 x i1> %m, i32 %evl)
+  ret <vscale x 4 x i8> %v
+}
+
+define <vscale x 4 x i8> @vsaddu_vx_nxv4i8(<vscale x 4 x i8> %va, i8 %b, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_nxv4i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-NEXT:    vsaddu.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 4 x i8> poison, i8 %b, i32 0
+  %vb = shufflevector <vscale x 4 x i8> %elt.head, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer
+  %v = call <vscale x 4 x i8> @llvm.vp.uadd.sat.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %vb, <vscale x 4 x i1> %m, i32 %evl)
+  ret <vscale x 4 x i8> %v
+}
+
+define <vscale x 4 x i8> @vsaddu_vx_nxv4i8_unmasked(<vscale x 4 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_nxv4i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-NEXT:    vsaddu.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 4 x i8> poison, i8 %b, i32 0
+  %vb = shufflevector <vscale x 4 x i8> %elt.head, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer
+  %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
+  %v = call <vscale x 4 x i8> @llvm.vp.uadd.sat.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %vb, <vscale x 4 x i1> %m, i32 %evl)
+  ret <vscale x 4 x i8> %v
+}
+
+define <vscale x 4 x i8> @vsaddu_vi_nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_nxv4i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT:    vsaddu.vi v8, v8, -1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 4 x i8> poison, i8 -1, i32 0
+  %vb = shufflevector <vscale x 4 x i8> %elt.head, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer
+  %v = call <vscale x 4 x i8> @llvm.vp.uadd.sat.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %vb, <vscale x 4 x i1> %m, i32 %evl)
+  ret <vscale x 4 x i8> %v
+}
+
+define <vscale x 4 x i8> @vsaddu_vi_nxv4i8_unmasked(<vscale x 4 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_nxv4i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT:    vsaddu.vi v8, v8, -1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 4 x i8> poison, i8 -1, i32 0
+  %vb = shufflevector <vscale x 4 x i8> %elt.head, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer
+  %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
+  %v = call <vscale x 4 x i8> @llvm.vp.uadd.sat.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %vb, <vscale x 4 x i1> %m, i32 %evl)
+  ret <vscale x 4 x i8> %v
+}
+
+declare <vscale x 8 x i8> @llvm.vp.uadd.sat.nxv8i8(<vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i1>, i32)
+
+define <vscale x 8 x i8> @vsaddu_vv_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_nxv8i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-NEXT:    vsaddu.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    ret
+  %v = call <vscale x 8 x i8> @llvm.vp.uadd.sat.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %b, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i8> %v
+}
+
+define <vscale x 8 x i8> @vsaddu_vv_nxv8i8_unmasked(<vscale x 8 x i8> %va, <vscale x 8 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_nxv8i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-NEXT:    vsaddu.vv v8, v8, v9
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+  %v = call <vscale x 8 x i8> @llvm.vp.uadd.sat.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %b, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i8> %v
+}
+
+define <vscale x 8 x i8> @vsaddu_vx_nxv8i8(<vscale x 8 x i8> %va, i8 %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_nxv8i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-NEXT:    vsaddu.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 8 x i8> poison, i8 %b, i32 0
+  %vb = shufflevector <vscale x 8 x i8> %elt.head, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
+  %v = call <vscale x 8 x i8> @llvm.vp.uadd.sat.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %vb, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i8> %v
+}
+
+define <vscale x 8 x i8> @vsaddu_vx_nxv8i8_unmasked(<vscale x 8 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_nxv8i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-NEXT:    vsaddu.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 8 x i8> poison, i8 %b, i32 0
+  %vb = shufflevector <vscale x 8 x i8> %elt.head, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
+  %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+  %v = call <vscale x 8 x i8> @llvm.vp.uadd.sat.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %vb, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i8> %v
+}
+
+define <vscale x 8 x i8> @vsaddu_vi_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_nxv8i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-NEXT:    vsaddu.vi v8, v8, -1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 8 x i8> poison, i8 -1, i32 0
+  %vb = shufflevector <vscale x 8 x i8> %elt.head, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
+  %v = call <vscale x 8 x i8> @llvm.vp.uadd.sat.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %vb, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i8> %v
+}
+
+define <vscale x 8 x i8> @vsaddu_vi_nxv8i8_unmasked(<vscale x 8 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_nxv8i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-NEXT:    vsaddu.vi v8, v8, -1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 8 x i8> poison, i8 -1, i32 0
+  %vb = shufflevector <vscale x 8 x i8> %elt.head, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
+  %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+  %v = call <vscale x 8 x i8> @llvm.vp.uadd.sat.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %vb, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i8> %v
+}
+
+declare <vscale x 16 x i8> @llvm.vp.uadd.sat.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i1>, i32)
+
+define <vscale x 16 x i8> @vsaddu_vv_nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i8> %b, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_nxv16i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
+; CHECK-NEXT:    vsaddu.vv v8, v8, v10, v0.t
+; CHECK-NEXT:    ret
+  %v = call <vscale x 16 x i8> @llvm.vp.uadd.sat.nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i8> %b, <vscale x 16 x i1> %m, i32 %evl)
+  ret <vscale x 16 x i8> %v
+}
+
+define <vscale x 16 x i8> @vsaddu_vv_nxv16i8_unmasked(<vscale x 16 x i8> %va, <vscale x 16 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_nxv16i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
+; CHECK-NEXT:    vsaddu.vv v8, v8, v10
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
+  %v = call <vscale x 16 x i8> @llvm.vp.uadd.sat.nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i8> %b, <vscale x 16 x i1> %m, i32 %evl)
+  ret <vscale x 16 x i8> %v
+}
+
+define <vscale x 16 x i8> @vsaddu_vx_nxv16i8(<vscale x 16 x i8> %va, i8 %b, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_nxv16i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
+; CHECK-NEXT:    vsaddu.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 16 x i8> poison, i8 %b, i32 0
+  %vb = shufflevector <vscale x 16 x i8> %elt.head, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
+  %v = call <vscale x 16 x i8> @llvm.vp.uadd.sat.nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i8> %vb, <vscale x 16 x i1> %m, i32 %evl)
+  ret <vscale x 16 x i8> %v
+}
+
+define <vscale x 16 x i8> @vsaddu_vx_nxv16i8_unmasked(<vscale x 16 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_nxv16i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
+; CHECK-NEXT:    vsaddu.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 16 x i8> poison, i8 %b, i32 0
+  %vb = shufflevector <vscale x 16 x i8> %elt.head, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
+  %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
+  %v = call <vscale x 16 x i8> @llvm.vp.uadd.sat.nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i8> %vb, <vscale x 16 x i1> %m, i32 %evl)
+  ret <vscale x 16 x i8> %v
+}
+
+define <vscale x 16 x i8> @vsaddu_vi_nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_nxv16i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
+; CHECK-NEXT:    vsaddu.vi v8, v8, -1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 16 x i8> poison, i8 -1, i32 0
+  %vb = shufflevector <vscale x 16 x i8> %elt.head, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
+  %v = call <vscale x 16 x i8> @llvm.vp.uadd.sat.nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i8> %vb, <vscale x 16 x i1> %m, i32 %evl)
+  ret <vscale x 16 x i8> %v
+}
+
+define <vscale x 16 x i8> @vsaddu_vi_nxv16i8_unmasked(<vscale x 16 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_nxv16i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
+; CHECK-NEXT:    vsaddu.vi v8, v8, -1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 16 x i8> poison, i8 -1, i32 0
+  %vb = shufflevector <vscale x 16 x i8> %elt.head, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
+  %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
+  %v = call <vscale x 16 x i8> @llvm.vp.uadd.sat.nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i8> %vb, <vscale x 16 x i1> %m, i32 %evl)
+  ret <vscale x 16 x i8> %v
+}
+
+declare <vscale x 32 x i8> @llvm.vp.uadd.sat.nxv32i8(<vscale x 32 x i8>, <vscale x 32 x i8>, <vscale x 32 x i1>, i32)
+
+define <vscale x 32 x i8> @vsaddu_vv_nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i8> %b, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_nxv32i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
+; CHECK-NEXT:    vsaddu.vv v8, v8, v12, v0.t
+; CHECK-NEXT:    ret
+  %v = call <vscale x 32 x i8> @llvm.vp.uadd.sat.nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i8> %b, <vscale x 32 x i1> %m, i32 %evl)
+  ret <vscale x 32 x i8> %v
+}
+
+define <vscale x 32 x i8> @vsaddu_vv_nxv32i8_unmasked(<vscale x 32 x i8> %va, <vscale x 32 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_nxv32i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
+; CHECK-NEXT:    vsaddu.vv v8, v8, v12
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
+  %v = call <vscale x 32 x i8> @llvm.vp.uadd.sat.nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i8> %b, <vscale x 32 x i1> %m, i32 %evl)
+  ret <vscale x 32 x i8> %v
+}
+
+define <vscale x 32 x i8> @vsaddu_vx_nxv32i8(<vscale x 32 x i8> %va, i8 %b, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_nxv32i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, m4, ta, ma
+; CHECK-NEXT:    vsaddu.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 32 x i8> poison, i8 %b, i32 0
+  %vb = shufflevector <vscale x 32 x i8> %elt.head, <vscale x 32 x i8> poison, <vscale x 32 x i32> zeroinitializer
+  %v = call <vscale x 32 x i8> @llvm.vp.uadd.sat.nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i8> %vb, <vscale x 32 x i1> %m, i32 %evl)
+  ret <vscale x 32 x i8> %v
+}
+
+define <vscale x 32 x i8> @vsaddu_vx_nxv32i8_unmasked(<vscale x 32 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_nxv32i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, m4, ta, ma
+; CHECK-NEXT:    vsaddu.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 32 x i8> poison, i8 %b, i32 0
+  %vb = shufflevector <vscale x 32 x i8> %elt.head, <vscale x 32 x i8> poison, <vscale x 32 x i32> zeroinitializer
+  %head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
+  %v = call <vscale x 32 x i8> @llvm.vp.uadd.sat.nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i8> %vb, <vscale x 32 x i1> %m, i32 %evl)
+  ret <vscale x 32 x i8> %v
+}
+
+define <vscale x 32 x i8> @vsaddu_vi_nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_nxv32i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
+; CHECK-NEXT:    vsaddu.vi v8, v8, -1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 32 x i8> poison, i8 -1, i32 0
+  %vb = shufflevector <vscale x 32 x i8> %elt.head, <vscale x 32 x i8> poison, <vscale x 32 x i32> zeroinitializer
+  %v = call <vscale x 32 x i8> @llvm.vp.uadd.sat.nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i8> %vb, <vscale x 32 x i1> %m, i32 %evl)
+  ret <vscale x 32 x i8> %v
+}
+
+define <vscale x 32 x i8> @vsaddu_vi_nxv32i8_unmasked(<vscale x 32 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_nxv32i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
+; CHECK-NEXT:    vsaddu.vi v8, v8, -1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 32 x i8> poison, i8 -1, i32 0
+  %vb = shufflevector <vscale x 32 x i8> %elt.head, <vscale x 32 x i8> poison, <vscale x 32 x i32> zeroinitializer
+  %head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
+  %v = call <vscale x 32 x i8> @llvm.vp.uadd.sat.nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i8> %vb, <vscale x 32 x i1> %m, i32 %evl)
+  ret <vscale x 32 x i8> %v
+}
+
+declare <vscale x 64 x i8> @llvm.vp.uadd.sat.nxv64i8(<vscale x 64 x i8>, <vscale x 64 x i8>, <vscale x 64 x i1>, i32)
+
+define <vscale x 64 x i8> @vsaddu_vv_nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i8> %b, <vscale x 64 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_nxv64i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-NEXT:    vsaddu.vv v8, v8, v16, v0.t
+; CHECK-NEXT:    ret
+  %v = call <vscale x 64 x i8> @llvm.vp.uadd.sat.nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i8> %b, <vscale x 64 x i1> %m, i32 %evl)
+  ret <vscale x 64 x i8> %v
+}
+
+define <vscale x 64 x i8> @vsaddu_vv_nxv64i8_unmasked(<vscale x 64 x i8> %va, <vscale x 64 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_nxv64i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-NEXT:    vsaddu.vv v8, v8, v16
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 64 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 64 x i1> %head, <vscale x 64 x i1> poison, <vscale x 64 x i32> zeroinitializer
+  %v = call <vscale x 64 x i8> @llvm.vp.uadd.sat.nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i8> %b, <vscale x 64 x i1> %m, i32 %evl)
+  ret <vscale x 64 x i8> %v
+}
+
+define <vscale x 64 x i8> @vsaddu_vx_nxv64i8(<vscale x 64 x i8> %va, i8 %b, <vscale x 64 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_nxv64i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-NEXT:    vsaddu.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 64 x i8> poison, i8 %b, i32 0
+  %vb = shufflevector <vscale x 64 x i8> %elt.head, <vscale x 64 x i8> poison, <vscale x 64 x i32> zeroinitializer
+  %v = call <vscale x 64 x i8> @llvm.vp.uadd.sat.nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i8> %vb, <vscale x 64 x i1> %m, i32 %evl)
+  ret <vscale x 64 x i8> %v
+}
+
+define <vscale x 64 x i8> @vsaddu_vx_nxv64i8_unmasked(<vscale x 64 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_nxv64i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-NEXT:    vsaddu.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 64 x i8> poison, i8 %b, i32 0
+  %vb = shufflevector <vscale x 64 x i8> %elt.head, <vscale x 64 x i8> poison, <vscale x 64 x i32> zeroinitializer
+  %head = insertelement <vscale x 64 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 64 x i1> %head, <vscale x 64 x i1> poison, <vscale x 64 x i32> zeroinitializer
+  %v = call <vscale x 64 x i8> @llvm.vp.uadd.sat.nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i8> %vb, <vscale x 64 x i1> %m, i32 %evl)
+  ret <vscale x 64 x i8> %v
+}
+
+define <vscale x 64 x i8> @vsaddu_vi_nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_nxv64i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-NEXT:    vsaddu.vi v8, v8, -1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 64 x i8> poison, i8 -1, i32 0
+  %vb = shufflevector <vscale x 64 x i8> %elt.head, <vscale x 64 x i8> poison, <vscale x 64 x i32> zeroinitializer
+  %v = call <vscale x 64 x i8> @llvm.vp.uadd.sat.nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i8> %vb, <vscale x 64 x i1> %m, i32 %evl)
+  ret <vscale x 64 x i8> %v
+}
+
+define <vscale x 64 x i8> @vsaddu_vi_nxv64i8_unmasked(<vscale x 64 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_nxv64i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-NEXT:    vsaddu.vi v8, v8, -1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 64 x i8> poison, i8 -1, i32 0
+  %vb = shufflevector <vscale x 64 x i8> %elt.head, <vscale x 64 x i8> poison, <vscale x 64 x i32> zeroinitializer
+  %head = insertelement <vscale x 64 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 64 x i1> %head, <vscale x 64 x i1> poison, <vscale x 64 x i32> zeroinitializer
+  %v = call <vscale x 64 x i8> @llvm.vp.uadd.sat.nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i8> %vb, <vscale x 64 x i1> %m, i32 %evl)
+  ret <vscale x 64 x i8> %v
+}
+
+; Test that split-legalization works when the mask itself needs splitting.
+
+declare <vscale x 128 x i8> @llvm.vp.uadd.sat.nxv128i8(<vscale x 128 x i8>, <vscale x 128 x i8>, <vscale x 128 x i1>, i32)
+
+define <vscale x 128 x i8> @vsaddu_vi_nxv128i8(<vscale x 128 x i8> %va, <vscale x 128 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_nxv128i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vmv1r.v v24, v0
+; CHECK-NEXT:    vsetvli a2, zero, e8, m8, ta, ma
+; CHECK-NEXT:    vlm.v v0, (a0)
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 3
+; CHECK-NEXT:    sub a2, a1, a0
+; CHECK-NEXT:    sltu a3, a1, a2
+; CHECK-NEXT:    addi a3, a3, -1
+; CHECK-NEXT:    and a2, a3, a2
+; CHECK-NEXT:    vsetvli zero, a2, e8, m8, ta, ma
+; CHECK-NEXT:    vsaddu.vi v16, v16, -1, v0.t
+; CHECK-NEXT:    bltu a1, a0, .LBB50_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    mv a1, a0
+; CHECK-NEXT:  .LBB50_2:
+; CHECK-NEXT:    vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-NEXT:    vmv1r.v v0, v24
+; CHECK-NEXT:    vsaddu.vi v8, v8, -1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 128 x i8> poison, i8 -1, i32 0
+  %vb = shufflevector <vscale x 128 x i8> %elt.head, <vscale x 128 x i8> poison, <vscale x 128 x i32> zeroinitializer
+  %v = call <vscale x 128 x i8> @llvm.vp.uadd.sat.nxv128i8(<vscale x 128 x i8> %va, <vscale x 128 x i8> %vb, <vscale x 128 x i1> %m, i32 %evl)
+  ret <vscale x 128 x i8> %v
+}
+
+define <vscale x 128 x i8> @vsaddu_vi_nxv128i8_unmasked(<vscale x 128 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_nxv128i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    slli a1, a1, 3
+; CHECK-NEXT:    sub a2, a0, a1
+; CHECK-NEXT:    sltu a3, a0, a2
+; CHECK-NEXT:    addi a3, a3, -1
+; CHECK-NEXT:    and a2, a3, a2
+; CHECK-NEXT:    vsetvli zero, a2, e8, m8, ta, ma
+; CHECK-NEXT:    vsaddu.vi v16, v16, -1
+; CHECK-NEXT:    bltu a0, a1, .LBB51_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    mv a0, a1
+; CHECK-NEXT:  .LBB51_2:
+; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-NEXT:    vsaddu.vi v8, v8, -1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 128 x i8> poison, i8 -1, i32 0
+  %vb = shufflevector <vscale x 128 x i8> %elt.head, <vscale x 128 x i8> poison, <vscale x 128 x i32> zeroinitializer
+  %head = insertelement <vscale x 128 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 128 x i1> %head, <vscale x 128 x i1> poison, <vscale x 128 x i32> zeroinitializer
+  %v = call <vscale x 128 x i8> @llvm.vp.uadd.sat.nxv128i8(<vscale x 128 x i8> %va, <vscale x 128 x i8> %vb, <vscale x 128 x i1> %m, i32 %evl)
+  ret <vscale x 128 x i8> %v
+}
+
+declare <vscale x 1 x i16> @llvm.vp.uadd.sat.nxv1i16(<vscale x 1 x i16>, <vscale x 1 x i16>, <vscale x 1 x i1>, i32)
+
+define <vscale x 1 x i16> @vsaddu_vv_nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_nxv1i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT:    vsaddu.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    ret
+  %v = call <vscale x 1 x i16> @llvm.vp.uadd.sat.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %b, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i16> %v
+}
+
+define <vscale x 1 x i16> @vsaddu_vv_nxv1i16_unmasked(<vscale x 1 x i16> %va, <vscale x 1 x i16> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_nxv1i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT:    vsaddu.vv v8, v8, v9
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x i16> @llvm.vp.uadd.sat.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %b, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i16> %v
+}
+
+define <vscale x 1 x i16> @vsaddu_vx_nxv1i16(<vscale x 1 x i16> %va, i16 %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_nxv1i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-NEXT:    vsaddu.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 1 x i16> poison, i16 %b, i32 0
+  %vb = shufflevector <vscale x 1 x i16> %elt.head, <vscale x 1 x i16> poison, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x i16> @llvm.vp.uadd.sat.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %vb, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i16> %v
+}
+
+define <vscale x 1 x i16> @vsaddu_vx_nxv1i16_unmasked(<vscale x 1 x i16> %va, i16 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_nxv1i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-NEXT:    vsaddu.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 1 x i16> poison, i16 %b, i32 0
+  %vb = shufflevector <vscale x 1 x i16> %elt.head, <vscale x 1 x i16> poison, <vscale x 1 x i32> zeroinitializer
+  %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x i16> @llvm.vp.uadd.sat.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %vb, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i16> %v
+}
+
+define <vscale x 1 x i16> @vsaddu_vi_nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_nxv1i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT:    vsaddu.vi v8, v8, -1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 1 x i16> poison, i16 -1, i32 0
+  %vb = shufflevector <vscale x 1 x i16> %elt.head, <vscale x 1 x i16> poison, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x i16> @llvm.vp.uadd.sat.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %vb, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i16> %v
+}
+
+define <vscale x 1 x i16> @vsaddu_vi_nxv1i16_unmasked(<vscale x 1 x i16> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_nxv1i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT:    vsaddu.vi v8, v8, -1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 1 x i16> poison, i16 -1, i32 0
+  %vb = shufflevector <vscale x 1 x i16> %elt.head, <vscale x 1 x i16> poison, <vscale x 1 x i32> zeroinitializer
+  %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x i16> @llvm.vp.uadd.sat.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %vb, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i16> %v
+}
+
+declare <vscale x 2 x i16> @llvm.vp.uadd.sat.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i16>, <vscale x 2 x i1>, i32)
+
+define <vscale x 2 x i16> @vsaddu_vv_nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %b, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_nxv2i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT:    vsaddu.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    ret
+  %v = call <vscale x 2 x i16> @llvm.vp.uadd.sat.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %b, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x i16> %v
+}
+
+define <vscale x 2 x i16> @vsaddu_vv_nxv2i16_unmasked(<vscale x 2 x i16> %va, <vscale x 2 x i16> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_nxv2i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT:    vsaddu.vv v8, v8, v9
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+  %v = call <vscale x 2 x i16> @llvm.vp.uadd.sat.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %b, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x i16> %v
+}
+
+define <vscale x 2 x i16> @vsaddu_vx_nxv2i16(<vscale x 2 x i16> %va, i16 %b, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_nxv2i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-NEXT:    vsaddu.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 2 x i16> poison, i16 %b, i32 0
+  %vb = shufflevector <vscale x 2 x i16> %elt.head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
+  %v = call <vscale x 2 x i16> @llvm.vp.uadd.sat.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %vb, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x i16> %v
+}
+
+define <vscale x 2 x i16> @vsaddu_vx_nxv2i16_unmasked(<vscale x 2 x i16> %va, i16 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_nxv2i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-NEXT:    vsaddu.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 2 x i16> poison, i16 %b, i32 0
+  %vb = shufflevector <vscale x 2 x i16> %elt.head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
+  %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+  %v = call <vscale x 2 x i16> @llvm.vp.uadd.sat.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %vb, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x i16> %v
+}
+
+define <vscale x 2 x i16> @vsaddu_vi_nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_nxv2i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT:    vsaddu.vi v8, v8, -1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 2 x i16> poison, i16 -1, i32 0
+  %vb = shufflevector <vscale x 2 x i16> %elt.head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
+  %v = call <vscale x 2 x i16> @llvm.vp.uadd.sat.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %vb, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x i16> %v
+}
+
+define <vscale x 2 x i16> @vsaddu_vi_nxv2i16_unmasked(<vscale x 2 x i16> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_nxv2i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT:    vsaddu.vi v8, v8, -1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 2 x i16> poison, i16 -1, i32 0
+  %vb = shufflevector <vscale x 2 x i16> %elt.head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
+  %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+  %v = call <vscale x 2 x i16> @llvm.vp.uadd.sat.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %vb, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x i16> %v
+}
+
+declare <vscale x 4 x i16> @llvm.vp.uadd.sat.nxv4i16(<vscale x 4 x i16>, <vscale x 4 x i16>, <vscale x 4 x i1>, i32)
+
+define <vscale x 4 x i16> @vsaddu_vv_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %b, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_nxv4i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT:    vsaddu.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    ret
+  %v = call <vscale x 4 x i16> @llvm.vp.uadd.sat.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %b, <vscale x 4 x i1> %m, i32 %evl)
+  ret <vscale x 4 x i16> %v
+}
+
+define <vscale x 4 x i16> @vsaddu_vv_nxv4i16_unmasked(<vscale x 4 x i16> %va, <vscale x 4 x i16> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_nxv4i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT:    vsaddu.vv v8, v8, v9
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
+  %v = call <vscale x 4 x i16> @llvm.vp.uadd.sat.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %b, <vscale x 4 x i1> %m, i32 %evl)
+  ret <vscale x 4 x i16> %v
+}
+
+define <vscale x 4 x i16> @vsaddu_vx_nxv4i16(<vscale x 4 x i16> %va, i16 %b, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_nxv4i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT:    vsaddu.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 4 x i16> poison, i16 %b, i32 0
+  %vb = shufflevector <vscale x 4 x i16> %elt.head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
+  %v = call <vscale x 4 x i16> @llvm.vp.uadd.sat.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %vb, <vscale x 4 x i1> %m, i32 %evl)
+  ret <vscale x 4 x i16> %v
+}
+
+define <vscale x 4 x i16> @vsaddu_vx_nxv4i16_unmasked(<vscale x 4 x i16> %va, i16 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_nxv4i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT:    vsaddu.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 4 x i16> poison, i16 %b, i32 0
+  %vb = shufflevector <vscale x 4 x i16> %elt.head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
+  %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
+  %v = call <vscale x 4 x i16> @llvm.vp.uadd.sat.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %vb, <vscale x 4 x i1> %m, i32 %evl)
+  ret <vscale x 4 x i16> %v
+}
+
+define <vscale x 4 x i16> @vsaddu_vi_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_nxv4i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT:    vsaddu.vi v8, v8, -1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 4 x i16> poison, i16 -1, i32 0
+  %vb = shufflevector <vscale x 4 x i16> %elt.head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
+  %v = call <vscale x 4 x i16> @llvm.vp.uadd.sat.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %vb, <vscale x 4 x i1> %m, i32 %evl)
+  ret <vscale x 4 x i16> %v
+}
+
+define <vscale x 4 x i16> @vsaddu_vi_nxv4i16_unmasked(<vscale x 4 x i16> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_nxv4i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT:    vsaddu.vi v8, v8, -1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 4 x i16> poison, i16 -1, i32 0
+  %vb = shufflevector <vscale x 4 x i16> %elt.head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
+  %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
+  %v = call <vscale x 4 x i16> @llvm.vp.uadd.sat.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %vb, <vscale x 4 x i1> %m, i32 %evl)
+  ret <vscale x 4 x i16> %v
+}
+
+declare <vscale x 8 x i16> @llvm.vp.uadd.sat.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i1>, i32)
+
+define <vscale x 8 x i16> @vsaddu_vv_nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_nxv8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT:    vsaddu.vv v8, v8, v10, v0.t
+; CHECK-NEXT:    ret
+  %v = call <vscale x 8 x i16> @llvm.vp.uadd.sat.nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %b, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i16> %v
+}
+
+define <vscale x 8 x i16> @vsaddu_vv_nxv8i16_unmasked(<vscale x 8 x i16> %va, <vscale x 8 x i16> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_nxv8i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT:    vsaddu.vv v8, v8, v10
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+  %v = call <vscale x 8 x i16> @llvm.vp.uadd.sat.nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %b, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i16> %v
+}
+
+define <vscale x 8 x i16> @vsaddu_vx_nxv8i16(<vscale x 8 x i16> %va, i16 %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_nxv8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
+; CHECK-NEXT:    vsaddu.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 8 x i16> poison, i16 %b, i32 0
+  %vb = shufflevector <vscale x 8 x i16> %elt.head, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
+  %v = call <vscale x 8 x i16> @llvm.vp.uadd.sat.nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %vb, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i16> %v
+}
+
+define <vscale x 8 x i16> @vsaddu_vx_nxv8i16_unmasked(<vscale x 8 x i16> %va, i16 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_nxv8i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
+; CHECK-NEXT:    vsaddu.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 8 x i16> poison, i16 %b, i32 0
+  %vb = shufflevector <vscale x 8 x i16> %elt.head, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
+  %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+  %v = call <vscale x 8 x i16> @llvm.vp.uadd.sat.nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %vb, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i16> %v
+}
+
+define <vscale x 8 x i16> @vsaddu_vi_nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_nxv8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT:    vsaddu.vi v8, v8, -1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 8 x i16> poison, i16 -1, i32 0
+  %vb = shufflevector <vscale x 8 x i16> %elt.head, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
+  %v = call <vscale x 8 x i16> @llvm.vp.uadd.sat.nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %vb, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i16> %v
+}
+
+define <vscale x 8 x i16> @vsaddu_vi_nxv8i16_unmasked(<vscale x 8 x i16> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_nxv8i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT:    vsaddu.vi v8, v8, -1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 8 x i16> poison, i16 -1, i32 0
+  %vb = shufflevector <vscale x 8 x i16> %elt.head, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
+  %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+  %v = call <vscale x 8 x i16> @llvm.vp.uadd.sat.nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %vb, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i16> %v
+}
+
+declare <vscale x 16 x i16> @llvm.vp.uadd.sat.nxv16i16(<vscale x 16 x i16>, <vscale x 16 x i16>, <vscale x 16 x i1>, i32)
+
+define <vscale x 16 x i16> @vsaddu_vv_nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i16> %b, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_nxv16i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-NEXT:    vsaddu.vv v8, v8, v12, v0.t
+; CHECK-NEXT:    ret
+  %v = call <vscale x 16 x i16> @llvm.vp.uadd.sat.nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i16> %b, <vscale x 16 x i1> %m, i32 %evl)
+  ret <vscale x 16 x i16> %v
+}
+
+define <vscale x 16 x i16> @vsaddu_vv_nxv16i16_unmasked(<vscale x 16 x i16> %va, <vscale x 16 x i16> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_nxv16i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-NEXT:    vsaddu.vv v8, v8, v12
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
+  %v = call <vscale x 16 x i16> @llvm.vp.uadd.sat.nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i16> %b, <vscale x 16 x i1> %m, i32 %evl)
+  ret <vscale x 16 x i16> %v
+}
+
+define <vscale x 16 x i16> @vsaddu_vx_nxv16i16(<vscale x 16 x i16> %va, i16 %b, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_nxv16i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
+; CHECK-NEXT:    vsaddu.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 16 x i16> poison, i16 %b, i32 0
+  %vb = shufflevector <vscale x 16 x i16> %elt.head, <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer
+  %v = call <vscale x 16 x i16> @llvm.vp.uadd.sat.nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i16> %vb, <vscale x 16 x i1> %m, i32 %evl)
+  ret <vscale x 16 x i16> %v
+}
+
+define <vscale x 16 x i16> @vsaddu_vx_nxv16i16_unmasked(<vscale x 16 x i16> %va, i16 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_nxv16i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
+; CHECK-NEXT:    vsaddu.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 16 x i16> poison, i16 %b, i32 0
+  %vb = shufflevector <vscale x 16 x i16> %elt.head, <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer
+  %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
+  %v = call <vscale x 16 x i16> @llvm.vp.uadd.sat.nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i16> %vb, <vscale x 16 x i1> %m, i32 %evl)
+  ret <vscale x 16 x i16> %v
+}
+
+define <vscale x 16 x i16> @vsaddu_vi_nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_nxv16i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-NEXT:    vsaddu.vi v8, v8, -1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 16 x i16> poison, i16 -1, i32 0
+  %vb = shufflevector <vscale x 16 x i16> %elt.head, <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer
+  %v = call <vscale x 16 x i16> @llvm.vp.uadd.sat.nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i16> %vb, <vscale x 16 x i1> %m, i32 %evl)
+  ret <vscale x 16 x i16> %v
+}
+
+define <vscale x 16 x i16> @vsaddu_vi_nxv16i16_unmasked(<vscale x 16 x i16> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_nxv16i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-NEXT:    vsaddu.vi v8, v8, -1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 16 x i16> poison, i16 -1, i32 0
+  %vb = shufflevector <vscale x 16 x i16> %elt.head, <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer
+  %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
+  %v = call <vscale x 16 x i16> @llvm.vp.uadd.sat.nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i16> %vb, <vscale x 16 x i1> %m, i32 %evl)
+  ret <vscale x 16 x i16> %v
+}
+
+declare <vscale x 32 x i16> @llvm.vp.uadd.sat.nxv32i16(<vscale x 32 x i16>, <vscale x 32 x i16>, <vscale x 32 x i1>, i32)
+
+define <vscale x 32 x i16> @vsaddu_vv_nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i16> %b, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_nxv32i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
+; CHECK-NEXT:    vsaddu.vv v8, v8, v16, v0.t
+; CHECK-NEXT:    ret
+  %v = call <vscale x 32 x i16> @llvm.vp.uadd.sat.nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i16> %b, <vscale x 32 x i1> %m, i32 %evl)
+  ret <vscale x 32 x i16> %v
+}
+
+define <vscale x 32 x i16> @vsaddu_vv_nxv32i16_unmasked(<vscale x 32 x i16> %va, <vscale x 32 x i16> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_nxv32i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
+; CHECK-NEXT:    vsaddu.vv v8, v8, v16
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
+  %v = call <vscale x 32 x i16> @llvm.vp.uadd.sat.nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i16> %b, <vscale x 32 x i1> %m, i32 %evl)
+  ret <vscale x 32 x i16> %v
+}
+
+define <vscale x 32 x i16> @vsaddu_vx_nxv32i16(<vscale x 32 x i16> %va, i16 %b, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_nxv32i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; CHECK-NEXT:    vsaddu.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 32 x i16> poison, i16 %b, i32 0
+  %vb = shufflevector <vscale x 32 x i16> %elt.head, <vscale x 32 x i16> poison, <vscale x 32 x i32> zeroinitializer
+  %v = call <vscale x 32 x i16> @llvm.vp.uadd.sat.nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i16> %vb, <vscale x 32 x i1> %m, i32 %evl)
+  ret <vscale x 32 x i16> %v
+}
+
+define <vscale x 32 x i16> @vsaddu_vx_nxv32i16_unmasked(<vscale x 32 x i16> %va, i16 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_nxv32i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; CHECK-NEXT:    vsaddu.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 32 x i16> poison, i16 %b, i32 0
+  %vb = shufflevector <vscale x 32 x i16> %elt.head, <vscale x 32 x i16> poison, <vscale x 32 x i32> zeroinitializer
+  %head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
+  %v = call <vscale x 32 x i16> @llvm.vp.uadd.sat.nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i16> %vb, <vscale x 32 x i1> %m, i32 %evl)
+  ret <vscale x 32 x i16> %v
+}
+
+define <vscale x 32 x i16> @vsaddu_vi_nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_nxv32i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
+; CHECK-NEXT:    vsaddu.vi v8, v8, -1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 32 x i16> poison, i16 -1, i32 0
+  %vb = shufflevector <vscale x 32 x i16> %elt.head, <vscale x 32 x i16> poison, <vscale x 32 x i32> zeroinitializer
+  %v = call <vscale x 32 x i16> @llvm.vp.uadd.sat.nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i16> %vb, <vscale x 32 x i1> %m, i32 %evl)
+  ret <vscale x 32 x i16> %v
+}
+
+define <vscale x 32 x i16> @vsaddu_vi_nxv32i16_unmasked(<vscale x 32 x i16> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_nxv32i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
+; CHECK-NEXT:    vsaddu.vi v8, v8, -1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 32 x i16> poison, i16 -1, i32 0
+  %vb = shufflevector <vscale x 32 x i16> %elt.head, <vscale x 32 x i16> poison, <vscale x 32 x i32> zeroinitializer
+  %head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
+  %v = call <vscale x 32 x i16> @llvm.vp.uadd.sat.nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i16> %vb, <vscale x 32 x i1> %m, i32 %evl)
+  ret <vscale x 32 x i16> %v
+}
+
+declare <vscale x 1 x i32> @llvm.vp.uadd.sat.nxv1i32(<vscale x 1 x i32>, <vscale x 1 x i32>, <vscale x 1 x i1>, i32)
+
+define <vscale x 1 x i32> @vsaddu_vv_nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_nxv1i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT:    vsaddu.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    ret
+  %v = call <vscale x 1 x i32> @llvm.vp.uadd.sat.nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %b, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i32> %v
+}
+
+define <vscale x 1 x i32> @vsaddu_vv_nxv1i32_unmasked(<vscale x 1 x i32> %va, <vscale x 1 x i32> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_nxv1i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT:    vsaddu.vv v8, v8, v9
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x i32> @llvm.vp.uadd.sat.nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %b, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i32> %v
+}
+
+define <vscale x 1 x i32> @vsaddu_vx_nxv1i32(<vscale x 1 x i32> %va, i32 %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_nxv1i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-NEXT:    vsaddu.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 1 x i32> poison, i32 %b, i32 0
+  %vb = shufflevector <vscale x 1 x i32> %elt.head, <vscale x 1 x i32> poison, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x i32> @llvm.vp.uadd.sat.nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %vb, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i32> %v
+}
+
+define <vscale x 1 x i32> @vsaddu_vx_nxv1i32_unmasked(<vscale x 1 x i32> %va, i32 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_nxv1i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-NEXT:    vsaddu.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 1 x i32> poison, i32 %b, i32 0
+  %vb = shufflevector <vscale x 1 x i32> %elt.head, <vscale x 1 x i32> poison, <vscale x 1 x i32> zeroinitializer
+  %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x i32> @llvm.vp.uadd.sat.nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %vb, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i32> %v
+}
+
+define <vscale x 1 x i32> @vsaddu_vi_nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_nxv1i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT:    vsaddu.vi v8, v8, -1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 1 x i32> poison, i32 -1, i32 0
+  %vb = shufflevector <vscale x 1 x i32> %elt.head, <vscale x 1 x i32> poison, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x i32> @llvm.vp.uadd.sat.nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %vb, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i32> %v
+}
+
+define <vscale x 1 x i32> @vsaddu_vi_nxv1i32_unmasked(<vscale x 1 x i32> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_nxv1i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT:    vsaddu.vi v8, v8, -1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 1 x i32> poison, i32 -1, i32 0
+  %vb = shufflevector <vscale x 1 x i32> %elt.head, <vscale x 1 x i32> poison, <vscale x 1 x i32> zeroinitializer
+  %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x i32> @llvm.vp.uadd.sat.nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %vb, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i32> %v
+}
+
+declare <vscale x 2 x i32> @llvm.vp.uadd.sat.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i1>, i32)
+
+define <vscale x 2 x i32> @vsaddu_vv_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %b, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_nxv2i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT:    vsaddu.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    ret
+  %v = call <vscale x 2 x i32> @llvm.vp.uadd.sat.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %b, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x i32> %v
+}
+
+define <vscale x 2 x i32> @vsaddu_vv_nxv2i32_unmasked(<vscale x 2 x i32> %va, <vscale x 2 x i32> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_nxv2i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT:    vsaddu.vv v8, v8, v9
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+  %v = call <vscale x 2 x i32> @llvm.vp.uadd.sat.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %b, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x i32> %v
+}
+
+define <vscale x 2 x i32> @vsaddu_vx_nxv2i32(<vscale x 2 x i32> %va, i32 %b, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_nxv2i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-NEXT:    vsaddu.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 2 x i32> poison, i32 %b, i32 0
+  %vb = shufflevector <vscale x 2 x i32> %elt.head, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
+  %v = call <vscale x 2 x i32> @llvm.vp.uadd.sat.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %vb, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x i32> %v
+}
+
+define <vscale x 2 x i32> @vsaddu_vx_nxv2i32_unmasked(<vscale x 2 x i32> %va, i32 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_nxv2i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-NEXT:    vsaddu.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 2 x i32> poison, i32 %b, i32 0
+  %vb = shufflevector <vscale x 2 x i32> %elt.head, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
+  %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+  %v = call <vscale x 2 x i32> @llvm.vp.uadd.sat.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %vb, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x i32> %v
+}
+
+define <vscale x 2 x i32> @vsaddu_vi_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_nxv2i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT:    vsaddu.vi v8, v8, -1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 2 x i32> poison, i32 -1, i32 0
+  %vb = shufflevector <vscale x 2 x i32> %elt.head, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
+  %v = call <vscale x 2 x i32> @llvm.vp.uadd.sat.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %vb, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x i32> %v
+}
+
+define <vscale x 2 x i32> @vsaddu_vi_nxv2i32_unmasked(<vscale x 2 x i32> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_nxv2i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT:    vsaddu.vi v8, v8, -1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 2 x i32> poison, i32 -1, i32 0
+  %vb = shufflevector <vscale x 2 x i32> %elt.head, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
+  %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+  %v = call <vscale x 2 x i32> @llvm.vp.uadd.sat.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %vb, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x i32> %v
+}
+
+declare <vscale x 4 x i32> @llvm.vp.uadd.sat.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, i32)
+
+define <vscale x 4 x i32> @vsaddu_vv_nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i32> %b, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_nxv4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT:    vsaddu.vv v8, v8, v10, v0.t
+; CHECK-NEXT:    ret
+  %v = call <vscale x 4 x i32> @llvm.vp.uadd.sat.nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i32> %b, <vscale x 4 x i1> %m, i32 %evl)
+  ret <vscale x 4 x i32> %v
+}
+
+define <vscale x 4 x i32> @vsaddu_vv_nxv4i32_unmasked(<vscale x 4 x i32> %va, <vscale x 4 x i32> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_nxv4i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT:    vsaddu.vv v8, v8, v10
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
+  %v = call <vscale x 4 x i32> @llvm.vp.uadd.sat.nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i32> %b, <vscale x 4 x i1> %m, i32 %evl)
+  ret <vscale x 4 x i32> %v
+}
+
+define <vscale x 4 x i32> @vsaddu_vx_nxv4i32(<vscale x 4 x i32> %va, i32 %b, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_nxv4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT:    vsaddu.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 4 x i32> poison, i32 %b, i32 0
+  %vb = shufflevector <vscale x 4 x i32> %elt.head, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+  %v = call <vscale x 4 x i32> @llvm.vp.uadd.sat.nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i32> %vb, <vscale x 4 x i1> %m, i32 %evl)
+  ret <vscale x 4 x i32> %v
+}
+
+define <vscale x 4 x i32> @vsaddu_vx_nxv4i32_unmasked(<vscale x 4 x i32> %va, i32 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_nxv4i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT:    vsaddu.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 4 x i32> poison, i32 %b, i32 0
+  %vb = shufflevector <vscale x 4 x i32> %elt.head, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+  %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
+  %v = call <vscale x 4 x i32> @llvm.vp.uadd.sat.nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i32> %vb, <vscale x 4 x i1> %m, i32 %evl)
+  ret <vscale x 4 x i32> %v
+}
+
+define <vscale x 4 x i32> @vsaddu_vi_nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_nxv4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT:    vsaddu.vi v8, v8, -1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 4 x i32> poison, i32 -1, i32 0
+  %vb = shufflevector <vscale x 4 x i32> %elt.head, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+  %v = call <vscale x 4 x i32> @llvm.vp.uadd.sat.nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i32> %vb, <vscale x 4 x i1> %m, i32 %evl)
+  ret <vscale x 4 x i32> %v
+}
+
+define <vscale x 4 x i32> @vsaddu_vi_nxv4i32_unmasked(<vscale x 4 x i32> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_nxv4i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT:    vsaddu.vi v8, v8, -1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 4 x i32> poison, i32 -1, i32 0
+  %vb = shufflevector <vscale x 4 x i32> %elt.head, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+  %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
+  %v = call <vscale x 4 x i32> @llvm.vp.uadd.sat.nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i32> %vb, <vscale x 4 x i1> %m, i32 %evl)
+  ret <vscale x 4 x i32> %v
+}
+
+declare <vscale x 8 x i32> @llvm.vp.uadd.sat.nxv8i32(<vscale x 8 x i32>, <vscale x 8 x i32>, <vscale x 8 x i1>, i32)
+
+define <vscale x 8 x i32> @vsaddu_vv_nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_nxv8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT:    vsaddu.vv v8, v8, v12, v0.t
+; CHECK-NEXT:    ret
+  %v = call <vscale x 8 x i32> @llvm.vp.uadd.sat.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %b, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i32> %v
+}
+
+define <vscale x 8 x i32> @vsaddu_vv_nxv8i32_unmasked(<vscale x 8 x i32> %va, <vscale x 8 x i32> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_nxv8i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT:    vsaddu.vv v8, v8, v12
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+  %v = call <vscale x 8 x i32> @llvm.vp.uadd.sat.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %b, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i32> %v
+}
+
+define <vscale x 8 x i32> @vsaddu_vx_nxv8i32(<vscale x 8 x i32> %va, i32 %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_nxv8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-NEXT:    vsaddu.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0
+  %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
+  %v = call <vscale x 8 x i32> @llvm.vp.uadd.sat.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %vb, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i32> %v
+}
+
+define <vscale x 8 x i32> @vsaddu_vx_nxv8i32_unmasked(<vscale x 8 x i32> %va, i32 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_nxv8i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-NEXT:    vsaddu.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0
+  %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
+  %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+  %v = call <vscale x 8 x i32> @llvm.vp.uadd.sat.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %vb, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i32> %v
+}
+
+define <vscale x 8 x i32> @vsaddu_vi_nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_nxv8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT:    vsaddu.vi v8, v8, -1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 8 x i32> poison, i32 -1, i32 0
+  %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
+  %v = call <vscale x 8 x i32> @llvm.vp.uadd.sat.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %vb, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i32> %v
+}
+
+define <vscale x 8 x i32> @vsaddu_vi_nxv8i32_unmasked(<vscale x 8 x i32> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_nxv8i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT:    vsaddu.vi v8, v8, -1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 8 x i32> poison, i32 -1, i32 0
+  %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
+  %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+  %v = call <vscale x 8 x i32> @llvm.vp.uadd.sat.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %vb, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i32> %v
+}
+
+declare <vscale x 16 x i32> @llvm.vp.uadd.sat.nxv16i32(<vscale x 16 x i32>, <vscale x 16 x i32>, <vscale x 16 x i1>, i32)
+
+define <vscale x 16 x i32> @vsaddu_vv_nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i32> %b, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_nxv16i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT:    vsaddu.vv v8, v8, v16, v0.t
+; CHECK-NEXT:    ret
+  %v = call <vscale x 16 x i32> @llvm.vp.uadd.sat.nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i32> %b, <vscale x 16 x i1> %m, i32 %evl)
+  ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @vsaddu_vv_nxv16i32_unmasked(<vscale x 16 x i32> %va, <vscale x 16 x i32> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_nxv16i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT:    vsaddu.vv v8, v8, v16
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
+  %v = call <vscale x 16 x i32> @llvm.vp.uadd.sat.nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i32> %b, <vscale x 16 x i1> %m, i32 %evl)
+  ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @vsaddu_vx_nxv16i32(<vscale x 16 x i32> %va, i32 %b, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_nxv16i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-NEXT:    vsaddu.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 16 x i32> poison, i32 %b, i32 0
+  %vb = shufflevector <vscale x 16 x i32> %elt.head, <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
+  %v = call <vscale x 16 x i32> @llvm.vp.uadd.sat.nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i32> %vb, <vscale x 16 x i1> %m, i32 %evl)
+  ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @vsaddu_vx_nxv16i32_unmasked(<vscale x 16 x i32> %va, i32 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_nxv16i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-NEXT:    vsaddu.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 16 x i32> poison, i32 %b, i32 0
+  %vb = shufflevector <vscale x 16 x i32> %elt.head, <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
+  %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
+  %v = call <vscale x 16 x i32> @llvm.vp.uadd.sat.nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i32> %vb, <vscale x 16 x i1> %m, i32 %evl)
+  ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @vsaddu_vi_nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_nxv16i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT:    vsaddu.vi v8, v8, -1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 16 x i32> poison, i32 -1, i32 0
+  %vb = shufflevector <vscale x 16 x i32> %elt.head, <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
+  %v = call <vscale x 16 x i32> @llvm.vp.uadd.sat.nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i32> %vb, <vscale x 16 x i1> %m, i32 %evl)
+  ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @vsaddu_vi_nxv16i32_unmasked(<vscale x 16 x i32> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_nxv16i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT:    vsaddu.vi v8, v8, -1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 16 x i32> poison, i32 -1, i32 0
+  %vb = shufflevector <vscale x 16 x i32> %elt.head, <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
+  %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
+  %v = call <vscale x 16 x i32> @llvm.vp.uadd.sat.nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i32> %vb, <vscale x 16 x i1> %m, i32 %evl)
+  ret <vscale x 16 x i32> %v
+}
+
+; Test that split-legalization works then the mask needs manual splitting.
+
+declare <vscale x 32 x i32> @llvm.vp.uadd.sat.nxv32i32(<vscale x 32 x i32>, <vscale x 32 x i32>, <vscale x 32 x i1>, i32)
+
+define <vscale x 32 x i32> @vsaddu_vi_nxv32i32(<vscale x 32 x i32> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_nxv32i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vmv1r.v v24, v0
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    srli a2, a1, 2
+; CHECK-NEXT:    vsetvli a3, zero, e8, mf2, ta, ma
+; CHECK-NEXT:    vslidedown.vx v0, v0, a2
+; CHECK-NEXT:    slli a1, a1, 1
+; CHECK-NEXT:    sub a2, a0, a1
+; CHECK-NEXT:    sltu a3, a0, a2
+; CHECK-NEXT:    addi a3, a3, -1
+; CHECK-NEXT:    and a2, a3, a2
+; CHECK-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
+; CHECK-NEXT:    vsaddu.vi v16, v16, -1, v0.t
+; CHECK-NEXT:    bltu a0, a1, .LBB118_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    mv a0, a1
+; CHECK-NEXT:  .LBB118_2:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT:    vmv1r.v v0, v24
+; CHECK-NEXT:    vsaddu.vi v8, v8, -1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 32 x i32> poison, i32 -1, i32 0
+  %vb = shufflevector <vscale x 32 x i32> %elt.head, <vscale x 32 x i32> poison, <vscale x 32 x i32> zeroinitializer
+  %v = call <vscale x 32 x i32> @llvm.vp.uadd.sat.nxv32i32(<vscale x 32 x i32> %va, <vscale x 32 x i32> %vb, <vscale x 32 x i1> %m, i32 %evl)
+  ret <vscale x 32 x i32> %v
+}
+
+define <vscale x 32 x i32> @vsaddu_vi_nxv32i32_unmasked(<vscale x 32 x i32> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_nxv32i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    slli a1, a1, 1
+; CHECK-NEXT:    sub a2, a0, a1
+; CHECK-NEXT:    sltu a3, a0, a2
+; CHECK-NEXT:    addi a3, a3, -1
+; CHECK-NEXT:    and a2, a3, a2
+; CHECK-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
+; CHECK-NEXT:    vsaddu.vi v16, v16, -1
+; CHECK-NEXT:    bltu a0, a1, .LBB119_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    mv a0, a1
+; CHECK-NEXT:  .LBB119_2:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT:    vsaddu.vi v8, v8, -1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 32 x i32> poison, i32 -1, i32 0
+  %vb = shufflevector <vscale x 32 x i32> %elt.head, <vscale x 32 x i32> poison, <vscale x 32 x i32> zeroinitializer
+  %head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
+  %v = call <vscale x 32 x i32> @llvm.vp.uadd.sat.nxv32i32(<vscale x 32 x i32> %va, <vscale x 32 x i32> %vb, <vscale x 32 x i1> %m, i32 %evl)
+  ret <vscale x 32 x i32> %v
+}
+
+declare <vscale x 1 x i64> @llvm.vp.uadd.sat.nxv1i64(<vscale x 1 x i64>, <vscale x 1 x i64>, <vscale x 1 x i1>, i32)
+
+define <vscale x 1 x i64> @vsaddu_vv_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_nxv1i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT:    vsaddu.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    ret
+  %v = call <vscale x 1 x i64> @llvm.vp.uadd.sat.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %b, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i64> %v
+}
+
+define <vscale x 1 x i64> @vsaddu_vv_nxv1i64_unmasked(<vscale x 1 x i64> %va, <vscale x 1 x i64> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_nxv1i64_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT:    vsaddu.vv v8, v8, v9
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x i64> @llvm.vp.uadd.sat.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %b, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i64> %v
+}
+
+define <vscale x 1 x i64> @vsaddu_vx_nxv1i64(<vscale x 1 x i64> %va, i64 %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vsaddu_vx_nxv1i64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    sw a1, 12(sp)
+; RV32-NEXT:    sw a0, 8(sp)
+; RV32-NEXT:    addi a0, sp, 8
+; RV32-NEXT:    vsetvli a1, zero, e64, m1, ta, ma
+; RV32-NEXT:    vlse64.v v9, (a0), zero
+; RV32-NEXT:    vsetvli zero, a2, e64, m1, ta, ma
+; RV32-NEXT:    vsaddu.vv v8, v8, v9, v0.t
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vsaddu_vx_nxv1i64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli zero, a1, e64, m1, ta, ma
+; RV64-NEXT:    vsaddu.vx v8, v8, a0, v0.t
+; RV64-NEXT:    ret
+  %elt.head = insertelement <vscale x 1 x i64> poison, i64 %b, i32 0
+  %vb = shufflevector <vscale x 1 x i64> %elt.head, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x i64> @llvm.vp.uadd.sat.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %vb, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i64> %v
+}
+
+define <vscale x 1 x i64> @vsaddu_vx_nxv1i64_unmasked(<vscale x 1 x i64> %va, i64 %b, i32 zeroext %evl) {
+; RV32-LABEL: vsaddu_vx_nxv1i64_unmasked:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    sw a1, 12(sp)
+; RV32-NEXT:    sw a0, 8(sp)
+; RV32-NEXT:    addi a0, sp, 8
+; RV32-NEXT:    vsetvli a1, zero, e64, m1, ta, ma
+; RV32-NEXT:    vlse64.v v9, (a0), zero
+; RV32-NEXT:    vsetvli zero, a2, e64, m1, ta, ma
+; RV32-NEXT:    vsaddu.vv v8, v8, v9
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vsaddu_vx_nxv1i64_unmasked:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli zero, a1, e64, m1, ta, ma
+; RV64-NEXT:    vsaddu.vx v8, v8, a0
+; RV64-NEXT:    ret
+  %elt.head = insertelement <vscale x 1 x i64> poison, i64 %b, i32 0
+  %vb = shufflevector <vscale x 1 x i64> %elt.head, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+  %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x i64> @llvm.vp.uadd.sat.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %vb, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i64> %v
+}
+
+define <vscale x 1 x i64> @vsaddu_vi_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_nxv1i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT:    vsaddu.vi v8, v8, -1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 1 x i64> poison, i64 -1, i32 0
+  %vb = shufflevector <vscale x 1 x i64> %elt.head, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x i64> @llvm.vp.uadd.sat.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %vb, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i64> %v
+}
+
+define <vscale x 1 x i64> @vsaddu_vi_nxv1i64_unmasked(<vscale x 1 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_nxv1i64_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT:    vsaddu.vi v8, v8, -1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 1 x i64> poison, i64 -1, i32 0
+  %vb = shufflevector <vscale x 1 x i64> %elt.head, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+  %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x i64> @llvm.vp.uadd.sat.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %vb, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i64> %v
+}
+
+declare <vscale x 2 x i64> @llvm.vp.uadd.sat.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i1>, i32)
+
+define <vscale x 2 x i64> @vsaddu_vv_nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i64> %b, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_nxv2i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-NEXT:    vsaddu.vv v8, v8, v10, v0.t
+; CHECK-NEXT:    ret
+  %v = call <vscale x 2 x i64> @llvm.vp.uadd.sat.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i64> %b, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x i64> %v
+}
+
+define <vscale x 2 x i64> @vsaddu_vv_nxv2i64_unmasked(<vscale x 2 x i64> %va, <vscale x 2 x i64> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_nxv2i64_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-NEXT:    vsaddu.vv v8, v8, v10
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+  %v = call <vscale x 2 x i64> @llvm.vp.uadd.sat.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i64> %b, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x i64> %v
+}
+
+define <vscale x 2 x i64> @vsaddu_vx_nxv2i64(<vscale x 2 x i64> %va, i64 %b, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vsaddu_vx_nxv2i64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    sw a1, 12(sp)
+; RV32-NEXT:    sw a0, 8(sp)
+; RV32-NEXT:    addi a0, sp, 8
+; RV32-NEXT:    vsetvli a1, zero, e64, m2, ta, ma
+; RV32-NEXT:    vlse64.v v10, (a0), zero
+; RV32-NEXT:    vsetvli zero, a2, e64, m2, ta, ma
+; RV32-NEXT:    vsaddu.vv v8, v8, v10, v0.t
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vsaddu_vx_nxv2i64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli zero, a1, e64, m2, ta, ma
+; RV64-NEXT:    vsaddu.vx v8, v8, a0, v0.t
+; RV64-NEXT:    ret
+  %elt.head = insertelement <vscale x 2 x i64> poison, i64 %b, i32 0
+  %vb = shufflevector <vscale x 2 x i64> %elt.head, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+  %v = call <vscale x 2 x i64> @llvm.vp.uadd.sat.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i64> %vb, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x i64> %v
+}
+
+define <vscale x 2 x i64> @vsaddu_vx_nxv2i64_unmasked(<vscale x 2 x i64> %va, i64 %b, i32 zeroext %evl) {
+; RV32-LABEL: vsaddu_vx_nxv2i64_unmasked:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    sw a1, 12(sp)
+; RV32-NEXT:    sw a0, 8(sp)
+; RV32-NEXT:    addi a0, sp, 8
+; RV32-NEXT:    vsetvli a1, zero, e64, m2, ta, ma
+; RV32-NEXT:    vlse64.v v10, (a0), zero
+; RV32-NEXT:    vsetvli zero, a2, e64, m2, ta, ma
+; RV32-NEXT:    vsaddu.vv v8, v8, v10
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vsaddu_vx_nxv2i64_unmasked:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli zero, a1, e64, m2, ta, ma
+; RV64-NEXT:    vsaddu.vx v8, v8, a0
+; RV64-NEXT:    ret
+  %elt.head = insertelement <vscale x 2 x i64> poison, i64 %b, i32 0
+  %vb = shufflevector <vscale x 2 x i64> %elt.head, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+  %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+  %v = call <vscale x 2 x i64> @llvm.vp.uadd.sat.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i64> %vb, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x i64> %v
+}
+
+define <vscale x 2 x i64> @vsaddu_vi_nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_nxv2i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-NEXT:    vsaddu.vi v8, v8, -1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 2 x i64> poison, i64 -1, i32 0
+  %vb = shufflevector <vscale x 2 x i64> %elt.head, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+  %v = call <vscale x 2 x i64> @llvm.vp.uadd.sat.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i64> %vb, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x i64> %v
+}
+
+define <vscale x 2 x i64> @vsaddu_vi_nxv2i64_unmasked(<vscale x 2 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_nxv2i64_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-NEXT:    vsaddu.vi v8, v8, -1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 2 x i64> poison, i64 -1, i32 0
+  %vb = shufflevector <vscale x 2 x i64> %elt.head, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+  %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+  %v = call <vscale x 2 x i64> @llvm.vp.uadd.sat.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i64> %vb, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x i64> %v
+}
+
+declare <vscale x 4 x i64> @llvm.vp.uadd.sat.nxv4i64(<vscale x 4 x i64>, <vscale x 4 x i64>, <vscale x 4 x i1>, i32)
+
+define <vscale x 4 x i64> @vsaddu_vv_nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i64> %b, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_nxv4i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT:    vsaddu.vv v8, v8, v12, v0.t
+; CHECK-NEXT:    ret
+  %v = call <vscale x 4 x i64> @llvm.vp.uadd.sat.nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i64> %b, <vscale x 4 x i1> %m, i32 %evl)
+  ret <vscale x 4 x i64> %v
+}
+
+define <vscale x 4 x i64> @vsaddu_vv_nxv4i64_unmasked(<vscale x 4 x i64> %va, <vscale x 4 x i64> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_nxv4i64_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT:    vsaddu.vv v8, v8, v12
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
+  %v = call <vscale x 4 x i64> @llvm.vp.uadd.sat.nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i64> %b, <vscale x 4 x i1> %m, i32 %evl)
+  ret <vscale x 4 x i64> %v
+}
+
+define <vscale x 4 x i64> @vsaddu_vx_nxv4i64(<vscale x 4 x i64> %va, i64 %b, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vsaddu_vx_nxv4i64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    sw a1, 12(sp)
+; RV32-NEXT:    sw a0, 8(sp)
+; RV32-NEXT:    addi a0, sp, 8
+; RV32-NEXT:    vsetvli a1, zero, e64, m4, ta, ma
+; RV32-NEXT:    vlse64.v v12, (a0), zero
+; RV32-NEXT:    vsetvli zero, a2, e64, m4, ta, ma
+; RV32-NEXT:    vsaddu.vv v8, v8, v12, v0.t
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vsaddu_vx_nxv4i64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
+; RV64-NEXT:    vsaddu.vx v8, v8, a0, v0.t
+; RV64-NEXT:    ret
+  %elt.head = insertelement <vscale x 4 x i64> poison, i64 %b, i32 0
+  %vb = shufflevector <vscale x 4 x i64> %elt.head, <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
+  %v = call <vscale x 4 x i64> @llvm.vp.uadd.sat.nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i64> %vb, <vscale x 4 x i1> %m, i32 %evl)
+  ret <vscale x 4 x i64> %v
+}
+
+define <vscale x 4 x i64> @vsaddu_vx_nxv4i64_unmasked(<vscale x 4 x i64> %va, i64 %b, i32 zeroext %evl) {
+; RV32-LABEL: vsaddu_vx_nxv4i64_unmasked:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    sw a1, 12(sp)
+; RV32-NEXT:    sw a0, 8(sp)
+; RV32-NEXT:    addi a0, sp, 8
+; RV32-NEXT:    vsetvli a1, zero, e64, m4, ta, ma
+; RV32-NEXT:    vlse64.v v12, (a0), zero
+; RV32-NEXT:    vsetvli zero, a2, e64, m4, ta, ma
+; RV32-NEXT:    vsaddu.vv v8, v8, v12
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vsaddu_vx_nxv4i64_unmasked:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
+; RV64-NEXT:    vsaddu.vx v8, v8, a0
+; RV64-NEXT:    ret
+  %elt.head = insertelement <vscale x 4 x i64> poison, i64 %b, i32 0
+  %vb = shufflevector <vscale x 4 x i64> %elt.head, <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
+  %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
+  %v = call <vscale x 4 x i64> @llvm.vp.uadd.sat.nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i64> %vb, <vscale x 4 x i1> %m, i32 %evl)
+  ret <vscale x 4 x i64> %v
+}
+
+define <vscale x 4 x i64> @vsaddu_vi_nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_nxv4i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT:    vsaddu.vi v8, v8, -1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 4 x i64> poison, i64 -1, i32 0
+  %vb = shufflevector <vscale x 4 x i64> %elt.head, <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
+  %v = call <vscale x 4 x i64> @llvm.vp.uadd.sat.nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i64> %vb, <vscale x 4 x i1> %m, i32 %evl)
+  ret <vscale x 4 x i64> %v
+}
+
+define <vscale x 4 x i64> @vsaddu_vi_nxv4i64_unmasked(<vscale x 4 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_nxv4i64_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT:    vsaddu.vi v8, v8, -1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 4 x i64> poison, i64 -1, i32 0
+  %vb = shufflevector <vscale x 4 x i64> %elt.head, <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
+  %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
+  %v = call <vscale x 4 x i64> @llvm.vp.uadd.sat.nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i64> %vb, <vscale x 4 x i1> %m, i32 %evl)
+  ret <vscale x 4 x i64> %v
+}
+
+declare <vscale x 8 x i64> @llvm.vp.uadd.sat.nxv8i64(<vscale x 8 x i64>, <vscale x 8 x i64>, <vscale x 8 x i1>, i32)
+
+define <vscale x 8 x i64> @vsaddu_vv_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_nxv8i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT:    vsaddu.vv v8, v8, v16, v0.t
+; CHECK-NEXT:    ret
+  %v = call <vscale x 8 x i64> @llvm.vp.uadd.sat.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %b, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i64> %v
+}
+
+define <vscale x 8 x i64> @vsaddu_vv_nxv8i64_unmasked(<vscale x 8 x i64> %va, <vscale x 8 x i64> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_nxv8i64_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT:    vsaddu.vv v8, v8, v16
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+  %v = call <vscale x 8 x i64> @llvm.vp.uadd.sat.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %b, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i64> %v
+}
+
+define <vscale x 8 x i64> @vsaddu_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vsaddu_vx_nxv8i64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    sw a1, 12(sp)
+; RV32-NEXT:    sw a0, 8(sp)
+; RV32-NEXT:    addi a0, sp, 8
+; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
+; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
+; RV32-NEXT:    vsaddu.vv v8, v8, v16, v0.t
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vsaddu_vx_nxv8i64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vsaddu.vx v8, v8, a0, v0.t
+; RV64-NEXT:    ret
+  %elt.head = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0
+  %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+  %v = call <vscale x 8 x i64> @llvm.vp.uadd.sat.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i64> %v
+}
+
+define <vscale x 8 x i64> @vsaddu_vx_nxv8i64_unmasked(<vscale x 8 x i64> %va, i64 %b, i32 zeroext %evl) {
+; RV32-LABEL: vsaddu_vx_nxv8i64_unmasked:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    sw a1, 12(sp)
+; RV32-NEXT:    sw a0, 8(sp)
+; RV32-NEXT:    addi a0, sp, 8
+; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
+; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
+; RV32-NEXT:    vsaddu.vv v8, v8, v16
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vsaddu_vx_nxv8i64_unmasked:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vsaddu.vx v8, v8, a0
+; RV64-NEXT:    ret
+  %elt.head = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0
+  %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+  %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+  %v = call <vscale x 8 x i64> @llvm.vp.uadd.sat.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i64> %v
+}
+
+define <vscale x 8 x i64> @vsaddu_vi_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_nxv8i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT:    vsaddu.vi v8, v8, -1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 8 x i64> poison, i64 -1, i32 0
+  %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+  %v = call <vscale x 8 x i64> @llvm.vp.uadd.sat.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i64> %v
+}
+
+define <vscale x 8 x i64> @vsaddu_vi_nxv8i64_unmasked(<vscale x 8 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_nxv8i64_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT:    vsaddu.vi v8, v8, -1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 8 x i64> poison, i64 -1, i32 0
+  %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+  %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+  %v = call <vscale x 8 x i64> @llvm.vp.uadd.sat.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i64> %v
+}

diff  --git a/llvm/test/CodeGen/RISCV/rvv/vssub-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vssub-vp.ll
new file mode 100644
index 00000000000000..2d51a2ee44f658
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vssub-vp.ll
@@ -0,0 +1,2067 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s --check-prefixes=CHECK,RV64
+
+declare <vscale x 8 x i7> @llvm.vp.ssub.sat.nxv8i7(<vscale x 8 x i7>, <vscale x 8 x i7>, <vscale x 8 x i1>, i32)
+
+define <vscale x 8 x i7> @vssub_vx_nxv8i7(<vscale x 8 x i7> %a, i7 signext %b, <vscale x 8 x i1> %mask, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_nxv8i7:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a2, zero, e8, m1, ta, ma
+; CHECK-NEXT:    vadd.vv v8, v8, v8
+; CHECK-NEXT:    vsra.vi v8, v8, 1
+; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-NEXT:    vsub.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    li a0, 63
+; CHECK-NEXT:    vmin.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    li a0, 192
+; CHECK-NEXT:    vmax.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 8 x i7> poison, i7 %b, i32 0
+  %vb = shufflevector <vscale x 8 x i7> %elt.head, <vscale x 8 x i7> poison, <vscale x 8 x i32> zeroinitializer
+  %v = call <vscale x 8 x i7> @llvm.vp.ssub.sat.nxv8i7(<vscale x 8 x i7> %a, <vscale x 8 x i7> %vb, <vscale x 8 x i1> %mask, i32 %evl)
+  ret <vscale x 8 x i7> %v
+}
+
+declare <vscale x 1 x i8> @llvm.vp.ssub.sat.nxv1i8(<vscale x 1 x i8>, <vscale x 1 x i8>, <vscale x 1 x i1>, i32)
+
+define <vscale x 1 x i8> @vssub_vv_nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_nxv1i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-NEXT:    vssub.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    ret
+  %v = call <vscale x 1 x i8> @llvm.vp.ssub.sat.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %b, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i8> %v
+}
+
+define <vscale x 1 x i8> @vssub_vv_nxv1i8_unmasked(<vscale x 1 x i8> %va, <vscale x 1 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_nxv1i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-NEXT:    vssub.vv v8, v8, v9
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x i8> @llvm.vp.ssub.sat.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %b, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i8> %v
+}
+
+define <vscale x 1 x i8> @vssub_vx_nxv1i8(<vscale x 1 x i8> %va, i8 %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_nxv1i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 1 x i8> poison, i8 %b, i32 0
+  %vb = shufflevector <vscale x 1 x i8> %elt.head, <vscale x 1 x i8> poison, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x i8> @llvm.vp.ssub.sat.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i8> %v
+}
+
+define <vscale x 1 x i8> @vssub_vx_nxv1i8_commute(<vscale x 1 x i8> %va, i8 %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_nxv1i8_commute:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a2, zero, e8, mf8, ta, ma
+; CHECK-NEXT:    vmv.v.x v9, a0
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT:    vssub.vv v8, v9, v8, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 1 x i8> poison, i8 %b, i32 0
+  %vb = shufflevector <vscale x 1 x i8> %elt.head, <vscale x 1 x i8> poison, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x i8> @llvm.vp.ssub.sat.nxv1i8(<vscale x 1 x i8> %vb, <vscale x 1 x i8> %va, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i8> %v
+}
+
+define <vscale x 1 x i8> @vssub_vx_nxv1i8_unmasked(<vscale x 1 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_nxv1i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 1 x i8> poison, i8 %b, i32 0
+  %vb = shufflevector <vscale x 1 x i8> %elt.head, <vscale x 1 x i8> poison, <vscale x 1 x i32> zeroinitializer
+  %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x i8> @llvm.vp.ssub.sat.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i8> %v
+}
+
+define <vscale x 1 x i8> @vssub_vi_nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_nxv1i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 1 x i8> poison, i8 -1, i32 0
+  %vb = shufflevector <vscale x 1 x i8> %elt.head, <vscale x 1 x i8> poison, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x i8> @llvm.vp.ssub.sat.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i8> %v
+}
+
+define <vscale x 1 x i8> @vssub_vi_nxv1i8_unmasked(<vscale x 1 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_nxv1i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 1 x i8> poison, i8 -1, i32 0
+  %vb = shufflevector <vscale x 1 x i8> %elt.head, <vscale x 1 x i8> poison, <vscale x 1 x i32> zeroinitializer
+  %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x i8> @llvm.vp.ssub.sat.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i8> %v
+}
+
+declare <vscale x 2 x i8> @llvm.vp.ssub.sat.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i8>, <vscale x 2 x i1>, i32)
+
+define <vscale x 2 x i8> @vssub_vv_nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %b, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_nxv2i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-NEXT:    vssub.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    ret
+  %v = call <vscale x 2 x i8> @llvm.vp.ssub.sat.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %b, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x i8> %v
+}
+
+define <vscale x 2 x i8> @vssub_vv_nxv2i8_unmasked(<vscale x 2 x i8> %va, <vscale x 2 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_nxv2i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-NEXT:    vssub.vv v8, v8, v9
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+  %v = call <vscale x 2 x i8> @llvm.vp.ssub.sat.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %b, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x i8> %v
+}
+
+define <vscale x 2 x i8> @vssub_vx_nxv2i8(<vscale x 2 x i8> %va, i8 %b, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_nxv2i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 2 x i8> poison, i8 %b, i32 0
+  %vb = shufflevector <vscale x 2 x i8> %elt.head, <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer
+  %v = call <vscale x 2 x i8> @llvm.vp.ssub.sat.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %vb, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x i8> %v
+}
+
+define <vscale x 2 x i8> @vssub_vx_nxv2i8_unmasked(<vscale x 2 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_nxv2i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 2 x i8> poison, i8 %b, i32 0
+  %vb = shufflevector <vscale x 2 x i8> %elt.head, <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer
+  %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+  %v = call <vscale x 2 x i8> @llvm.vp.ssub.sat.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %vb, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x i8> %v
+}
+
+define <vscale x 2 x i8> @vssub_vi_nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_nxv2i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 2 x i8> poison, i8 -1, i32 0
+  %vb = shufflevector <vscale x 2 x i8> %elt.head, <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer
+  %v = call <vscale x 2 x i8> @llvm.vp.ssub.sat.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %vb, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x i8> %v
+}
+
+define <vscale x 2 x i8> @vssub_vi_nxv2i8_unmasked(<vscale x 2 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_nxv2i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 2 x i8> poison, i8 -1, i32 0
+  %vb = shufflevector <vscale x 2 x i8> %elt.head, <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer
+  %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+  %v = call <vscale x 2 x i8> @llvm.vp.ssub.sat.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %vb, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x i8> %v
+}
+
+declare <vscale x 3 x i8> @llvm.vp.ssub.sat.nxv3i8(<vscale x 3 x i8>, <vscale x 3 x i8>, <vscale x 3 x i1>, i32)
+
+define <vscale x 3 x i8> @vssub_vv_nxv3i8(<vscale x 3 x i8> %va, <vscale x 3 x i8> %b, <vscale x 3 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_nxv3i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT:    vssub.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    ret
+  %v = call <vscale x 3 x i8> @llvm.vp.ssub.sat.nxv3i8(<vscale x 3 x i8> %va, <vscale x 3 x i8> %b, <vscale x 3 x i1> %m, i32 %evl)
+  ret <vscale x 3 x i8> %v
+}
+
+define <vscale x 3 x i8> @vssub_vv_nxv3i8_unmasked(<vscale x 3 x i8> %va, <vscale x 3 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_nxv3i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT:    vssub.vv v8, v8, v9
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 3 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 3 x i1> %head, <vscale x 3 x i1> poison, <vscale x 3 x i32> zeroinitializer
+  %v = call <vscale x 3 x i8> @llvm.vp.ssub.sat.nxv3i8(<vscale x 3 x i8> %va, <vscale x 3 x i8> %b, <vscale x 3 x i1> %m, i32 %evl)
+  ret <vscale x 3 x i8> %v
+}
+
+define <vscale x 3 x i8> @vssub_vx_nxv3i8(<vscale x 3 x i8> %va, i8 %b, <vscale x 3 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_nxv3i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 3 x i8> poison, i8 %b, i32 0
+  %vb = shufflevector <vscale x 3 x i8> %elt.head, <vscale x 3 x i8> poison, <vscale x 3 x i32> zeroinitializer
+  %v = call <vscale x 3 x i8> @llvm.vp.ssub.sat.nxv3i8(<vscale x 3 x i8> %va, <vscale x 3 x i8> %vb, <vscale x 3 x i1> %m, i32 %evl)
+  ret <vscale x 3 x i8> %v
+}
+
+define <vscale x 3 x i8> @vssub_vx_nxv3i8_unmasked(<vscale x 3 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_nxv3i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 3 x i8> poison, i8 %b, i32 0
+  %vb = shufflevector <vscale x 3 x i8> %elt.head, <vscale x 3 x i8> poison, <vscale x 3 x i32> zeroinitializer
+  %head = insertelement <vscale x 3 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 3 x i1> %head, <vscale x 3 x i1> poison, <vscale x 3 x i32> zeroinitializer
+  %v = call <vscale x 3 x i8> @llvm.vp.ssub.sat.nxv3i8(<vscale x 3 x i8> %va, <vscale x 3 x i8> %vb, <vscale x 3 x i1> %m, i32 %evl)
+  ret <vscale x 3 x i8> %v
+}
+
+define <vscale x 3 x i8> @vssub_vi_nxv3i8(<vscale x 3 x i8> %va, <vscale x 3 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_nxv3i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 3 x i8> poison, i8 -1, i32 0
+  %vb = shufflevector <vscale x 3 x i8> %elt.head, <vscale x 3 x i8> poison, <vscale x 3 x i32> zeroinitializer
+  %v = call <vscale x 3 x i8> @llvm.vp.ssub.sat.nxv3i8(<vscale x 3 x i8> %va, <vscale x 3 x i8> %vb, <vscale x 3 x i1> %m, i32 %evl)
+  ret <vscale x 3 x i8> %v
+}
+
+define <vscale x 3 x i8> @vssub_vi_nxv3i8_unmasked(<vscale x 3 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_nxv3i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 3 x i8> poison, i8 -1, i32 0
+  %vb = shufflevector <vscale x 3 x i8> %elt.head, <vscale x 3 x i8> poison, <vscale x 3 x i32> zeroinitializer
+  %head = insertelement <vscale x 3 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 3 x i1> %head, <vscale x 3 x i1> poison, <vscale x 3 x i32> zeroinitializer
+  %v = call <vscale x 3 x i8> @llvm.vp.ssub.sat.nxv3i8(<vscale x 3 x i8> %va, <vscale x 3 x i8> %vb, <vscale x 3 x i1> %m, i32 %evl)
+  ret <vscale x 3 x i8> %v
+}
+
+declare <vscale x 4 x i8> @llvm.vp.ssub.sat.nxv4i8(<vscale x 4 x i8>, <vscale x 4 x i8>, <vscale x 4 x i1>, i32)
+
+define <vscale x 4 x i8> @vssub_vv_nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %b, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_nxv4i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT:    vssub.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    ret
+  %v = call <vscale x 4 x i8> @llvm.vp.ssub.sat.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %b, <vscale x 4 x i1> %m, i32 %evl)
+  ret <vscale x 4 x i8> %v
+}
+
+define <vscale x 4 x i8> @vssub_vv_nxv4i8_unmasked(<vscale x 4 x i8> %va, <vscale x 4 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_nxv4i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT:    vssub.vv v8, v8, v9
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
+  %v = call <vscale x 4 x i8> @llvm.vp.ssub.sat.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %b, <vscale x 4 x i1> %m, i32 %evl)
+  ret <vscale x 4 x i8> %v
+}
+
+define <vscale x 4 x i8> @vssub_vx_nxv4i8(<vscale x 4 x i8> %va, i8 %b, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_nxv4i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 4 x i8> poison, i8 %b, i32 0
+  %vb = shufflevector <vscale x 4 x i8> %elt.head, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer
+  %v = call <vscale x 4 x i8> @llvm.vp.ssub.sat.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %vb, <vscale x 4 x i1> %m, i32 %evl)
+  ret <vscale x 4 x i8> %v
+}
+
+define <vscale x 4 x i8> @vssub_vx_nxv4i8_unmasked(<vscale x 4 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_nxv4i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 4 x i8> poison, i8 %b, i32 0
+  %vb = shufflevector <vscale x 4 x i8> %elt.head, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer
+  %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
+  %v = call <vscale x 4 x i8> @llvm.vp.ssub.sat.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %vb, <vscale x 4 x i1> %m, i32 %evl)
+  ret <vscale x 4 x i8> %v
+}
+
+define <vscale x 4 x i8> @vssub_vi_nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_nxv4i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 4 x i8> poison, i8 -1, i32 0
+  %vb = shufflevector <vscale x 4 x i8> %elt.head, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer
+  %v = call <vscale x 4 x i8> @llvm.vp.ssub.sat.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %vb, <vscale x 4 x i1> %m, i32 %evl)
+  ret <vscale x 4 x i8> %v
+}
+
+define <vscale x 4 x i8> @vssub_vi_nxv4i8_unmasked(<vscale x 4 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_nxv4i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 4 x i8> poison, i8 -1, i32 0
+  %vb = shufflevector <vscale x 4 x i8> %elt.head, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer
+  %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
+  %v = call <vscale x 4 x i8> @llvm.vp.ssub.sat.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %vb, <vscale x 4 x i1> %m, i32 %evl)
+  ret <vscale x 4 x i8> %v
+}
+
+declare <vscale x 8 x i8> @llvm.vp.ssub.sat.nxv8i8(<vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i1>, i32)
+
+define <vscale x 8 x i8> @vssub_vv_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_nxv8i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-NEXT:    vssub.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    ret
+  %v = call <vscale x 8 x i8> @llvm.vp.ssub.sat.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %b, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i8> %v
+}
+
+define <vscale x 8 x i8> @vssub_vv_nxv8i8_unmasked(<vscale x 8 x i8> %va, <vscale x 8 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_nxv8i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-NEXT:    vssub.vv v8, v8, v9
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+  %v = call <vscale x 8 x i8> @llvm.vp.ssub.sat.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %b, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i8> %v
+}
+
+define <vscale x 8 x i8> @vssub_vx_nxv8i8(<vscale x 8 x i8> %va, i8 %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_nxv8i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 8 x i8> poison, i8 %b, i32 0
+  %vb = shufflevector <vscale x 8 x i8> %elt.head, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
+  %v = call <vscale x 8 x i8> @llvm.vp.ssub.sat.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %vb, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i8> %v
+}
+
+define <vscale x 8 x i8> @vssub_vx_nxv8i8_unmasked(<vscale x 8 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_nxv8i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 8 x i8> poison, i8 %b, i32 0
+  %vb = shufflevector <vscale x 8 x i8> %elt.head, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
+  %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+  %v = call <vscale x 8 x i8> @llvm.vp.ssub.sat.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %vb, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i8> %v
+}
+
+define <vscale x 8 x i8> @vssub_vi_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_nxv8i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 8 x i8> poison, i8 -1, i32 0
+  %vb = shufflevector <vscale x 8 x i8> %elt.head, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
+  %v = call <vscale x 8 x i8> @llvm.vp.ssub.sat.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %vb, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i8> %v
+}
+
+define <vscale x 8 x i8> @vssub_vi_nxv8i8_unmasked(<vscale x 8 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_nxv8i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 8 x i8> poison, i8 -1, i32 0
+  %vb = shufflevector <vscale x 8 x i8> %elt.head, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
+  %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+  %v = call <vscale x 8 x i8> @llvm.vp.ssub.sat.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %vb, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i8> %v
+}
+
+declare <vscale x 16 x i8> @llvm.vp.ssub.sat.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i1>, i32)
+
+define <vscale x 16 x i8> @vssub_vv_nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i8> %b, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_nxv16i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
+; CHECK-NEXT:    vssub.vv v8, v8, v10, v0.t
+; CHECK-NEXT:    ret
+  %v = call <vscale x 16 x i8> @llvm.vp.ssub.sat.nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i8> %b, <vscale x 16 x i1> %m, i32 %evl)
+  ret <vscale x 16 x i8> %v
+}
+
+define <vscale x 16 x i8> @vssub_vv_nxv16i8_unmasked(<vscale x 16 x i8> %va, <vscale x 16 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_nxv16i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
+; CHECK-NEXT:    vssub.vv v8, v8, v10
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
+  %v = call <vscale x 16 x i8> @llvm.vp.ssub.sat.nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i8> %b, <vscale x 16 x i1> %m, i32 %evl)
+  ret <vscale x 16 x i8> %v
+}
+
+define <vscale x 16 x i8> @vssub_vx_nxv16i8(<vscale x 16 x i8> %va, i8 %b, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_nxv16i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 16 x i8> poison, i8 %b, i32 0
+  %vb = shufflevector <vscale x 16 x i8> %elt.head, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
+  %v = call <vscale x 16 x i8> @llvm.vp.ssub.sat.nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i8> %vb, <vscale x 16 x i1> %m, i32 %evl)
+  ret <vscale x 16 x i8> %v
+}
+
+define <vscale x 16 x i8> @vssub_vx_nxv16i8_unmasked(<vscale x 16 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_nxv16i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 16 x i8> poison, i8 %b, i32 0
+  %vb = shufflevector <vscale x 16 x i8> %elt.head, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
+  %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
+  %v = call <vscale x 16 x i8> @llvm.vp.ssub.sat.nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i8> %vb, <vscale x 16 x i1> %m, i32 %evl)
+  ret <vscale x 16 x i8> %v
+}
+
+define <vscale x 16 x i8> @vssub_vi_nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_nxv16i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 16 x i8> poison, i8 -1, i32 0
+  %vb = shufflevector <vscale x 16 x i8> %elt.head, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
+  %v = call <vscale x 16 x i8> @llvm.vp.ssub.sat.nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i8> %vb, <vscale x 16 x i1> %m, i32 %evl)
+  ret <vscale x 16 x i8> %v
+}
+
+define <vscale x 16 x i8> @vssub_vi_nxv16i8_unmasked(<vscale x 16 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_nxv16i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 16 x i8> poison, i8 -1, i32 0
+  %vb = shufflevector <vscale x 16 x i8> %elt.head, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
+  %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
+  %v = call <vscale x 16 x i8> @llvm.vp.ssub.sat.nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i8> %vb, <vscale x 16 x i1> %m, i32 %evl)
+  ret <vscale x 16 x i8> %v
+}
+
+declare <vscale x 32 x i8> @llvm.vp.ssub.sat.nxv32i8(<vscale x 32 x i8>, <vscale x 32 x i8>, <vscale x 32 x i1>, i32)
+
+define <vscale x 32 x i8> @vssub_vv_nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i8> %b, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_nxv32i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
+; CHECK-NEXT:    vssub.vv v8, v8, v12, v0.t
+; CHECK-NEXT:    ret
+  %v = call <vscale x 32 x i8> @llvm.vp.ssub.sat.nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i8> %b, <vscale x 32 x i1> %m, i32 %evl)
+  ret <vscale x 32 x i8> %v
+}
+
+define <vscale x 32 x i8> @vssub_vv_nxv32i8_unmasked(<vscale x 32 x i8> %va, <vscale x 32 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_nxv32i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
+; CHECK-NEXT:    vssub.vv v8, v8, v12
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
+  %v = call <vscale x 32 x i8> @llvm.vp.ssub.sat.nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i8> %b, <vscale x 32 x i1> %m, i32 %evl)
+  ret <vscale x 32 x i8> %v
+}
+
+define <vscale x 32 x i8> @vssub_vx_nxv32i8(<vscale x 32 x i8> %va, i8 %b, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_nxv32i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, m4, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 32 x i8> poison, i8 %b, i32 0
+  %vb = shufflevector <vscale x 32 x i8> %elt.head, <vscale x 32 x i8> poison, <vscale x 32 x i32> zeroinitializer
+  %v = call <vscale x 32 x i8> @llvm.vp.ssub.sat.nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i8> %vb, <vscale x 32 x i1> %m, i32 %evl)
+  ret <vscale x 32 x i8> %v
+}
+
+define <vscale x 32 x i8> @vssub_vx_nxv32i8_unmasked(<vscale x 32 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_nxv32i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, m4, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 32 x i8> poison, i8 %b, i32 0
+  %vb = shufflevector <vscale x 32 x i8> %elt.head, <vscale x 32 x i8> poison, <vscale x 32 x i32> zeroinitializer
+  %head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
+  %v = call <vscale x 32 x i8> @llvm.vp.ssub.sat.nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i8> %vb, <vscale x 32 x i1> %m, i32 %evl)
+  ret <vscale x 32 x i8> %v
+}
+
+define <vscale x 32 x i8> @vssub_vi_nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_nxv32i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 32 x i8> poison, i8 -1, i32 0
+  %vb = shufflevector <vscale x 32 x i8> %elt.head, <vscale x 32 x i8> poison, <vscale x 32 x i32> zeroinitializer
+  %v = call <vscale x 32 x i8> @llvm.vp.ssub.sat.nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i8> %vb, <vscale x 32 x i1> %m, i32 %evl)
+  ret <vscale x 32 x i8> %v
+}
+
+define <vscale x 32 x i8> @vssub_vi_nxv32i8_unmasked(<vscale x 32 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_nxv32i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 32 x i8> poison, i8 -1, i32 0
+  %vb = shufflevector <vscale x 32 x i8> %elt.head, <vscale x 32 x i8> poison, <vscale x 32 x i32> zeroinitializer
+  %head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
+  %v = call <vscale x 32 x i8> @llvm.vp.ssub.sat.nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i8> %vb, <vscale x 32 x i1> %m, i32 %evl)
+  ret <vscale x 32 x i8> %v
+}
+
+declare <vscale x 64 x i8> @llvm.vp.ssub.sat.nxv64i8(<vscale x 64 x i8>, <vscale x 64 x i8>, <vscale x 64 x i1>, i32)
+
+define <vscale x 64 x i8> @vssub_vv_nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i8> %b, <vscale x 64 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_nxv64i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-NEXT:    vssub.vv v8, v8, v16, v0.t
+; CHECK-NEXT:    ret
+  %v = call <vscale x 64 x i8> @llvm.vp.ssub.sat.nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i8> %b, <vscale x 64 x i1> %m, i32 %evl)
+  ret <vscale x 64 x i8> %v
+}
+
+define <vscale x 64 x i8> @vssub_vv_nxv64i8_unmasked(<vscale x 64 x i8> %va, <vscale x 64 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_nxv64i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-NEXT:    vssub.vv v8, v8, v16
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 64 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 64 x i1> %head, <vscale x 64 x i1> poison, <vscale x 64 x i32> zeroinitializer
+  %v = call <vscale x 64 x i8> @llvm.vp.ssub.sat.nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i8> %b, <vscale x 64 x i1> %m, i32 %evl)
+  ret <vscale x 64 x i8> %v
+}
+
+define <vscale x 64 x i8> @vssub_vx_nxv64i8(<vscale x 64 x i8> %va, i8 %b, <vscale x 64 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_nxv64i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 64 x i8> poison, i8 %b, i32 0
+  %vb = shufflevector <vscale x 64 x i8> %elt.head, <vscale x 64 x i8> poison, <vscale x 64 x i32> zeroinitializer
+  %v = call <vscale x 64 x i8> @llvm.vp.ssub.sat.nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i8> %vb, <vscale x 64 x i1> %m, i32 %evl)
+  ret <vscale x 64 x i8> %v
+}
+
+define <vscale x 64 x i8> @vssub_vx_nxv64i8_unmasked(<vscale x 64 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_nxv64i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 64 x i8> poison, i8 %b, i32 0
+  %vb = shufflevector <vscale x 64 x i8> %elt.head, <vscale x 64 x i8> poison, <vscale x 64 x i32> zeroinitializer
+  %head = insertelement <vscale x 64 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 64 x i1> %head, <vscale x 64 x i1> poison, <vscale x 64 x i32> zeroinitializer
+  %v = call <vscale x 64 x i8> @llvm.vp.ssub.sat.nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i8> %vb, <vscale x 64 x i1> %m, i32 %evl)
+  ret <vscale x 64 x i8> %v
+}
+
+define <vscale x 64 x i8> @vssub_vi_nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_nxv64i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 64 x i8> poison, i8 -1, i32 0
+  %vb = shufflevector <vscale x 64 x i8> %elt.head, <vscale x 64 x i8> poison, <vscale x 64 x i32> zeroinitializer
+  %v = call <vscale x 64 x i8> @llvm.vp.ssub.sat.nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i8> %vb, <vscale x 64 x i1> %m, i32 %evl)
+  ret <vscale x 64 x i8> %v
+}
+
+define <vscale x 64 x i8> @vssub_vi_nxv64i8_unmasked(<vscale x 64 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_nxv64i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 64 x i8> poison, i8 -1, i32 0
+  %vb = shufflevector <vscale x 64 x i8> %elt.head, <vscale x 64 x i8> poison, <vscale x 64 x i32> zeroinitializer
+  %head = insertelement <vscale x 64 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 64 x i1> %head, <vscale x 64 x i1> poison, <vscale x 64 x i32> zeroinitializer
+  %v = call <vscale x 64 x i8> @llvm.vp.ssub.sat.nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i8> %vb, <vscale x 64 x i1> %m, i32 %evl)
+  ret <vscale x 64 x i8> %v
+}
+
+; Test that split-legalization works when the mask itself needs splitting.
+
+declare <vscale x 128 x i8> @llvm.vp.ssub.sat.nxv128i8(<vscale x 128 x i8>, <vscale x 128 x i8>, <vscale x 128 x i1>, i32)
+
+define <vscale x 128 x i8> @vssub_vi_nxv128i8(<vscale x 128 x i8> %va, <vscale x 128 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_nxv128i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vmv1r.v v24, v0
+; CHECK-NEXT:    vsetvli a2, zero, e8, m8, ta, ma
+; CHECK-NEXT:    vlm.v v0, (a0)
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    slli a2, a2, 3
+; CHECK-NEXT:    sub a0, a1, a2
+; CHECK-NEXT:    sltu a3, a1, a0
+; CHECK-NEXT:    addi a3, a3, -1
+; CHECK-NEXT:    and a3, a3, a0
+; CHECK-NEXT:    li a0, -1
+; CHECK-NEXT:    vsetvli zero, a3, e8, m8, ta, ma
+; CHECK-NEXT:    vssub.vx v16, v16, a0, v0.t
+; CHECK-NEXT:    bltu a1, a2, .LBB50_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    mv a1, a2
+; CHECK-NEXT:  .LBB50_2:
+; CHECK-NEXT:    vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-NEXT:    vmv1r.v v0, v24
+; CHECK-NEXT:    vssub.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 128 x i8> poison, i8 -1, i32 0
+  %vb = shufflevector <vscale x 128 x i8> %elt.head, <vscale x 128 x i8> poison, <vscale x 128 x i32> zeroinitializer
+  %v = call <vscale x 128 x i8> @llvm.vp.ssub.sat.nxv128i8(<vscale x 128 x i8> %va, <vscale x 128 x i8> %vb, <vscale x 128 x i1> %m, i32 %evl)
+  ret <vscale x 128 x i8> %v
+}
+
+define <vscale x 128 x i8> @vssub_vi_nxv128i8_unmasked(<vscale x 128 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_nxv128i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    slli a1, a1, 3
+; CHECK-NEXT:    sub a2, a0, a1
+; CHECK-NEXT:    sltu a3, a0, a2
+; CHECK-NEXT:    addi a3, a3, -1
+; CHECK-NEXT:    and a3, a3, a2
+; CHECK-NEXT:    li a2, -1
+; CHECK-NEXT:    vsetvli zero, a3, e8, m8, ta, ma
+; CHECK-NEXT:    vssub.vx v16, v16, a2
+; CHECK-NEXT:    bltu a0, a1, .LBB51_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    mv a0, a1
+; CHECK-NEXT:  .LBB51_2:
+; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a2
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 128 x i8> poison, i8 -1, i32 0
+  %vb = shufflevector <vscale x 128 x i8> %elt.head, <vscale x 128 x i8> poison, <vscale x 128 x i32> zeroinitializer
+  %head = insertelement <vscale x 128 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 128 x i1> %head, <vscale x 128 x i1> poison, <vscale x 128 x i32> zeroinitializer
+  %v = call <vscale x 128 x i8> @llvm.vp.ssub.sat.nxv128i8(<vscale x 128 x i8> %va, <vscale x 128 x i8> %vb, <vscale x 128 x i1> %m, i32 %evl)
+  ret <vscale x 128 x i8> %v
+}
+
+declare <vscale x 1 x i16> @llvm.vp.ssub.sat.nxv1i16(<vscale x 1 x i16>, <vscale x 1 x i16>, <vscale x 1 x i1>, i32)
+
+define <vscale x 1 x i16> @vssub_vv_nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_nxv1i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT:    vssub.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    ret
+  %v = call <vscale x 1 x i16> @llvm.vp.ssub.sat.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %b, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i16> %v
+}
+
+define <vscale x 1 x i16> @vssub_vv_nxv1i16_unmasked(<vscale x 1 x i16> %va, <vscale x 1 x i16> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_nxv1i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT:    vssub.vv v8, v8, v9
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x i16> @llvm.vp.ssub.sat.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %b, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i16> %v
+}
+
+define <vscale x 1 x i16> @vssub_vx_nxv1i16(<vscale x 1 x i16> %va, i16 %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_nxv1i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 1 x i16> poison, i16 %b, i32 0
+  %vb = shufflevector <vscale x 1 x i16> %elt.head, <vscale x 1 x i16> poison, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x i16> @llvm.vp.ssub.sat.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %vb, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i16> %v
+}
+
+define <vscale x 1 x i16> @vssub_vx_nxv1i16_unmasked(<vscale x 1 x i16> %va, i16 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_nxv1i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 1 x i16> poison, i16 %b, i32 0
+  %vb = shufflevector <vscale x 1 x i16> %elt.head, <vscale x 1 x i16> poison, <vscale x 1 x i32> zeroinitializer
+  %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x i16> @llvm.vp.ssub.sat.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %vb, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i16> %v
+}
+
+define <vscale x 1 x i16> @vssub_vi_nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_nxv1i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 1 x i16> poison, i16 -1, i32 0
+  %vb = shufflevector <vscale x 1 x i16> %elt.head, <vscale x 1 x i16> poison, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x i16> @llvm.vp.ssub.sat.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %vb, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i16> %v
+}
+
+define <vscale x 1 x i16> @vssub_vi_nxv1i16_unmasked(<vscale x 1 x i16> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_nxv1i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 1 x i16> poison, i16 -1, i32 0
+  %vb = shufflevector <vscale x 1 x i16> %elt.head, <vscale x 1 x i16> poison, <vscale x 1 x i32> zeroinitializer
+  %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x i16> @llvm.vp.ssub.sat.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %vb, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i16> %v
+}
+
+declare <vscale x 2 x i16> @llvm.vp.ssub.sat.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i16>, <vscale x 2 x i1>, i32)
+
+define <vscale x 2 x i16> @vssub_vv_nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %b, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_nxv2i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT:    vssub.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    ret
+  %v = call <vscale x 2 x i16> @llvm.vp.ssub.sat.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %b, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x i16> %v
+}
+
+define <vscale x 2 x i16> @vssub_vv_nxv2i16_unmasked(<vscale x 2 x i16> %va, <vscale x 2 x i16> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_nxv2i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT:    vssub.vv v8, v8, v9
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+  %v = call <vscale x 2 x i16> @llvm.vp.ssub.sat.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %b, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x i16> %v
+}
+
+define <vscale x 2 x i16> @vssub_vx_nxv2i16(<vscale x 2 x i16> %va, i16 %b, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_nxv2i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 2 x i16> poison, i16 %b, i32 0
+  %vb = shufflevector <vscale x 2 x i16> %elt.head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
+  %v = call <vscale x 2 x i16> @llvm.vp.ssub.sat.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %vb, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x i16> %v
+}
+
+define <vscale x 2 x i16> @vssub_vx_nxv2i16_unmasked(<vscale x 2 x i16> %va, i16 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_nxv2i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 2 x i16> poison, i16 %b, i32 0
+  %vb = shufflevector <vscale x 2 x i16> %elt.head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
+  %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+  %v = call <vscale x 2 x i16> @llvm.vp.ssub.sat.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %vb, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x i16> %v
+}
+
+define <vscale x 2 x i16> @vssub_vi_nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_nxv2i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 2 x i16> poison, i16 -1, i32 0
+  %vb = shufflevector <vscale x 2 x i16> %elt.head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
+  %v = call <vscale x 2 x i16> @llvm.vp.ssub.sat.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %vb, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x i16> %v
+}
+
+define <vscale x 2 x i16> @vssub_vi_nxv2i16_unmasked(<vscale x 2 x i16> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_nxv2i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 2 x i16> poison, i16 -1, i32 0
+  %vb = shufflevector <vscale x 2 x i16> %elt.head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
+  %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+  %v = call <vscale x 2 x i16> @llvm.vp.ssub.sat.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %vb, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x i16> %v
+}
+
+declare <vscale x 4 x i16> @llvm.vp.ssub.sat.nxv4i16(<vscale x 4 x i16>, <vscale x 4 x i16>, <vscale x 4 x i1>, i32)
+
+define <vscale x 4 x i16> @vssub_vv_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %b, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_nxv4i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT:    vssub.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    ret
+  %v = call <vscale x 4 x i16> @llvm.vp.ssub.sat.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %b, <vscale x 4 x i1> %m, i32 %evl)
+  ret <vscale x 4 x i16> %v
+}
+
+define <vscale x 4 x i16> @vssub_vv_nxv4i16_unmasked(<vscale x 4 x i16> %va, <vscale x 4 x i16> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_nxv4i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT:    vssub.vv v8, v8, v9
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
+  %v = call <vscale x 4 x i16> @llvm.vp.ssub.sat.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %b, <vscale x 4 x i1> %m, i32 %evl)
+  ret <vscale x 4 x i16> %v
+}
+
+define <vscale x 4 x i16> @vssub_vx_nxv4i16(<vscale x 4 x i16> %va, i16 %b, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_nxv4i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 4 x i16> poison, i16 %b, i32 0
+  %vb = shufflevector <vscale x 4 x i16> %elt.head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
+  %v = call <vscale x 4 x i16> @llvm.vp.ssub.sat.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %vb, <vscale x 4 x i1> %m, i32 %evl)
+  ret <vscale x 4 x i16> %v
+}
+
+define <vscale x 4 x i16> @vssub_vx_nxv4i16_unmasked(<vscale x 4 x i16> %va, i16 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_nxv4i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 4 x i16> poison, i16 %b, i32 0
+  %vb = shufflevector <vscale x 4 x i16> %elt.head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
+  %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
+  %v = call <vscale x 4 x i16> @llvm.vp.ssub.sat.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %vb, <vscale x 4 x i1> %m, i32 %evl)
+  ret <vscale x 4 x i16> %v
+}
+
+define <vscale x 4 x i16> @vssub_vi_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_nxv4i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 4 x i16> poison, i16 -1, i32 0
+  %vb = shufflevector <vscale x 4 x i16> %elt.head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
+  %v = call <vscale x 4 x i16> @llvm.vp.ssub.sat.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %vb, <vscale x 4 x i1> %m, i32 %evl)
+  ret <vscale x 4 x i16> %v
+}
+
+define <vscale x 4 x i16> @vssub_vi_nxv4i16_unmasked(<vscale x 4 x i16> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_nxv4i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 4 x i16> poison, i16 -1, i32 0
+  %vb = shufflevector <vscale x 4 x i16> %elt.head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
+  %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
+  %v = call <vscale x 4 x i16> @llvm.vp.ssub.sat.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %vb, <vscale x 4 x i1> %m, i32 %evl)
+  ret <vscale x 4 x i16> %v
+}
+
+declare <vscale x 8 x i16> @llvm.vp.ssub.sat.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i1>, i32)
+
+define <vscale x 8 x i16> @vssub_vv_nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_nxv8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT:    vssub.vv v8, v8, v10, v0.t
+; CHECK-NEXT:    ret
+  %v = call <vscale x 8 x i16> @llvm.vp.ssub.sat.nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %b, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i16> %v
+}
+
+define <vscale x 8 x i16> @vssub_vv_nxv8i16_unmasked(<vscale x 8 x i16> %va, <vscale x 8 x i16> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_nxv8i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT:    vssub.vv v8, v8, v10
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+  %v = call <vscale x 8 x i16> @llvm.vp.ssub.sat.nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %b, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i16> %v
+}
+
+define <vscale x 8 x i16> @vssub_vx_nxv8i16(<vscale x 8 x i16> %va, i16 %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_nxv8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 8 x i16> poison, i16 %b, i32 0
+  %vb = shufflevector <vscale x 8 x i16> %elt.head, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
+  %v = call <vscale x 8 x i16> @llvm.vp.ssub.sat.nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %vb, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i16> %v
+}
+
+define <vscale x 8 x i16> @vssub_vx_nxv8i16_unmasked(<vscale x 8 x i16> %va, i16 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_nxv8i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 8 x i16> poison, i16 %b, i32 0
+  %vb = shufflevector <vscale x 8 x i16> %elt.head, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
+  %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+  %v = call <vscale x 8 x i16> @llvm.vp.ssub.sat.nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %vb, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i16> %v
+}
+
+define <vscale x 8 x i16> @vssub_vi_nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_nxv8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 8 x i16> poison, i16 -1, i32 0
+  %vb = shufflevector <vscale x 8 x i16> %elt.head, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
+  %v = call <vscale x 8 x i16> @llvm.vp.ssub.sat.nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %vb, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i16> %v
+}
+
+define <vscale x 8 x i16> @vssub_vi_nxv8i16_unmasked(<vscale x 8 x i16> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_nxv8i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 8 x i16> poison, i16 -1, i32 0
+  %vb = shufflevector <vscale x 8 x i16> %elt.head, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
+  %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+  %v = call <vscale x 8 x i16> @llvm.vp.ssub.sat.nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %vb, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i16> %v
+}
+
+declare <vscale x 16 x i16> @llvm.vp.ssub.sat.nxv16i16(<vscale x 16 x i16>, <vscale x 16 x i16>, <vscale x 16 x i1>, i32)
+
+define <vscale x 16 x i16> @vssub_vv_nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i16> %b, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_nxv16i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-NEXT:    vssub.vv v8, v8, v12, v0.t
+; CHECK-NEXT:    ret
+  %v = call <vscale x 16 x i16> @llvm.vp.ssub.sat.nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i16> %b, <vscale x 16 x i1> %m, i32 %evl)
+  ret <vscale x 16 x i16> %v
+}
+
+define <vscale x 16 x i16> @vssub_vv_nxv16i16_unmasked(<vscale x 16 x i16> %va, <vscale x 16 x i16> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_nxv16i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-NEXT:    vssub.vv v8, v8, v12
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
+  %v = call <vscale x 16 x i16> @llvm.vp.ssub.sat.nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i16> %b, <vscale x 16 x i1> %m, i32 %evl)
+  ret <vscale x 16 x i16> %v
+}
+
+define <vscale x 16 x i16> @vssub_vx_nxv16i16(<vscale x 16 x i16> %va, i16 %b, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_nxv16i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 16 x i16> poison, i16 %b, i32 0
+  %vb = shufflevector <vscale x 16 x i16> %elt.head, <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer
+  %v = call <vscale x 16 x i16> @llvm.vp.ssub.sat.nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i16> %vb, <vscale x 16 x i1> %m, i32 %evl)
+  ret <vscale x 16 x i16> %v
+}
+
+define <vscale x 16 x i16> @vssub_vx_nxv16i16_unmasked(<vscale x 16 x i16> %va, i16 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_nxv16i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 16 x i16> poison, i16 %b, i32 0
+  %vb = shufflevector <vscale x 16 x i16> %elt.head, <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer
+  %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
+  %v = call <vscale x 16 x i16> @llvm.vp.ssub.sat.nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i16> %vb, <vscale x 16 x i1> %m, i32 %evl)
+  ret <vscale x 16 x i16> %v
+}
+
+define <vscale x 16 x i16> @vssub_vi_nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_nxv16i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 16 x i16> poison, i16 -1, i32 0
+  %vb = shufflevector <vscale x 16 x i16> %elt.head, <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer
+  %v = call <vscale x 16 x i16> @llvm.vp.ssub.sat.nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i16> %vb, <vscale x 16 x i1> %m, i32 %evl)
+  ret <vscale x 16 x i16> %v
+}
+
+define <vscale x 16 x i16> @vssub_vi_nxv16i16_unmasked(<vscale x 16 x i16> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_nxv16i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 16 x i16> poison, i16 -1, i32 0
+  %vb = shufflevector <vscale x 16 x i16> %elt.head, <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer
+  %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
+  %v = call <vscale x 16 x i16> @llvm.vp.ssub.sat.nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i16> %vb, <vscale x 16 x i1> %m, i32 %evl)
+  ret <vscale x 16 x i16> %v
+}
+
+declare <vscale x 32 x i16> @llvm.vp.ssub.sat.nxv32i16(<vscale x 32 x i16>, <vscale x 32 x i16>, <vscale x 32 x i1>, i32)
+
+define <vscale x 32 x i16> @vssub_vv_nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i16> %b, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_nxv32i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
+; CHECK-NEXT:    vssub.vv v8, v8, v16, v0.t
+; CHECK-NEXT:    ret
+  %v = call <vscale x 32 x i16> @llvm.vp.ssub.sat.nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i16> %b, <vscale x 32 x i1> %m, i32 %evl)
+  ret <vscale x 32 x i16> %v
+}
+
+define <vscale x 32 x i16> @vssub_vv_nxv32i16_unmasked(<vscale x 32 x i16> %va, <vscale x 32 x i16> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_nxv32i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
+; CHECK-NEXT:    vssub.vv v8, v8, v16
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
+  %v = call <vscale x 32 x i16> @llvm.vp.ssub.sat.nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i16> %b, <vscale x 32 x i1> %m, i32 %evl)
+  ret <vscale x 32 x i16> %v
+}
+
+define <vscale x 32 x i16> @vssub_vx_nxv32i16(<vscale x 32 x i16> %va, i16 %b, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_nxv32i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 32 x i16> poison, i16 %b, i32 0
+  %vb = shufflevector <vscale x 32 x i16> %elt.head, <vscale x 32 x i16> poison, <vscale x 32 x i32> zeroinitializer
+  %v = call <vscale x 32 x i16> @llvm.vp.ssub.sat.nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i16> %vb, <vscale x 32 x i1> %m, i32 %evl)
+  ret <vscale x 32 x i16> %v
+}
+
+define <vscale x 32 x i16> @vssub_vx_nxv32i16_unmasked(<vscale x 32 x i16> %va, i16 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_nxv32i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 32 x i16> poison, i16 %b, i32 0
+  %vb = shufflevector <vscale x 32 x i16> %elt.head, <vscale x 32 x i16> poison, <vscale x 32 x i32> zeroinitializer
+  %head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
+  %v = call <vscale x 32 x i16> @llvm.vp.ssub.sat.nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i16> %vb, <vscale x 32 x i1> %m, i32 %evl)
+  ret <vscale x 32 x i16> %v
+}
+
+define <vscale x 32 x i16> @vssub_vi_nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_nxv32i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 32 x i16> poison, i16 -1, i32 0
+  %vb = shufflevector <vscale x 32 x i16> %elt.head, <vscale x 32 x i16> poison, <vscale x 32 x i32> zeroinitializer
+  %v = call <vscale x 32 x i16> @llvm.vp.ssub.sat.nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i16> %vb, <vscale x 32 x i1> %m, i32 %evl)
+  ret <vscale x 32 x i16> %v
+}
+
+define <vscale x 32 x i16> @vssub_vi_nxv32i16_unmasked(<vscale x 32 x i16> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_nxv32i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 32 x i16> poison, i16 -1, i32 0
+  %vb = shufflevector <vscale x 32 x i16> %elt.head, <vscale x 32 x i16> poison, <vscale x 32 x i32> zeroinitializer
+  %head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
+  %v = call <vscale x 32 x i16> @llvm.vp.ssub.sat.nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i16> %vb, <vscale x 32 x i1> %m, i32 %evl)
+  ret <vscale x 32 x i16> %v
+}
+
+declare <vscale x 1 x i32> @llvm.vp.ssub.sat.nxv1i32(<vscale x 1 x i32>, <vscale x 1 x i32>, <vscale x 1 x i1>, i32)
+
+define <vscale x 1 x i32> @vssub_vv_nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_nxv1i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT:    vssub.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    ret
+  %v = call <vscale x 1 x i32> @llvm.vp.ssub.sat.nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %b, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i32> %v
+}
+
+define <vscale x 1 x i32> @vssub_vv_nxv1i32_unmasked(<vscale x 1 x i32> %va, <vscale x 1 x i32> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_nxv1i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT:    vssub.vv v8, v8, v9
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x i32> @llvm.vp.ssub.sat.nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %b, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i32> %v
+}
+
+define <vscale x 1 x i32> @vssub_vx_nxv1i32(<vscale x 1 x i32> %va, i32 %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_nxv1i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 1 x i32> poison, i32 %b, i32 0
+  %vb = shufflevector <vscale x 1 x i32> %elt.head, <vscale x 1 x i32> poison, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x i32> @llvm.vp.ssub.sat.nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %vb, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i32> %v
+}
+
+define <vscale x 1 x i32> @vssub_vx_nxv1i32_unmasked(<vscale x 1 x i32> %va, i32 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_nxv1i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 1 x i32> poison, i32 %b, i32 0
+  %vb = shufflevector <vscale x 1 x i32> %elt.head, <vscale x 1 x i32> poison, <vscale x 1 x i32> zeroinitializer
+  %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x i32> @llvm.vp.ssub.sat.nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %vb, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i32> %v
+}
+
+define <vscale x 1 x i32> @vssub_vi_nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_nxv1i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 1 x i32> poison, i32 -1, i32 0
+  %vb = shufflevector <vscale x 1 x i32> %elt.head, <vscale x 1 x i32> poison, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x i32> @llvm.vp.ssub.sat.nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %vb, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i32> %v
+}
+
+define <vscale x 1 x i32> @vssub_vi_nxv1i32_unmasked(<vscale x 1 x i32> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_nxv1i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 1 x i32> poison, i32 -1, i32 0
+  %vb = shufflevector <vscale x 1 x i32> %elt.head, <vscale x 1 x i32> poison, <vscale x 1 x i32> zeroinitializer
+  %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x i32> @llvm.vp.ssub.sat.nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %vb, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i32> %v
+}
+
+declare <vscale x 2 x i32> @llvm.vp.ssub.sat.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i1>, i32)
+
+define <vscale x 2 x i32> @vssub_vv_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %b, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_nxv2i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT:    vssub.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    ret
+  %v = call <vscale x 2 x i32> @llvm.vp.ssub.sat.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %b, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x i32> %v
+}
+
+define <vscale x 2 x i32> @vssub_vv_nxv2i32_unmasked(<vscale x 2 x i32> %va, <vscale x 2 x i32> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_nxv2i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT:    vssub.vv v8, v8, v9
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+  %v = call <vscale x 2 x i32> @llvm.vp.ssub.sat.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %b, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x i32> %v
+}
+
+define <vscale x 2 x i32> @vssub_vx_nxv2i32(<vscale x 2 x i32> %va, i32 %b, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_nxv2i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 2 x i32> poison, i32 %b, i32 0
+  %vb = shufflevector <vscale x 2 x i32> %elt.head, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
+  %v = call <vscale x 2 x i32> @llvm.vp.ssub.sat.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %vb, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x i32> %v
+}
+
+define <vscale x 2 x i32> @vssub_vx_nxv2i32_unmasked(<vscale x 2 x i32> %va, i32 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_nxv2i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 2 x i32> poison, i32 %b, i32 0
+  %vb = shufflevector <vscale x 2 x i32> %elt.head, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
+  %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+  %v = call <vscale x 2 x i32> @llvm.vp.ssub.sat.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %vb, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x i32> %v
+}
+
+define <vscale x 2 x i32> @vssub_vi_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_nxv2i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 2 x i32> poison, i32 -1, i32 0
+  %vb = shufflevector <vscale x 2 x i32> %elt.head, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
+  %v = call <vscale x 2 x i32> @llvm.vp.ssub.sat.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %vb, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x i32> %v
+}
+
+define <vscale x 2 x i32> @vssub_vi_nxv2i32_unmasked(<vscale x 2 x i32> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_nxv2i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 2 x i32> poison, i32 -1, i32 0
+  %vb = shufflevector <vscale x 2 x i32> %elt.head, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
+  %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+  %v = call <vscale x 2 x i32> @llvm.vp.ssub.sat.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %vb, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x i32> %v
+}
+
+declare <vscale x 4 x i32> @llvm.vp.ssub.sat.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, i32)
+
+define <vscale x 4 x i32> @vssub_vv_nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i32> %b, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_nxv4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT:    vssub.vv v8, v8, v10, v0.t
+; CHECK-NEXT:    ret
+  %v = call <vscale x 4 x i32> @llvm.vp.ssub.sat.nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i32> %b, <vscale x 4 x i1> %m, i32 %evl)
+  ret <vscale x 4 x i32> %v
+}
+
+define <vscale x 4 x i32> @vssub_vv_nxv4i32_unmasked(<vscale x 4 x i32> %va, <vscale x 4 x i32> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_nxv4i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT:    vssub.vv v8, v8, v10
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
+  %v = call <vscale x 4 x i32> @llvm.vp.ssub.sat.nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i32> %b, <vscale x 4 x i1> %m, i32 %evl)
+  ret <vscale x 4 x i32> %v
+}
+
+define <vscale x 4 x i32> @vssub_vx_nxv4i32(<vscale x 4 x i32> %va, i32 %b, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_nxv4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 4 x i32> poison, i32 %b, i32 0
+  %vb = shufflevector <vscale x 4 x i32> %elt.head, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+  %v = call <vscale x 4 x i32> @llvm.vp.ssub.sat.nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i32> %vb, <vscale x 4 x i1> %m, i32 %evl)
+  ret <vscale x 4 x i32> %v
+}
+
+define <vscale x 4 x i32> @vssub_vx_nxv4i32_unmasked(<vscale x 4 x i32> %va, i32 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_nxv4i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 4 x i32> poison, i32 %b, i32 0
+  %vb = shufflevector <vscale x 4 x i32> %elt.head, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+  %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
+  %v = call <vscale x 4 x i32> @llvm.vp.ssub.sat.nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i32> %vb, <vscale x 4 x i1> %m, i32 %evl)
+  ret <vscale x 4 x i32> %v
+}
+
+define <vscale x 4 x i32> @vssub_vi_nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_nxv4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 4 x i32> poison, i32 -1, i32 0
+  %vb = shufflevector <vscale x 4 x i32> %elt.head, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+  %v = call <vscale x 4 x i32> @llvm.vp.ssub.sat.nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i32> %vb, <vscale x 4 x i1> %m, i32 %evl)
+  ret <vscale x 4 x i32> %v
+}
+
+define <vscale x 4 x i32> @vssub_vi_nxv4i32_unmasked(<vscale x 4 x i32> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_nxv4i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 4 x i32> poison, i32 -1, i32 0
+  %vb = shufflevector <vscale x 4 x i32> %elt.head, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+  %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
+  %v = call <vscale x 4 x i32> @llvm.vp.ssub.sat.nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i32> %vb, <vscale x 4 x i1> %m, i32 %evl)
+  ret <vscale x 4 x i32> %v
+}
+
+declare <vscale x 8 x i32> @llvm.vp.ssub.sat.nxv8i32(<vscale x 8 x i32>, <vscale x 8 x i32>, <vscale x 8 x i1>, i32)
+
+define <vscale x 8 x i32> @vssub_vv_nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_nxv8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT:    vssub.vv v8, v8, v12, v0.t
+; CHECK-NEXT:    ret
+  %v = call <vscale x 8 x i32> @llvm.vp.ssub.sat.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %b, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i32> %v
+}
+
+define <vscale x 8 x i32> @vssub_vv_nxv8i32_unmasked(<vscale x 8 x i32> %va, <vscale x 8 x i32> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_nxv8i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT:    vssub.vv v8, v8, v12
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+  %v = call <vscale x 8 x i32> @llvm.vp.ssub.sat.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %b, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i32> %v
+}
+
+define <vscale x 8 x i32> @vssub_vx_nxv8i32(<vscale x 8 x i32> %va, i32 %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_nxv8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0
+  %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
+  %v = call <vscale x 8 x i32> @llvm.vp.ssub.sat.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %vb, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i32> %v
+}
+
+define <vscale x 8 x i32> @vssub_vx_nxv8i32_unmasked(<vscale x 8 x i32> %va, i32 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_nxv8i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0
+  %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
+  %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+  %v = call <vscale x 8 x i32> @llvm.vp.ssub.sat.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %vb, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i32> %v
+}
+
+define <vscale x 8 x i32> @vssub_vi_nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_nxv8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 8 x i32> poison, i32 -1, i32 0
+  %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
+  %v = call <vscale x 8 x i32> @llvm.vp.ssub.sat.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %vb, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i32> %v
+}
+
+define <vscale x 8 x i32> @vssub_vi_nxv8i32_unmasked(<vscale x 8 x i32> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_nxv8i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 8 x i32> poison, i32 -1, i32 0
+  %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
+  %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+  %v = call <vscale x 8 x i32> @llvm.vp.ssub.sat.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %vb, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i32> %v
+}
+
+declare <vscale x 16 x i32> @llvm.vp.ssub.sat.nxv16i32(<vscale x 16 x i32>, <vscale x 16 x i32>, <vscale x 16 x i1>, i32)
+
+define <vscale x 16 x i32> @vssub_vv_nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i32> %b, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_nxv16i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT:    vssub.vv v8, v8, v16, v0.t
+; CHECK-NEXT:    ret
+  %v = call <vscale x 16 x i32> @llvm.vp.ssub.sat.nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i32> %b, <vscale x 16 x i1> %m, i32 %evl)
+  ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @vssub_vv_nxv16i32_unmasked(<vscale x 16 x i32> %va, <vscale x 16 x i32> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_nxv16i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT:    vssub.vv v8, v8, v16
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
+  %v = call <vscale x 16 x i32> @llvm.vp.ssub.sat.nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i32> %b, <vscale x 16 x i1> %m, i32 %evl)
+  ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @vssub_vx_nxv16i32(<vscale x 16 x i32> %va, i32 %b, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_nxv16i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 16 x i32> poison, i32 %b, i32 0
+  %vb = shufflevector <vscale x 16 x i32> %elt.head, <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
+  %v = call <vscale x 16 x i32> @llvm.vp.ssub.sat.nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i32> %vb, <vscale x 16 x i1> %m, i32 %evl)
+  ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @vssub_vx_nxv16i32_unmasked(<vscale x 16 x i32> %va, i32 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_nxv16i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 16 x i32> poison, i32 %b, i32 0
+  %vb = shufflevector <vscale x 16 x i32> %elt.head, <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
+  %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
+  %v = call <vscale x 16 x i32> @llvm.vp.ssub.sat.nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i32> %vb, <vscale x 16 x i1> %m, i32 %evl)
+  ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @vssub_vi_nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_nxv16i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 16 x i32> poison, i32 -1, i32 0
+  %vb = shufflevector <vscale x 16 x i32> %elt.head, <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
+  %v = call <vscale x 16 x i32> @llvm.vp.ssub.sat.nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i32> %vb, <vscale x 16 x i1> %m, i32 %evl)
+  ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @vssub_vi_nxv16i32_unmasked(<vscale x 16 x i32> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_nxv16i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 16 x i32> poison, i32 -1, i32 0
+  %vb = shufflevector <vscale x 16 x i32> %elt.head, <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
+  %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
+  %v = call <vscale x 16 x i32> @llvm.vp.ssub.sat.nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i32> %vb, <vscale x 16 x i1> %m, i32 %evl)
+  ret <vscale x 16 x i32> %v
+}
+
+; Test that split-legalization works then the mask needs manual splitting.
+
+declare <vscale x 32 x i32> @llvm.vp.ssub.sat.nxv32i32(<vscale x 32 x i32>, <vscale x 32 x i32>, <vscale x 32 x i1>, i32)
+
+define <vscale x 32 x i32> @vssub_vi_nxv32i32(<vscale x 32 x i32> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_nxv32i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vmv1r.v v24, v0
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    srli a1, a2, 2
+; CHECK-NEXT:    vsetvli a3, zero, e8, mf2, ta, ma
+; CHECK-NEXT:    vslidedown.vx v0, v0, a1
+; CHECK-NEXT:    slli a2, a2, 1
+; CHECK-NEXT:    sub a1, a0, a2
+; CHECK-NEXT:    sltu a3, a0, a1
+; CHECK-NEXT:    addi a3, a3, -1
+; CHECK-NEXT:    and a3, a3, a1
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a3, e32, m8, ta, ma
+; CHECK-NEXT:    vssub.vx v16, v16, a1, v0.t
+; CHECK-NEXT:    bltu a0, a2, .LBB118_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    mv a0, a2
+; CHECK-NEXT:  .LBB118_2:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT:    vmv1r.v v0, v24
+; CHECK-NEXT:    vssub.vx v8, v8, a1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 32 x i32> poison, i32 -1, i32 0
+  %vb = shufflevector <vscale x 32 x i32> %elt.head, <vscale x 32 x i32> poison, <vscale x 32 x i32> zeroinitializer
+  %v = call <vscale x 32 x i32> @llvm.vp.ssub.sat.nxv32i32(<vscale x 32 x i32> %va, <vscale x 32 x i32> %vb, <vscale x 32 x i1> %m, i32 %evl)
+  ret <vscale x 32 x i32> %v
+}
+
+define <vscale x 32 x i32> @vssub_vi_nxv32i32_unmasked(<vscale x 32 x i32> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_nxv32i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    slli a1, a1, 1
+; CHECK-NEXT:    sub a2, a0, a1
+; CHECK-NEXT:    sltu a3, a0, a2
+; CHECK-NEXT:    addi a3, a3, -1
+; CHECK-NEXT:    and a3, a3, a2
+; CHECK-NEXT:    li a2, -1
+; CHECK-NEXT:    vsetvli zero, a3, e32, m8, ta, ma
+; CHECK-NEXT:    vssub.vx v16, v16, a2
+; CHECK-NEXT:    bltu a0, a1, .LBB119_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    mv a0, a1
+; CHECK-NEXT:  .LBB119_2:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a2
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 32 x i32> poison, i32 -1, i32 0
+  %vb = shufflevector <vscale x 32 x i32> %elt.head, <vscale x 32 x i32> poison, <vscale x 32 x i32> zeroinitializer
+  %head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
+  %v = call <vscale x 32 x i32> @llvm.vp.ssub.sat.nxv32i32(<vscale x 32 x i32> %va, <vscale x 32 x i32> %vb, <vscale x 32 x i1> %m, i32 %evl)
+  ret <vscale x 32 x i32> %v
+}
+
+declare <vscale x 1 x i64> @llvm.vp.ssub.sat.nxv1i64(<vscale x 1 x i64>, <vscale x 1 x i64>, <vscale x 1 x i1>, i32)
+
+define <vscale x 1 x i64> @vssub_vv_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_nxv1i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT:    vssub.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    ret
+  %v = call <vscale x 1 x i64> @llvm.vp.ssub.sat.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %b, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i64> %v
+}
+
+define <vscale x 1 x i64> @vssub_vv_nxv1i64_unmasked(<vscale x 1 x i64> %va, <vscale x 1 x i64> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_nxv1i64_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT:    vssub.vv v8, v8, v9
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x i64> @llvm.vp.ssub.sat.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %b, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i64> %v
+}
+
+define <vscale x 1 x i64> @vssub_vx_nxv1i64(<vscale x 1 x i64> %va, i64 %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vssub_vx_nxv1i64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    sw a1, 12(sp)
+; RV32-NEXT:    sw a0, 8(sp)
+; RV32-NEXT:    addi a0, sp, 8
+; RV32-NEXT:    vsetvli a1, zero, e64, m1, ta, ma
+; RV32-NEXT:    vlse64.v v9, (a0), zero
+; RV32-NEXT:    vsetvli zero, a2, e64, m1, ta, ma
+; RV32-NEXT:    vssub.vv v8, v8, v9, v0.t
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vssub_vx_nxv1i64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli zero, a1, e64, m1, ta, ma
+; RV64-NEXT:    vssub.vx v8, v8, a0, v0.t
+; RV64-NEXT:    ret
+  %elt.head = insertelement <vscale x 1 x i64> poison, i64 %b, i32 0
+  %vb = shufflevector <vscale x 1 x i64> %elt.head, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x i64> @llvm.vp.ssub.sat.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %vb, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i64> %v
+}
+
+define <vscale x 1 x i64> @vssub_vx_nxv1i64_unmasked(<vscale x 1 x i64> %va, i64 %b, i32 zeroext %evl) {
+; RV32-LABEL: vssub_vx_nxv1i64_unmasked:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    sw a1, 12(sp)
+; RV32-NEXT:    sw a0, 8(sp)
+; RV32-NEXT:    addi a0, sp, 8
+; RV32-NEXT:    vsetvli a1, zero, e64, m1, ta, ma
+; RV32-NEXT:    vlse64.v v9, (a0), zero
+; RV32-NEXT:    vsetvli zero, a2, e64, m1, ta, ma
+; RV32-NEXT:    vssub.vv v8, v8, v9
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vssub_vx_nxv1i64_unmasked:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli zero, a1, e64, m1, ta, ma
+; RV64-NEXT:    vssub.vx v8, v8, a0
+; RV64-NEXT:    ret
+  %elt.head = insertelement <vscale x 1 x i64> poison, i64 %b, i32 0
+  %vb = shufflevector <vscale x 1 x i64> %elt.head, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+  %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x i64> @llvm.vp.ssub.sat.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %vb, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i64> %v
+}
+
+define <vscale x 1 x i64> @vssub_vi_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_nxv1i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 1 x i64> poison, i64 -1, i32 0
+  %vb = shufflevector <vscale x 1 x i64> %elt.head, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x i64> @llvm.vp.ssub.sat.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %vb, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i64> %v
+}
+
+define <vscale x 1 x i64> @vssub_vi_nxv1i64_unmasked(<vscale x 1 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_nxv1i64_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 1 x i64> poison, i64 -1, i32 0
+  %vb = shufflevector <vscale x 1 x i64> %elt.head, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+  %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x i64> @llvm.vp.ssub.sat.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %vb, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i64> %v
+}
+
+declare <vscale x 2 x i64> @llvm.vp.ssub.sat.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i1>, i32)
+
+define <vscale x 2 x i64> @vssub_vv_nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i64> %b, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_nxv2i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-NEXT:    vssub.vv v8, v8, v10, v0.t
+; CHECK-NEXT:    ret
+  %v = call <vscale x 2 x i64> @llvm.vp.ssub.sat.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i64> %b, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x i64> %v
+}
+
+define <vscale x 2 x i64> @vssub_vv_nxv2i64_unmasked(<vscale x 2 x i64> %va, <vscale x 2 x i64> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_nxv2i64_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-NEXT:    vssub.vv v8, v8, v10
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+  %v = call <vscale x 2 x i64> @llvm.vp.ssub.sat.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i64> %b, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x i64> %v
+}
+
+define <vscale x 2 x i64> @vssub_vx_nxv2i64(<vscale x 2 x i64> %va, i64 %b, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vssub_vx_nxv2i64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    sw a1, 12(sp)
+; RV32-NEXT:    sw a0, 8(sp)
+; RV32-NEXT:    addi a0, sp, 8
+; RV32-NEXT:    vsetvli a1, zero, e64, m2, ta, ma
+; RV32-NEXT:    vlse64.v v10, (a0), zero
+; RV32-NEXT:    vsetvli zero, a2, e64, m2, ta, ma
+; RV32-NEXT:    vssub.vv v8, v8, v10, v0.t
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vssub_vx_nxv2i64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli zero, a1, e64, m2, ta, ma
+; RV64-NEXT:    vssub.vx v8, v8, a0, v0.t
+; RV64-NEXT:    ret
+  %elt.head = insertelement <vscale x 2 x i64> poison, i64 %b, i32 0
+  %vb = shufflevector <vscale x 2 x i64> %elt.head, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+  %v = call <vscale x 2 x i64> @llvm.vp.ssub.sat.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i64> %vb, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x i64> %v
+}
+
+define <vscale x 2 x i64> @vssub_vx_nxv2i64_unmasked(<vscale x 2 x i64> %va, i64 %b, i32 zeroext %evl) {
+; RV32-LABEL: vssub_vx_nxv2i64_unmasked:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    sw a1, 12(sp)
+; RV32-NEXT:    sw a0, 8(sp)
+; RV32-NEXT:    addi a0, sp, 8
+; RV32-NEXT:    vsetvli a1, zero, e64, m2, ta, ma
+; RV32-NEXT:    vlse64.v v10, (a0), zero
+; RV32-NEXT:    vsetvli zero, a2, e64, m2, ta, ma
+; RV32-NEXT:    vssub.vv v8, v8, v10
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vssub_vx_nxv2i64_unmasked:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli zero, a1, e64, m2, ta, ma
+; RV64-NEXT:    vssub.vx v8, v8, a0
+; RV64-NEXT:    ret
+  %elt.head = insertelement <vscale x 2 x i64> poison, i64 %b, i32 0
+  %vb = shufflevector <vscale x 2 x i64> %elt.head, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+  %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+  %v = call <vscale x 2 x i64> @llvm.vp.ssub.sat.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i64> %vb, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x i64> %v
+}
+
+define <vscale x 2 x i64> @vssub_vi_nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_nxv2i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 2 x i64> poison, i64 -1, i32 0
+  %vb = shufflevector <vscale x 2 x i64> %elt.head, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+  %v = call <vscale x 2 x i64> @llvm.vp.ssub.sat.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i64> %vb, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x i64> %v
+}
+
+define <vscale x 2 x i64> @vssub_vi_nxv2i64_unmasked(<vscale x 2 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_nxv2i64_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 2 x i64> poison, i64 -1, i32 0
+  %vb = shufflevector <vscale x 2 x i64> %elt.head, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+  %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+  %v = call <vscale x 2 x i64> @llvm.vp.ssub.sat.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i64> %vb, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x i64> %v
+}
+
+declare <vscale x 4 x i64> @llvm.vp.ssub.sat.nxv4i64(<vscale x 4 x i64>, <vscale x 4 x i64>, <vscale x 4 x i1>, i32)
+
+define <vscale x 4 x i64> @vssub_vv_nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i64> %b, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_nxv4i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT:    vssub.vv v8, v8, v12, v0.t
+; CHECK-NEXT:    ret
+  %v = call <vscale x 4 x i64> @llvm.vp.ssub.sat.nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i64> %b, <vscale x 4 x i1> %m, i32 %evl)
+  ret <vscale x 4 x i64> %v
+}
+
+define <vscale x 4 x i64> @vssub_vv_nxv4i64_unmasked(<vscale x 4 x i64> %va, <vscale x 4 x i64> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_nxv4i64_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT:    vssub.vv v8, v8, v12
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
+  %v = call <vscale x 4 x i64> @llvm.vp.ssub.sat.nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i64> %b, <vscale x 4 x i1> %m, i32 %evl)
+  ret <vscale x 4 x i64> %v
+}
+
+define <vscale x 4 x i64> @vssub_vx_nxv4i64(<vscale x 4 x i64> %va, i64 %b, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vssub_vx_nxv4i64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    sw a1, 12(sp)
+; RV32-NEXT:    sw a0, 8(sp)
+; RV32-NEXT:    addi a0, sp, 8
+; RV32-NEXT:    vsetvli a1, zero, e64, m4, ta, ma
+; RV32-NEXT:    vlse64.v v12, (a0), zero
+; RV32-NEXT:    vsetvli zero, a2, e64, m4, ta, ma
+; RV32-NEXT:    vssub.vv v8, v8, v12, v0.t
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vssub_vx_nxv4i64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
+; RV64-NEXT:    vssub.vx v8, v8, a0, v0.t
+; RV64-NEXT:    ret
+  %elt.head = insertelement <vscale x 4 x i64> poison, i64 %b, i32 0
+  %vb = shufflevector <vscale x 4 x i64> %elt.head, <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
+  %v = call <vscale x 4 x i64> @llvm.vp.ssub.sat.nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i64> %vb, <vscale x 4 x i1> %m, i32 %evl)
+  ret <vscale x 4 x i64> %v
+}
+
+define <vscale x 4 x i64> @vssub_vx_nxv4i64_unmasked(<vscale x 4 x i64> %va, i64 %b, i32 zeroext %evl) {
+; RV32-LABEL: vssub_vx_nxv4i64_unmasked:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    sw a1, 12(sp)
+; RV32-NEXT:    sw a0, 8(sp)
+; RV32-NEXT:    addi a0, sp, 8
+; RV32-NEXT:    vsetvli a1, zero, e64, m4, ta, ma
+; RV32-NEXT:    vlse64.v v12, (a0), zero
+; RV32-NEXT:    vsetvli zero, a2, e64, m4, ta, ma
+; RV32-NEXT:    vssub.vv v8, v8, v12
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vssub_vx_nxv4i64_unmasked:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
+; RV64-NEXT:    vssub.vx v8, v8, a0
+; RV64-NEXT:    ret
+  %elt.head = insertelement <vscale x 4 x i64> poison, i64 %b, i32 0
+  %vb = shufflevector <vscale x 4 x i64> %elt.head, <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
+  %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
+  %v = call <vscale x 4 x i64> @llvm.vp.ssub.sat.nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i64> %vb, <vscale x 4 x i1> %m, i32 %evl)
+  ret <vscale x 4 x i64> %v
+}
+
+define <vscale x 4 x i64> @vssub_vi_nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_nxv4i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 4 x i64> poison, i64 -1, i32 0
+  %vb = shufflevector <vscale x 4 x i64> %elt.head, <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
+  %v = call <vscale x 4 x i64> @llvm.vp.ssub.sat.nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i64> %vb, <vscale x 4 x i1> %m, i32 %evl)
+  ret <vscale x 4 x i64> %v
+}
+
+define <vscale x 4 x i64> @vssub_vi_nxv4i64_unmasked(<vscale x 4 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_nxv4i64_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 4 x i64> poison, i64 -1, i32 0
+  %vb = shufflevector <vscale x 4 x i64> %elt.head, <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
+  %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
+  %v = call <vscale x 4 x i64> @llvm.vp.ssub.sat.nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i64> %vb, <vscale x 4 x i1> %m, i32 %evl)
+  ret <vscale x 4 x i64> %v
+}
+
+declare <vscale x 8 x i64> @llvm.vp.ssub.sat.nxv8i64(<vscale x 8 x i64>, <vscale x 8 x i64>, <vscale x 8 x i1>, i32)
+
+define <vscale x 8 x i64> @vssub_vv_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_nxv8i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT:    vssub.vv v8, v8, v16, v0.t
+; CHECK-NEXT:    ret
+  %v = call <vscale x 8 x i64> @llvm.vp.ssub.sat.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %b, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i64> %v
+}
+
+define <vscale x 8 x i64> @vssub_vv_nxv8i64_unmasked(<vscale x 8 x i64> %va, <vscale x 8 x i64> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_nxv8i64_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT:    vssub.vv v8, v8, v16
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+  %v = call <vscale x 8 x i64> @llvm.vp.ssub.sat.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %b, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i64> %v
+}
+
+define <vscale x 8 x i64> @vssub_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vssub_vx_nxv8i64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    sw a1, 12(sp)
+; RV32-NEXT:    sw a0, 8(sp)
+; RV32-NEXT:    addi a0, sp, 8
+; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
+; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
+; RV32-NEXT:    vssub.vv v8, v8, v16, v0.t
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vssub_vx_nxv8i64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vssub.vx v8, v8, a0, v0.t
+; RV64-NEXT:    ret
+  %elt.head = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0
+  %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+  %v = call <vscale x 8 x i64> @llvm.vp.ssub.sat.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i64> %v
+}
+
+define <vscale x 8 x i64> @vssub_vx_nxv8i64_unmasked(<vscale x 8 x i64> %va, i64 %b, i32 zeroext %evl) {
+; RV32-LABEL: vssub_vx_nxv8i64_unmasked:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    sw a1, 12(sp)
+; RV32-NEXT:    sw a0, 8(sp)
+; RV32-NEXT:    addi a0, sp, 8
+; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
+; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
+; RV32-NEXT:    vssub.vv v8, v8, v16
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vssub_vx_nxv8i64_unmasked:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vssub.vx v8, v8, a0
+; RV64-NEXT:    ret
+  %elt.head = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0
+  %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+  %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+  %v = call <vscale x 8 x i64> @llvm.vp.ssub.sat.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i64> %v
+}
+
+define <vscale x 8 x i64> @vssub_vi_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_nxv8i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 8 x i64> poison, i64 -1, i32 0
+  %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+  %v = call <vscale x 8 x i64> @llvm.vp.ssub.sat.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i64> %v
+}
+
+define <vscale x 8 x i64> @vssub_vi_nxv8i64_unmasked(<vscale x 8 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_nxv8i64_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT:    vssub.vx v8, v8, a1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 8 x i64> poison, i64 -1, i32 0
+  %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+  %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+  %v = call <vscale x 8 x i64> @llvm.vp.ssub.sat.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i64> %v
+}

diff  --git a/llvm/test/CodeGen/RISCV/rvv/vssubu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vssubu-vp.ll
new file mode 100644
index 00000000000000..e5589ce1a9bc6e
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vssubu-vp.ll
@@ -0,0 +1,2065 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s --check-prefixes=CHECK,RV64
+
+declare <vscale x 8 x i7> @llvm.vp.usub.sat.nxv8i7(<vscale x 8 x i7>, <vscale x 8 x i7>, <vscale x 8 x i1>, i32)
+
+define <vscale x 8 x i7> @vssubu_vx_nxv8i7(<vscale x 8 x i7> %a, i7 signext %b, <vscale x 8 x i1> %mask, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_nxv8i7:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a2, 127
+; CHECK-NEXT:    vsetvli a3, zero, e8, m1, ta, ma
+; CHECK-NEXT:    vand.vx v8, v8, a2
+; CHECK-NEXT:    vmv.v.x v9, a0
+; CHECK-NEXT:    vand.vx v9, v9, a2
+; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-NEXT:    vssubu.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 8 x i7> poison, i7 %b, i32 0
+  %vb = shufflevector <vscale x 8 x i7> %elt.head, <vscale x 8 x i7> poison, <vscale x 8 x i32> zeroinitializer
+  %v = call <vscale x 8 x i7> @llvm.vp.usub.sat.nxv8i7(<vscale x 8 x i7> %a, <vscale x 8 x i7> %vb, <vscale x 8 x i1> %mask, i32 %evl)
+  ret <vscale x 8 x i7> %v
+}
+
+declare <vscale x 1 x i8> @llvm.vp.usub.sat.nxv1i8(<vscale x 1 x i8>, <vscale x 1 x i8>, <vscale x 1 x i1>, i32)
+
+define <vscale x 1 x i8> @vssubu_vv_nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_nxv1i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-NEXT:    vssubu.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    ret
+  %v = call <vscale x 1 x i8> @llvm.vp.usub.sat.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %b, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i8> %v
+}
+
+define <vscale x 1 x i8> @vssubu_vv_nxv1i8_unmasked(<vscale x 1 x i8> %va, <vscale x 1 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_nxv1i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-NEXT:    vssubu.vv v8, v8, v9
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x i8> @llvm.vp.usub.sat.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %b, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i8> %v
+}
+
+define <vscale x 1 x i8> @vssubu_vx_nxv1i8(<vscale x 1 x i8> %va, i8 %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_nxv1i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 1 x i8> poison, i8 %b, i32 0
+  %vb = shufflevector <vscale x 1 x i8> %elt.head, <vscale x 1 x i8> poison, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x i8> @llvm.vp.usub.sat.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i8> %v
+}
+
+define <vscale x 1 x i8> @vssubu_vx_nxv1i8_commute(<vscale x 1 x i8> %va, i8 %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_nxv1i8_commute:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a2, zero, e8, mf8, ta, ma
+; CHECK-NEXT:    vmv.v.x v9, a0
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT:    vssubu.vv v8, v9, v8, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 1 x i8> poison, i8 %b, i32 0
+  %vb = shufflevector <vscale x 1 x i8> %elt.head, <vscale x 1 x i8> poison, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x i8> @llvm.vp.usub.sat.nxv1i8(<vscale x 1 x i8> %vb, <vscale x 1 x i8> %va, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i8> %v
+}
+
+define <vscale x 1 x i8> @vssubu_vx_nxv1i8_unmasked(<vscale x 1 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_nxv1i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 1 x i8> poison, i8 %b, i32 0
+  %vb = shufflevector <vscale x 1 x i8> %elt.head, <vscale x 1 x i8> poison, <vscale x 1 x i32> zeroinitializer
+  %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x i8> @llvm.vp.usub.sat.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i8> %v
+}
+
+define <vscale x 1 x i8> @vssubu_vi_nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_nxv1i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 1 x i8> poison, i8 -1, i32 0
+  %vb = shufflevector <vscale x 1 x i8> %elt.head, <vscale x 1 x i8> poison, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x i8> @llvm.vp.usub.sat.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i8> %v
+}
+
+define <vscale x 1 x i8> @vssubu_vi_nxv1i8_unmasked(<vscale x 1 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_nxv1i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 1 x i8> poison, i8 -1, i32 0
+  %vb = shufflevector <vscale x 1 x i8> %elt.head, <vscale x 1 x i8> poison, <vscale x 1 x i32> zeroinitializer
+  %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x i8> @llvm.vp.usub.sat.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i8> %v
+}
+
+declare <vscale x 2 x i8> @llvm.vp.usub.sat.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i8>, <vscale x 2 x i1>, i32)
+
+define <vscale x 2 x i8> @vssubu_vv_nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %b, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_nxv2i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-NEXT:    vssubu.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    ret
+  %v = call <vscale x 2 x i8> @llvm.vp.usub.sat.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %b, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x i8> %v
+}
+
+define <vscale x 2 x i8> @vssubu_vv_nxv2i8_unmasked(<vscale x 2 x i8> %va, <vscale x 2 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_nxv2i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-NEXT:    vssubu.vv v8, v8, v9
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+  %v = call <vscale x 2 x i8> @llvm.vp.usub.sat.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %b, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x i8> %v
+}
+
+define <vscale x 2 x i8> @vssubu_vx_nxv2i8(<vscale x 2 x i8> %va, i8 %b, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_nxv2i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 2 x i8> poison, i8 %b, i32 0
+  %vb = shufflevector <vscale x 2 x i8> %elt.head, <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer
+  %v = call <vscale x 2 x i8> @llvm.vp.usub.sat.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %vb, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x i8> %v
+}
+
+define <vscale x 2 x i8> @vssubu_vx_nxv2i8_unmasked(<vscale x 2 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_nxv2i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 2 x i8> poison, i8 %b, i32 0
+  %vb = shufflevector <vscale x 2 x i8> %elt.head, <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer
+  %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+  %v = call <vscale x 2 x i8> @llvm.vp.usub.sat.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %vb, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x i8> %v
+}
+
+define <vscale x 2 x i8> @vssubu_vi_nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_nxv2i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 2 x i8> poison, i8 -1, i32 0
+  %vb = shufflevector <vscale x 2 x i8> %elt.head, <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer
+  %v = call <vscale x 2 x i8> @llvm.vp.usub.sat.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %vb, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x i8> %v
+}
+
+define <vscale x 2 x i8> @vssubu_vi_nxv2i8_unmasked(<vscale x 2 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_nxv2i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 2 x i8> poison, i8 -1, i32 0
+  %vb = shufflevector <vscale x 2 x i8> %elt.head, <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer
+  %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+  %v = call <vscale x 2 x i8> @llvm.vp.usub.sat.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %vb, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x i8> %v
+}
+
+declare <vscale x 3 x i8> @llvm.vp.usub.sat.nxv3i8(<vscale x 3 x i8>, <vscale x 3 x i8>, <vscale x 3 x i1>, i32)
+
+define <vscale x 3 x i8> @vssubu_vv_nxv3i8(<vscale x 3 x i8> %va, <vscale x 3 x i8> %b, <vscale x 3 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_nxv3i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT:    vssubu.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    ret
+  %v = call <vscale x 3 x i8> @llvm.vp.usub.sat.nxv3i8(<vscale x 3 x i8> %va, <vscale x 3 x i8> %b, <vscale x 3 x i1> %m, i32 %evl)
+  ret <vscale x 3 x i8> %v
+}
+
+define <vscale x 3 x i8> @vssubu_vv_nxv3i8_unmasked(<vscale x 3 x i8> %va, <vscale x 3 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_nxv3i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT:    vssubu.vv v8, v8, v9
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 3 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 3 x i1> %head, <vscale x 3 x i1> poison, <vscale x 3 x i32> zeroinitializer
+  %v = call <vscale x 3 x i8> @llvm.vp.usub.sat.nxv3i8(<vscale x 3 x i8> %va, <vscale x 3 x i8> %b, <vscale x 3 x i1> %m, i32 %evl)
+  ret <vscale x 3 x i8> %v
+}
+
+define <vscale x 3 x i8> @vssubu_vx_nxv3i8(<vscale x 3 x i8> %va, i8 %b, <vscale x 3 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_nxv3i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 3 x i8> poison, i8 %b, i32 0
+  %vb = shufflevector <vscale x 3 x i8> %elt.head, <vscale x 3 x i8> poison, <vscale x 3 x i32> zeroinitializer
+  %v = call <vscale x 3 x i8> @llvm.vp.usub.sat.nxv3i8(<vscale x 3 x i8> %va, <vscale x 3 x i8> %vb, <vscale x 3 x i1> %m, i32 %evl)
+  ret <vscale x 3 x i8> %v
+}
+
+define <vscale x 3 x i8> @vssubu_vx_nxv3i8_unmasked(<vscale x 3 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_nxv3i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 3 x i8> poison, i8 %b, i32 0
+  %vb = shufflevector <vscale x 3 x i8> %elt.head, <vscale x 3 x i8> poison, <vscale x 3 x i32> zeroinitializer
+  %head = insertelement <vscale x 3 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 3 x i1> %head, <vscale x 3 x i1> poison, <vscale x 3 x i32> zeroinitializer
+  %v = call <vscale x 3 x i8> @llvm.vp.usub.sat.nxv3i8(<vscale x 3 x i8> %va, <vscale x 3 x i8> %vb, <vscale x 3 x i1> %m, i32 %evl)
+  ret <vscale x 3 x i8> %v
+}
+
+define <vscale x 3 x i8> @vssubu_vi_nxv3i8(<vscale x 3 x i8> %va, <vscale x 3 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_nxv3i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 3 x i8> poison, i8 -1, i32 0
+  %vb = shufflevector <vscale x 3 x i8> %elt.head, <vscale x 3 x i8> poison, <vscale x 3 x i32> zeroinitializer
+  %v = call <vscale x 3 x i8> @llvm.vp.usub.sat.nxv3i8(<vscale x 3 x i8> %va, <vscale x 3 x i8> %vb, <vscale x 3 x i1> %m, i32 %evl)
+  ret <vscale x 3 x i8> %v
+}
+
+define <vscale x 3 x i8> @vssubu_vi_nxv3i8_unmasked(<vscale x 3 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_nxv3i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 3 x i8> poison, i8 -1, i32 0
+  %vb = shufflevector <vscale x 3 x i8> %elt.head, <vscale x 3 x i8> poison, <vscale x 3 x i32> zeroinitializer
+  %head = insertelement <vscale x 3 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 3 x i1> %head, <vscale x 3 x i1> poison, <vscale x 3 x i32> zeroinitializer
+  %v = call <vscale x 3 x i8> @llvm.vp.usub.sat.nxv3i8(<vscale x 3 x i8> %va, <vscale x 3 x i8> %vb, <vscale x 3 x i1> %m, i32 %evl)
+  ret <vscale x 3 x i8> %v
+}
+
+declare <vscale x 4 x i8> @llvm.vp.usub.sat.nxv4i8(<vscale x 4 x i8>, <vscale x 4 x i8>, <vscale x 4 x i1>, i32)
+
+define <vscale x 4 x i8> @vssubu_vv_nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %b, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_nxv4i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT:    vssubu.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    ret
+  %v = call <vscale x 4 x i8> @llvm.vp.usub.sat.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %b, <vscale x 4 x i1> %m, i32 %evl)
+  ret <vscale x 4 x i8> %v
+}
+
+define <vscale x 4 x i8> @vssubu_vv_nxv4i8_unmasked(<vscale x 4 x i8> %va, <vscale x 4 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_nxv4i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT:    vssubu.vv v8, v8, v9
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
+  %v = call <vscale x 4 x i8> @llvm.vp.usub.sat.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %b, <vscale x 4 x i1> %m, i32 %evl)
+  ret <vscale x 4 x i8> %v
+}
+
+define <vscale x 4 x i8> @vssubu_vx_nxv4i8(<vscale x 4 x i8> %va, i8 %b, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_nxv4i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 4 x i8> poison, i8 %b, i32 0
+  %vb = shufflevector <vscale x 4 x i8> %elt.head, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer
+  %v = call <vscale x 4 x i8> @llvm.vp.usub.sat.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %vb, <vscale x 4 x i1> %m, i32 %evl)
+  ret <vscale x 4 x i8> %v
+}
+
+define <vscale x 4 x i8> @vssubu_vx_nxv4i8_unmasked(<vscale x 4 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_nxv4i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 4 x i8> poison, i8 %b, i32 0
+  %vb = shufflevector <vscale x 4 x i8> %elt.head, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer
+  %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
+  %v = call <vscale x 4 x i8> @llvm.vp.usub.sat.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %vb, <vscale x 4 x i1> %m, i32 %evl)
+  ret <vscale x 4 x i8> %v
+}
+
+define <vscale x 4 x i8> @vssubu_vi_nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_nxv4i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 4 x i8> poison, i8 -1, i32 0
+  %vb = shufflevector <vscale x 4 x i8> %elt.head, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer
+  %v = call <vscale x 4 x i8> @llvm.vp.usub.sat.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %vb, <vscale x 4 x i1> %m, i32 %evl)
+  ret <vscale x 4 x i8> %v
+}
+
+define <vscale x 4 x i8> @vssubu_vi_nxv4i8_unmasked(<vscale x 4 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_nxv4i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 4 x i8> poison, i8 -1, i32 0
+  %vb = shufflevector <vscale x 4 x i8> %elt.head, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer
+  %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
+  %v = call <vscale x 4 x i8> @llvm.vp.usub.sat.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %vb, <vscale x 4 x i1> %m, i32 %evl)
+  ret <vscale x 4 x i8> %v
+}
+
+declare <vscale x 8 x i8> @llvm.vp.usub.sat.nxv8i8(<vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i1>, i32)
+
+define <vscale x 8 x i8> @vssubu_vv_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_nxv8i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-NEXT:    vssubu.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    ret
+  %v = call <vscale x 8 x i8> @llvm.vp.usub.sat.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %b, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i8> %v
+}
+
+define <vscale x 8 x i8> @vssubu_vv_nxv8i8_unmasked(<vscale x 8 x i8> %va, <vscale x 8 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_nxv8i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-NEXT:    vssubu.vv v8, v8, v9
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+  %v = call <vscale x 8 x i8> @llvm.vp.usub.sat.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %b, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i8> %v
+}
+
+define <vscale x 8 x i8> @vssubu_vx_nxv8i8(<vscale x 8 x i8> %va, i8 %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_nxv8i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 8 x i8> poison, i8 %b, i32 0
+  %vb = shufflevector <vscale x 8 x i8> %elt.head, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
+  %v = call <vscale x 8 x i8> @llvm.vp.usub.sat.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %vb, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i8> %v
+}
+
+define <vscale x 8 x i8> @vssubu_vx_nxv8i8_unmasked(<vscale x 8 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_nxv8i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 8 x i8> poison, i8 %b, i32 0
+  %vb = shufflevector <vscale x 8 x i8> %elt.head, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
+  %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+  %v = call <vscale x 8 x i8> @llvm.vp.usub.sat.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %vb, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i8> %v
+}
+
+define <vscale x 8 x i8> @vssubu_vi_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_nxv8i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 8 x i8> poison, i8 -1, i32 0
+  %vb = shufflevector <vscale x 8 x i8> %elt.head, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
+  %v = call <vscale x 8 x i8> @llvm.vp.usub.sat.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %vb, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i8> %v
+}
+
+define <vscale x 8 x i8> @vssubu_vi_nxv8i8_unmasked(<vscale x 8 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_nxv8i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 8 x i8> poison, i8 -1, i32 0
+  %vb = shufflevector <vscale x 8 x i8> %elt.head, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
+  %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+  %v = call <vscale x 8 x i8> @llvm.vp.usub.sat.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %vb, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i8> %v
+}
+
+declare <vscale x 16 x i8> @llvm.vp.usub.sat.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i1>, i32)
+
+define <vscale x 16 x i8> @vssubu_vv_nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i8> %b, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_nxv16i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
+; CHECK-NEXT:    vssubu.vv v8, v8, v10, v0.t
+; CHECK-NEXT:    ret
+  %v = call <vscale x 16 x i8> @llvm.vp.usub.sat.nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i8> %b, <vscale x 16 x i1> %m, i32 %evl)
+  ret <vscale x 16 x i8> %v
+}
+
+define <vscale x 16 x i8> @vssubu_vv_nxv16i8_unmasked(<vscale x 16 x i8> %va, <vscale x 16 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_nxv16i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
+; CHECK-NEXT:    vssubu.vv v8, v8, v10
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
+  %v = call <vscale x 16 x i8> @llvm.vp.usub.sat.nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i8> %b, <vscale x 16 x i1> %m, i32 %evl)
+  ret <vscale x 16 x i8> %v
+}
+
+define <vscale x 16 x i8> @vssubu_vx_nxv16i8(<vscale x 16 x i8> %va, i8 %b, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_nxv16i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 16 x i8> poison, i8 %b, i32 0
+  %vb = shufflevector <vscale x 16 x i8> %elt.head, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
+  %v = call <vscale x 16 x i8> @llvm.vp.usub.sat.nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i8> %vb, <vscale x 16 x i1> %m, i32 %evl)
+  ret <vscale x 16 x i8> %v
+}
+
+define <vscale x 16 x i8> @vssubu_vx_nxv16i8_unmasked(<vscale x 16 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_nxv16i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 16 x i8> poison, i8 %b, i32 0
+  %vb = shufflevector <vscale x 16 x i8> %elt.head, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
+  %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
+  %v = call <vscale x 16 x i8> @llvm.vp.usub.sat.nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i8> %vb, <vscale x 16 x i1> %m, i32 %evl)
+  ret <vscale x 16 x i8> %v
+}
+
+define <vscale x 16 x i8> @vssubu_vi_nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_nxv16i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 16 x i8> poison, i8 -1, i32 0
+  %vb = shufflevector <vscale x 16 x i8> %elt.head, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
+  %v = call <vscale x 16 x i8> @llvm.vp.usub.sat.nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i8> %vb, <vscale x 16 x i1> %m, i32 %evl)
+  ret <vscale x 16 x i8> %v
+}
+
+define <vscale x 16 x i8> @vssubu_vi_nxv16i8_unmasked(<vscale x 16 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_nxv16i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 16 x i8> poison, i8 -1, i32 0
+  %vb = shufflevector <vscale x 16 x i8> %elt.head, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
+  %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
+  %v = call <vscale x 16 x i8> @llvm.vp.usub.sat.nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i8> %vb, <vscale x 16 x i1> %m, i32 %evl)
+  ret <vscale x 16 x i8> %v
+}
+
+declare <vscale x 32 x i8> @llvm.vp.usub.sat.nxv32i8(<vscale x 32 x i8>, <vscale x 32 x i8>, <vscale x 32 x i1>, i32)
+
+define <vscale x 32 x i8> @vssubu_vv_nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i8> %b, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_nxv32i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
+; CHECK-NEXT:    vssubu.vv v8, v8, v12, v0.t
+; CHECK-NEXT:    ret
+  %v = call <vscale x 32 x i8> @llvm.vp.usub.sat.nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i8> %b, <vscale x 32 x i1> %m, i32 %evl)
+  ret <vscale x 32 x i8> %v
+}
+
+define <vscale x 32 x i8> @vssubu_vv_nxv32i8_unmasked(<vscale x 32 x i8> %va, <vscale x 32 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_nxv32i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
+; CHECK-NEXT:    vssubu.vv v8, v8, v12
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
+  %v = call <vscale x 32 x i8> @llvm.vp.usub.sat.nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i8> %b, <vscale x 32 x i1> %m, i32 %evl)
+  ret <vscale x 32 x i8> %v
+}
+
+define <vscale x 32 x i8> @vssubu_vx_nxv32i8(<vscale x 32 x i8> %va, i8 %b, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_nxv32i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, m4, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 32 x i8> poison, i8 %b, i32 0
+  %vb = shufflevector <vscale x 32 x i8> %elt.head, <vscale x 32 x i8> poison, <vscale x 32 x i32> zeroinitializer
+  %v = call <vscale x 32 x i8> @llvm.vp.usub.sat.nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i8> %vb, <vscale x 32 x i1> %m, i32 %evl)
+  ret <vscale x 32 x i8> %v
+}
+
+define <vscale x 32 x i8> @vssubu_vx_nxv32i8_unmasked(<vscale x 32 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_nxv32i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, m4, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 32 x i8> poison, i8 %b, i32 0
+  %vb = shufflevector <vscale x 32 x i8> %elt.head, <vscale x 32 x i8> poison, <vscale x 32 x i32> zeroinitializer
+  %head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
+  %v = call <vscale x 32 x i8> @llvm.vp.usub.sat.nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i8> %vb, <vscale x 32 x i1> %m, i32 %evl)
+  ret <vscale x 32 x i8> %v
+}
+
+define <vscale x 32 x i8> @vssubu_vi_nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_nxv32i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 32 x i8> poison, i8 -1, i32 0
+  %vb = shufflevector <vscale x 32 x i8> %elt.head, <vscale x 32 x i8> poison, <vscale x 32 x i32> zeroinitializer
+  %v = call <vscale x 32 x i8> @llvm.vp.usub.sat.nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i8> %vb, <vscale x 32 x i1> %m, i32 %evl)
+  ret <vscale x 32 x i8> %v
+}
+
+define <vscale x 32 x i8> @vssubu_vi_nxv32i8_unmasked(<vscale x 32 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_nxv32i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 32 x i8> poison, i8 -1, i32 0
+  %vb = shufflevector <vscale x 32 x i8> %elt.head, <vscale x 32 x i8> poison, <vscale x 32 x i32> zeroinitializer
+  %head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
+  %v = call <vscale x 32 x i8> @llvm.vp.usub.sat.nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i8> %vb, <vscale x 32 x i1> %m, i32 %evl)
+  ret <vscale x 32 x i8> %v
+}
+
+declare <vscale x 64 x i8> @llvm.vp.usub.sat.nxv64i8(<vscale x 64 x i8>, <vscale x 64 x i8>, <vscale x 64 x i1>, i32)
+
+define <vscale x 64 x i8> @vssubu_vv_nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i8> %b, <vscale x 64 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_nxv64i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-NEXT:    vssubu.vv v8, v8, v16, v0.t
+; CHECK-NEXT:    ret
+  %v = call <vscale x 64 x i8> @llvm.vp.usub.sat.nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i8> %b, <vscale x 64 x i1> %m, i32 %evl)
+  ret <vscale x 64 x i8> %v
+}
+
+define <vscale x 64 x i8> @vssubu_vv_nxv64i8_unmasked(<vscale x 64 x i8> %va, <vscale x 64 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_nxv64i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-NEXT:    vssubu.vv v8, v8, v16
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 64 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 64 x i1> %head, <vscale x 64 x i1> poison, <vscale x 64 x i32> zeroinitializer
+  %v = call <vscale x 64 x i8> @llvm.vp.usub.sat.nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i8> %b, <vscale x 64 x i1> %m, i32 %evl)
+  ret <vscale x 64 x i8> %v
+}
+
+define <vscale x 64 x i8> @vssubu_vx_nxv64i8(<vscale x 64 x i8> %va, i8 %b, <vscale x 64 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_nxv64i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 64 x i8> poison, i8 %b, i32 0
+  %vb = shufflevector <vscale x 64 x i8> %elt.head, <vscale x 64 x i8> poison, <vscale x 64 x i32> zeroinitializer
+  %v = call <vscale x 64 x i8> @llvm.vp.usub.sat.nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i8> %vb, <vscale x 64 x i1> %m, i32 %evl)
+  ret <vscale x 64 x i8> %v
+}
+
+define <vscale x 64 x i8> @vssubu_vx_nxv64i8_unmasked(<vscale x 64 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_nxv64i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 64 x i8> poison, i8 %b, i32 0
+  %vb = shufflevector <vscale x 64 x i8> %elt.head, <vscale x 64 x i8> poison, <vscale x 64 x i32> zeroinitializer
+  %head = insertelement <vscale x 64 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 64 x i1> %head, <vscale x 64 x i1> poison, <vscale x 64 x i32> zeroinitializer
+  %v = call <vscale x 64 x i8> @llvm.vp.usub.sat.nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i8> %vb, <vscale x 64 x i1> %m, i32 %evl)
+  ret <vscale x 64 x i8> %v
+}
+
+define <vscale x 64 x i8> @vssubu_vi_nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_nxv64i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 64 x i8> poison, i8 -1, i32 0
+  %vb = shufflevector <vscale x 64 x i8> %elt.head, <vscale x 64 x i8> poison, <vscale x 64 x i32> zeroinitializer
+  %v = call <vscale x 64 x i8> @llvm.vp.usub.sat.nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i8> %vb, <vscale x 64 x i1> %m, i32 %evl)
+  ret <vscale x 64 x i8> %v
+}
+
+define <vscale x 64 x i8> @vssubu_vi_nxv64i8_unmasked(<vscale x 64 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_nxv64i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 64 x i8> poison, i8 -1, i32 0
+  %vb = shufflevector <vscale x 64 x i8> %elt.head, <vscale x 64 x i8> poison, <vscale x 64 x i32> zeroinitializer
+  %head = insertelement <vscale x 64 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 64 x i1> %head, <vscale x 64 x i1> poison, <vscale x 64 x i32> zeroinitializer
+  %v = call <vscale x 64 x i8> @llvm.vp.usub.sat.nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i8> %vb, <vscale x 64 x i1> %m, i32 %evl)
+  ret <vscale x 64 x i8> %v
+}
+
+; Test that split-legalization works when the mask itself needs splitting.
+
+declare <vscale x 128 x i8> @llvm.vp.usub.sat.nxv128i8(<vscale x 128 x i8>, <vscale x 128 x i8>, <vscale x 128 x i1>, i32)
+
+define <vscale x 128 x i8> @vssubu_vi_nxv128i8(<vscale x 128 x i8> %va, <vscale x 128 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_nxv128i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vmv1r.v v24, v0
+; CHECK-NEXT:    vsetvli a2, zero, e8, m8, ta, ma
+; CHECK-NEXT:    vlm.v v0, (a0)
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    slli a2, a2, 3
+; CHECK-NEXT:    sub a0, a1, a2
+; CHECK-NEXT:    sltu a3, a1, a0
+; CHECK-NEXT:    addi a3, a3, -1
+; CHECK-NEXT:    and a3, a3, a0
+; CHECK-NEXT:    li a0, -1
+; CHECK-NEXT:    vsetvli zero, a3, e8, m8, ta, ma
+; CHECK-NEXT:    vssubu.vx v16, v16, a0, v0.t
+; CHECK-NEXT:    bltu a1, a2, .LBB50_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    mv a1, a2
+; CHECK-NEXT:  .LBB50_2:
+; CHECK-NEXT:    vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-NEXT:    vmv1r.v v0, v24
+; CHECK-NEXT:    vssubu.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 128 x i8> poison, i8 -1, i32 0
+  %vb = shufflevector <vscale x 128 x i8> %elt.head, <vscale x 128 x i8> poison, <vscale x 128 x i32> zeroinitializer
+  %v = call <vscale x 128 x i8> @llvm.vp.usub.sat.nxv128i8(<vscale x 128 x i8> %va, <vscale x 128 x i8> %vb, <vscale x 128 x i1> %m, i32 %evl)
+  ret <vscale x 128 x i8> %v
+}
+
+define <vscale x 128 x i8> @vssubu_vi_nxv128i8_unmasked(<vscale x 128 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_nxv128i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    slli a1, a1, 3
+; CHECK-NEXT:    sub a2, a0, a1
+; CHECK-NEXT:    sltu a3, a0, a2
+; CHECK-NEXT:    addi a3, a3, -1
+; CHECK-NEXT:    and a3, a3, a2
+; CHECK-NEXT:    li a2, -1
+; CHECK-NEXT:    vsetvli zero, a3, e8, m8, ta, ma
+; CHECK-NEXT:    vssubu.vx v16, v16, a2
+; CHECK-NEXT:    bltu a0, a1, .LBB51_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    mv a0, a1
+; CHECK-NEXT:  .LBB51_2:
+; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a2
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 128 x i8> poison, i8 -1, i32 0
+  %vb = shufflevector <vscale x 128 x i8> %elt.head, <vscale x 128 x i8> poison, <vscale x 128 x i32> zeroinitializer
+  %head = insertelement <vscale x 128 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 128 x i1> %head, <vscale x 128 x i1> poison, <vscale x 128 x i32> zeroinitializer
+  %v = call <vscale x 128 x i8> @llvm.vp.usub.sat.nxv128i8(<vscale x 128 x i8> %va, <vscale x 128 x i8> %vb, <vscale x 128 x i1> %m, i32 %evl)
+  ret <vscale x 128 x i8> %v
+}
+
+declare <vscale x 1 x i16> @llvm.vp.usub.sat.nxv1i16(<vscale x 1 x i16>, <vscale x 1 x i16>, <vscale x 1 x i1>, i32)
+
+define <vscale x 1 x i16> @vssubu_vv_nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_nxv1i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT:    vssubu.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    ret
+  %v = call <vscale x 1 x i16> @llvm.vp.usub.sat.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %b, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i16> %v
+}
+
+define <vscale x 1 x i16> @vssubu_vv_nxv1i16_unmasked(<vscale x 1 x i16> %va, <vscale x 1 x i16> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_nxv1i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT:    vssubu.vv v8, v8, v9
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x i16> @llvm.vp.usub.sat.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %b, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i16> %v
+}
+
+define <vscale x 1 x i16> @vssubu_vx_nxv1i16(<vscale x 1 x i16> %va, i16 %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_nxv1i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 1 x i16> poison, i16 %b, i32 0
+  %vb = shufflevector <vscale x 1 x i16> %elt.head, <vscale x 1 x i16> poison, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x i16> @llvm.vp.usub.sat.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %vb, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i16> %v
+}
+
+define <vscale x 1 x i16> @vssubu_vx_nxv1i16_unmasked(<vscale x 1 x i16> %va, i16 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_nxv1i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 1 x i16> poison, i16 %b, i32 0
+  %vb = shufflevector <vscale x 1 x i16> %elt.head, <vscale x 1 x i16> poison, <vscale x 1 x i32> zeroinitializer
+  %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x i16> @llvm.vp.usub.sat.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %vb, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i16> %v
+}
+
+define <vscale x 1 x i16> @vssubu_vi_nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_nxv1i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 1 x i16> poison, i16 -1, i32 0
+  %vb = shufflevector <vscale x 1 x i16> %elt.head, <vscale x 1 x i16> poison, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x i16> @llvm.vp.usub.sat.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %vb, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i16> %v
+}
+
+define <vscale x 1 x i16> @vssubu_vi_nxv1i16_unmasked(<vscale x 1 x i16> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_nxv1i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 1 x i16> poison, i16 -1, i32 0
+  %vb = shufflevector <vscale x 1 x i16> %elt.head, <vscale x 1 x i16> poison, <vscale x 1 x i32> zeroinitializer
+  %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x i16> @llvm.vp.usub.sat.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %vb, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i16> %v
+}
+
+declare <vscale x 2 x i16> @llvm.vp.usub.sat.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i16>, <vscale x 2 x i1>, i32)
+
+define <vscale x 2 x i16> @vssubu_vv_nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %b, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_nxv2i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT:    vssubu.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    ret
+  %v = call <vscale x 2 x i16> @llvm.vp.usub.sat.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %b, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x i16> %v
+}
+
+define <vscale x 2 x i16> @vssubu_vv_nxv2i16_unmasked(<vscale x 2 x i16> %va, <vscale x 2 x i16> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_nxv2i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT:    vssubu.vv v8, v8, v9
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+  %v = call <vscale x 2 x i16> @llvm.vp.usub.sat.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %b, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x i16> %v
+}
+
+define <vscale x 2 x i16> @vssubu_vx_nxv2i16(<vscale x 2 x i16> %va, i16 %b, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_nxv2i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 2 x i16> poison, i16 %b, i32 0
+  %vb = shufflevector <vscale x 2 x i16> %elt.head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
+  %v = call <vscale x 2 x i16> @llvm.vp.usub.sat.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %vb, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x i16> %v
+}
+
+define <vscale x 2 x i16> @vssubu_vx_nxv2i16_unmasked(<vscale x 2 x i16> %va, i16 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_nxv2i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 2 x i16> poison, i16 %b, i32 0
+  %vb = shufflevector <vscale x 2 x i16> %elt.head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
+  %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+  %v = call <vscale x 2 x i16> @llvm.vp.usub.sat.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %vb, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x i16> %v
+}
+
+define <vscale x 2 x i16> @vssubu_vi_nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_nxv2i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 2 x i16> poison, i16 -1, i32 0
+  %vb = shufflevector <vscale x 2 x i16> %elt.head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
+  %v = call <vscale x 2 x i16> @llvm.vp.usub.sat.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %vb, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x i16> %v
+}
+
+define <vscale x 2 x i16> @vssubu_vi_nxv2i16_unmasked(<vscale x 2 x i16> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_nxv2i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 2 x i16> poison, i16 -1, i32 0
+  %vb = shufflevector <vscale x 2 x i16> %elt.head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
+  %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+  %v = call <vscale x 2 x i16> @llvm.vp.usub.sat.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %vb, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x i16> %v
+}
+
+declare <vscale x 4 x i16> @llvm.vp.usub.sat.nxv4i16(<vscale x 4 x i16>, <vscale x 4 x i16>, <vscale x 4 x i1>, i32)
+
+define <vscale x 4 x i16> @vssubu_vv_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %b, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_nxv4i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT:    vssubu.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    ret
+  %v = call <vscale x 4 x i16> @llvm.vp.usub.sat.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %b, <vscale x 4 x i1> %m, i32 %evl)
+  ret <vscale x 4 x i16> %v
+}
+
+define <vscale x 4 x i16> @vssubu_vv_nxv4i16_unmasked(<vscale x 4 x i16> %va, <vscale x 4 x i16> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_nxv4i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT:    vssubu.vv v8, v8, v9
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
+  %v = call <vscale x 4 x i16> @llvm.vp.usub.sat.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %b, <vscale x 4 x i1> %m, i32 %evl)
+  ret <vscale x 4 x i16> %v
+}
+
+define <vscale x 4 x i16> @vssubu_vx_nxv4i16(<vscale x 4 x i16> %va, i16 %b, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_nxv4i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 4 x i16> poison, i16 %b, i32 0
+  %vb = shufflevector <vscale x 4 x i16> %elt.head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
+  %v = call <vscale x 4 x i16> @llvm.vp.usub.sat.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %vb, <vscale x 4 x i1> %m, i32 %evl)
+  ret <vscale x 4 x i16> %v
+}
+
+define <vscale x 4 x i16> @vssubu_vx_nxv4i16_unmasked(<vscale x 4 x i16> %va, i16 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_nxv4i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 4 x i16> poison, i16 %b, i32 0
+  %vb = shufflevector <vscale x 4 x i16> %elt.head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
+  %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
+  %v = call <vscale x 4 x i16> @llvm.vp.usub.sat.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %vb, <vscale x 4 x i1> %m, i32 %evl)
+  ret <vscale x 4 x i16> %v
+}
+
+define <vscale x 4 x i16> @vssubu_vi_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_nxv4i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 4 x i16> poison, i16 -1, i32 0
+  %vb = shufflevector <vscale x 4 x i16> %elt.head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
+  %v = call <vscale x 4 x i16> @llvm.vp.usub.sat.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %vb, <vscale x 4 x i1> %m, i32 %evl)
+  ret <vscale x 4 x i16> %v
+}
+
+define <vscale x 4 x i16> @vssubu_vi_nxv4i16_unmasked(<vscale x 4 x i16> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_nxv4i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 4 x i16> poison, i16 -1, i32 0
+  %vb = shufflevector <vscale x 4 x i16> %elt.head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
+  %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
+  %v = call <vscale x 4 x i16> @llvm.vp.usub.sat.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %vb, <vscale x 4 x i1> %m, i32 %evl)
+  ret <vscale x 4 x i16> %v
+}
+
+declare <vscale x 8 x i16> @llvm.vp.usub.sat.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i1>, i32)
+
+define <vscale x 8 x i16> @vssubu_vv_nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_nxv8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT:    vssubu.vv v8, v8, v10, v0.t
+; CHECK-NEXT:    ret
+  %v = call <vscale x 8 x i16> @llvm.vp.usub.sat.nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %b, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i16> %v
+}
+
+define <vscale x 8 x i16> @vssubu_vv_nxv8i16_unmasked(<vscale x 8 x i16> %va, <vscale x 8 x i16> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_nxv8i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT:    vssubu.vv v8, v8, v10
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+  %v = call <vscale x 8 x i16> @llvm.vp.usub.sat.nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %b, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i16> %v
+}
+
+define <vscale x 8 x i16> @vssubu_vx_nxv8i16(<vscale x 8 x i16> %va, i16 %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_nxv8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 8 x i16> poison, i16 %b, i32 0
+  %vb = shufflevector <vscale x 8 x i16> %elt.head, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
+  %v = call <vscale x 8 x i16> @llvm.vp.usub.sat.nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %vb, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i16> %v
+}
+
+define <vscale x 8 x i16> @vssubu_vx_nxv8i16_unmasked(<vscale x 8 x i16> %va, i16 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_nxv8i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 8 x i16> poison, i16 %b, i32 0
+  %vb = shufflevector <vscale x 8 x i16> %elt.head, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
+  %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+  %v = call <vscale x 8 x i16> @llvm.vp.usub.sat.nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %vb, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i16> %v
+}
+
+define <vscale x 8 x i16> @vssubu_vi_nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_nxv8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 8 x i16> poison, i16 -1, i32 0
+  %vb = shufflevector <vscale x 8 x i16> %elt.head, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
+  %v = call <vscale x 8 x i16> @llvm.vp.usub.sat.nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %vb, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i16> %v
+}
+
+define <vscale x 8 x i16> @vssubu_vi_nxv8i16_unmasked(<vscale x 8 x i16> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_nxv8i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 8 x i16> poison, i16 -1, i32 0
+  %vb = shufflevector <vscale x 8 x i16> %elt.head, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
+  %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+  %v = call <vscale x 8 x i16> @llvm.vp.usub.sat.nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %vb, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i16> %v
+}
+
+declare <vscale x 16 x i16> @llvm.vp.usub.sat.nxv16i16(<vscale x 16 x i16>, <vscale x 16 x i16>, <vscale x 16 x i1>, i32)
+
+define <vscale x 16 x i16> @vssubu_vv_nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i16> %b, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_nxv16i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-NEXT:    vssubu.vv v8, v8, v12, v0.t
+; CHECK-NEXT:    ret
+  %v = call <vscale x 16 x i16> @llvm.vp.usub.sat.nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i16> %b, <vscale x 16 x i1> %m, i32 %evl)
+  ret <vscale x 16 x i16> %v
+}
+
+define <vscale x 16 x i16> @vssubu_vv_nxv16i16_unmasked(<vscale x 16 x i16> %va, <vscale x 16 x i16> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_nxv16i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-NEXT:    vssubu.vv v8, v8, v12
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
+  %v = call <vscale x 16 x i16> @llvm.vp.usub.sat.nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i16> %b, <vscale x 16 x i1> %m, i32 %evl)
+  ret <vscale x 16 x i16> %v
+}
+
+define <vscale x 16 x i16> @vssubu_vx_nxv16i16(<vscale x 16 x i16> %va, i16 %b, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_nxv16i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 16 x i16> poison, i16 %b, i32 0
+  %vb = shufflevector <vscale x 16 x i16> %elt.head, <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer
+  %v = call <vscale x 16 x i16> @llvm.vp.usub.sat.nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i16> %vb, <vscale x 16 x i1> %m, i32 %evl)
+  ret <vscale x 16 x i16> %v
+}
+
+define <vscale x 16 x i16> @vssubu_vx_nxv16i16_unmasked(<vscale x 16 x i16> %va, i16 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_nxv16i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 16 x i16> poison, i16 %b, i32 0
+  %vb = shufflevector <vscale x 16 x i16> %elt.head, <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer
+  %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
+  %v = call <vscale x 16 x i16> @llvm.vp.usub.sat.nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i16> %vb, <vscale x 16 x i1> %m, i32 %evl)
+  ret <vscale x 16 x i16> %v
+}
+
+define <vscale x 16 x i16> @vssubu_vi_nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_nxv16i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 16 x i16> poison, i16 -1, i32 0
+  %vb = shufflevector <vscale x 16 x i16> %elt.head, <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer
+  %v = call <vscale x 16 x i16> @llvm.vp.usub.sat.nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i16> %vb, <vscale x 16 x i1> %m, i32 %evl)
+  ret <vscale x 16 x i16> %v
+}
+
+define <vscale x 16 x i16> @vssubu_vi_nxv16i16_unmasked(<vscale x 16 x i16> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_nxv16i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 16 x i16> poison, i16 -1, i32 0
+  %vb = shufflevector <vscale x 16 x i16> %elt.head, <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer
+  %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
+  %v = call <vscale x 16 x i16> @llvm.vp.usub.sat.nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i16> %vb, <vscale x 16 x i1> %m, i32 %evl)
+  ret <vscale x 16 x i16> %v
+}
+
+declare <vscale x 32 x i16> @llvm.vp.usub.sat.nxv32i16(<vscale x 32 x i16>, <vscale x 32 x i16>, <vscale x 32 x i1>, i32)
+
+define <vscale x 32 x i16> @vssubu_vv_nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i16> %b, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_nxv32i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
+; CHECK-NEXT:    vssubu.vv v8, v8, v16, v0.t
+; CHECK-NEXT:    ret
+  %v = call <vscale x 32 x i16> @llvm.vp.usub.sat.nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i16> %b, <vscale x 32 x i1> %m, i32 %evl)
+  ret <vscale x 32 x i16> %v
+}
+
+define <vscale x 32 x i16> @vssubu_vv_nxv32i16_unmasked(<vscale x 32 x i16> %va, <vscale x 32 x i16> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_nxv32i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
+; CHECK-NEXT:    vssubu.vv v8, v8, v16
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
+  %v = call <vscale x 32 x i16> @llvm.vp.usub.sat.nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i16> %b, <vscale x 32 x i1> %m, i32 %evl)
+  ret <vscale x 32 x i16> %v
+}
+
+define <vscale x 32 x i16> @vssubu_vx_nxv32i16(<vscale x 32 x i16> %va, i16 %b, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_nxv32i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 32 x i16> poison, i16 %b, i32 0
+  %vb = shufflevector <vscale x 32 x i16> %elt.head, <vscale x 32 x i16> poison, <vscale x 32 x i32> zeroinitializer
+  %v = call <vscale x 32 x i16> @llvm.vp.usub.sat.nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i16> %vb, <vscale x 32 x i1> %m, i32 %evl)
+  ret <vscale x 32 x i16> %v
+}
+
+define <vscale x 32 x i16> @vssubu_vx_nxv32i16_unmasked(<vscale x 32 x i16> %va, i16 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_nxv32i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 32 x i16> poison, i16 %b, i32 0
+  %vb = shufflevector <vscale x 32 x i16> %elt.head, <vscale x 32 x i16> poison, <vscale x 32 x i32> zeroinitializer
+  %head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
+  %v = call <vscale x 32 x i16> @llvm.vp.usub.sat.nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i16> %vb, <vscale x 32 x i1> %m, i32 %evl)
+  ret <vscale x 32 x i16> %v
+}
+
+define <vscale x 32 x i16> @vssubu_vi_nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_nxv32i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 32 x i16> poison, i16 -1, i32 0
+  %vb = shufflevector <vscale x 32 x i16> %elt.head, <vscale x 32 x i16> poison, <vscale x 32 x i32> zeroinitializer
+  %v = call <vscale x 32 x i16> @llvm.vp.usub.sat.nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i16> %vb, <vscale x 32 x i1> %m, i32 %evl)
+  ret <vscale x 32 x i16> %v
+}
+
+define <vscale x 32 x i16> @vssubu_vi_nxv32i16_unmasked(<vscale x 32 x i16> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_nxv32i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 32 x i16> poison, i16 -1, i32 0
+  %vb = shufflevector <vscale x 32 x i16> %elt.head, <vscale x 32 x i16> poison, <vscale x 32 x i32> zeroinitializer
+  %head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
+  %v = call <vscale x 32 x i16> @llvm.vp.usub.sat.nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i16> %vb, <vscale x 32 x i1> %m, i32 %evl)
+  ret <vscale x 32 x i16> %v
+}
+
+declare <vscale x 1 x i32> @llvm.vp.usub.sat.nxv1i32(<vscale x 1 x i32>, <vscale x 1 x i32>, <vscale x 1 x i1>, i32)
+
+define <vscale x 1 x i32> @vssubu_vv_nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_nxv1i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT:    vssubu.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    ret
+  %v = call <vscale x 1 x i32> @llvm.vp.usub.sat.nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %b, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i32> %v
+}
+
+define <vscale x 1 x i32> @vssubu_vv_nxv1i32_unmasked(<vscale x 1 x i32> %va, <vscale x 1 x i32> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_nxv1i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT:    vssubu.vv v8, v8, v9
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x i32> @llvm.vp.usub.sat.nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %b, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i32> %v
+}
+
+define <vscale x 1 x i32> @vssubu_vx_nxv1i32(<vscale x 1 x i32> %va, i32 %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_nxv1i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 1 x i32> poison, i32 %b, i32 0
+  %vb = shufflevector <vscale x 1 x i32> %elt.head, <vscale x 1 x i32> poison, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x i32> @llvm.vp.usub.sat.nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %vb, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i32> %v
+}
+
+define <vscale x 1 x i32> @vssubu_vx_nxv1i32_unmasked(<vscale x 1 x i32> %va, i32 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_nxv1i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 1 x i32> poison, i32 %b, i32 0
+  %vb = shufflevector <vscale x 1 x i32> %elt.head, <vscale x 1 x i32> poison, <vscale x 1 x i32> zeroinitializer
+  %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x i32> @llvm.vp.usub.sat.nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %vb, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i32> %v
+}
+
+define <vscale x 1 x i32> @vssubu_vi_nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_nxv1i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 1 x i32> poison, i32 -1, i32 0
+  %vb = shufflevector <vscale x 1 x i32> %elt.head, <vscale x 1 x i32> poison, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x i32> @llvm.vp.usub.sat.nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %vb, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i32> %v
+}
+
+define <vscale x 1 x i32> @vssubu_vi_nxv1i32_unmasked(<vscale x 1 x i32> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_nxv1i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 1 x i32> poison, i32 -1, i32 0
+  %vb = shufflevector <vscale x 1 x i32> %elt.head, <vscale x 1 x i32> poison, <vscale x 1 x i32> zeroinitializer
+  %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x i32> @llvm.vp.usub.sat.nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %vb, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i32> %v
+}
+
+declare <vscale x 2 x i32> @llvm.vp.usub.sat.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i1>, i32)
+
+define <vscale x 2 x i32> @vssubu_vv_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %b, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_nxv2i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT:    vssubu.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    ret
+  %v = call <vscale x 2 x i32> @llvm.vp.usub.sat.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %b, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x i32> %v
+}
+
+define <vscale x 2 x i32> @vssubu_vv_nxv2i32_unmasked(<vscale x 2 x i32> %va, <vscale x 2 x i32> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_nxv2i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT:    vssubu.vv v8, v8, v9
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+  %v = call <vscale x 2 x i32> @llvm.vp.usub.sat.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %b, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x i32> %v
+}
+
+define <vscale x 2 x i32> @vssubu_vx_nxv2i32(<vscale x 2 x i32> %va, i32 %b, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_nxv2i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 2 x i32> poison, i32 %b, i32 0
+  %vb = shufflevector <vscale x 2 x i32> %elt.head, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
+  %v = call <vscale x 2 x i32> @llvm.vp.usub.sat.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %vb, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x i32> %v
+}
+
+define <vscale x 2 x i32> @vssubu_vx_nxv2i32_unmasked(<vscale x 2 x i32> %va, i32 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_nxv2i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 2 x i32> poison, i32 %b, i32 0
+  %vb = shufflevector <vscale x 2 x i32> %elt.head, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
+  %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+  %v = call <vscale x 2 x i32> @llvm.vp.usub.sat.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %vb, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x i32> %v
+}
+
+define <vscale x 2 x i32> @vssubu_vi_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_nxv2i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 2 x i32> poison, i32 -1, i32 0
+  %vb = shufflevector <vscale x 2 x i32> %elt.head, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
+  %v = call <vscale x 2 x i32> @llvm.vp.usub.sat.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %vb, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x i32> %v
+}
+
+define <vscale x 2 x i32> @vssubu_vi_nxv2i32_unmasked(<vscale x 2 x i32> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_nxv2i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 2 x i32> poison, i32 -1, i32 0
+  %vb = shufflevector <vscale x 2 x i32> %elt.head, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
+  %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+  %v = call <vscale x 2 x i32> @llvm.vp.usub.sat.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %vb, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x i32> %v
+}
+
+declare <vscale x 4 x i32> @llvm.vp.usub.sat.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, i32)
+
+define <vscale x 4 x i32> @vssubu_vv_nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i32> %b, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_nxv4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT:    vssubu.vv v8, v8, v10, v0.t
+; CHECK-NEXT:    ret
+  %v = call <vscale x 4 x i32> @llvm.vp.usub.sat.nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i32> %b, <vscale x 4 x i1> %m, i32 %evl)
+  ret <vscale x 4 x i32> %v
+}
+
+define <vscale x 4 x i32> @vssubu_vv_nxv4i32_unmasked(<vscale x 4 x i32> %va, <vscale x 4 x i32> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_nxv4i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT:    vssubu.vv v8, v8, v10
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
+  %v = call <vscale x 4 x i32> @llvm.vp.usub.sat.nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i32> %b, <vscale x 4 x i1> %m, i32 %evl)
+  ret <vscale x 4 x i32> %v
+}
+
+define <vscale x 4 x i32> @vssubu_vx_nxv4i32(<vscale x 4 x i32> %va, i32 %b, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_nxv4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 4 x i32> poison, i32 %b, i32 0
+  %vb = shufflevector <vscale x 4 x i32> %elt.head, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+  %v = call <vscale x 4 x i32> @llvm.vp.usub.sat.nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i32> %vb, <vscale x 4 x i1> %m, i32 %evl)
+  ret <vscale x 4 x i32> %v
+}
+
+define <vscale x 4 x i32> @vssubu_vx_nxv4i32_unmasked(<vscale x 4 x i32> %va, i32 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_nxv4i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 4 x i32> poison, i32 %b, i32 0
+  %vb = shufflevector <vscale x 4 x i32> %elt.head, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+  %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
+  %v = call <vscale x 4 x i32> @llvm.vp.usub.sat.nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i32> %vb, <vscale x 4 x i1> %m, i32 %evl)
+  ret <vscale x 4 x i32> %v
+}
+
+define <vscale x 4 x i32> @vssubu_vi_nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_nxv4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 4 x i32> poison, i32 -1, i32 0
+  %vb = shufflevector <vscale x 4 x i32> %elt.head, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+  %v = call <vscale x 4 x i32> @llvm.vp.usub.sat.nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i32> %vb, <vscale x 4 x i1> %m, i32 %evl)
+  ret <vscale x 4 x i32> %v
+}
+
+define <vscale x 4 x i32> @vssubu_vi_nxv4i32_unmasked(<vscale x 4 x i32> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_nxv4i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 4 x i32> poison, i32 -1, i32 0
+  %vb = shufflevector <vscale x 4 x i32> %elt.head, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+  %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
+  %v = call <vscale x 4 x i32> @llvm.vp.usub.sat.nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i32> %vb, <vscale x 4 x i1> %m, i32 %evl)
+  ret <vscale x 4 x i32> %v
+}
+
+declare <vscale x 8 x i32> @llvm.vp.usub.sat.nxv8i32(<vscale x 8 x i32>, <vscale x 8 x i32>, <vscale x 8 x i1>, i32)
+
+define <vscale x 8 x i32> @vssubu_vv_nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_nxv8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT:    vssubu.vv v8, v8, v12, v0.t
+; CHECK-NEXT:    ret
+  %v = call <vscale x 8 x i32> @llvm.vp.usub.sat.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %b, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i32> %v
+}
+
+define <vscale x 8 x i32> @vssubu_vv_nxv8i32_unmasked(<vscale x 8 x i32> %va, <vscale x 8 x i32> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_nxv8i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT:    vssubu.vv v8, v8, v12
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+  %v = call <vscale x 8 x i32> @llvm.vp.usub.sat.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %b, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i32> %v
+}
+
+define <vscale x 8 x i32> @vssubu_vx_nxv8i32(<vscale x 8 x i32> %va, i32 %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_nxv8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0
+  %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
+  %v = call <vscale x 8 x i32> @llvm.vp.usub.sat.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %vb, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i32> %v
+}
+
+define <vscale x 8 x i32> @vssubu_vx_nxv8i32_unmasked(<vscale x 8 x i32> %va, i32 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_nxv8i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0
+  %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
+  %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+  %v = call <vscale x 8 x i32> @llvm.vp.usub.sat.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %vb, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i32> %v
+}
+
+define <vscale x 8 x i32> @vssubu_vi_nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_nxv8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 8 x i32> poison, i32 -1, i32 0
+  %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
+  %v = call <vscale x 8 x i32> @llvm.vp.usub.sat.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %vb, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i32> %v
+}
+
+define <vscale x 8 x i32> @vssubu_vi_nxv8i32_unmasked(<vscale x 8 x i32> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_nxv8i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 8 x i32> poison, i32 -1, i32 0
+  %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
+  %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+  %v = call <vscale x 8 x i32> @llvm.vp.usub.sat.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %vb, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i32> %v
+}
+
+declare <vscale x 16 x i32> @llvm.vp.usub.sat.nxv16i32(<vscale x 16 x i32>, <vscale x 16 x i32>, <vscale x 16 x i1>, i32)
+
+define <vscale x 16 x i32> @vssubu_vv_nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i32> %b, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_nxv16i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT:    vssubu.vv v8, v8, v16, v0.t
+; CHECK-NEXT:    ret
+  %v = call <vscale x 16 x i32> @llvm.vp.usub.sat.nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i32> %b, <vscale x 16 x i1> %m, i32 %evl)
+  ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @vssubu_vv_nxv16i32_unmasked(<vscale x 16 x i32> %va, <vscale x 16 x i32> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_nxv16i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT:    vssubu.vv v8, v8, v16
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
+  %v = call <vscale x 16 x i32> @llvm.vp.usub.sat.nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i32> %b, <vscale x 16 x i1> %m, i32 %evl)
+  ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @vssubu_vx_nxv16i32(<vscale x 16 x i32> %va, i32 %b, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_nxv16i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 16 x i32> poison, i32 %b, i32 0
+  %vb = shufflevector <vscale x 16 x i32> %elt.head, <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
+  %v = call <vscale x 16 x i32> @llvm.vp.usub.sat.nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i32> %vb, <vscale x 16 x i1> %m, i32 %evl)
+  ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @vssubu_vx_nxv16i32_unmasked(<vscale x 16 x i32> %va, i32 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_nxv16i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 16 x i32> poison, i32 %b, i32 0
+  %vb = shufflevector <vscale x 16 x i32> %elt.head, <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
+  %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
+  %v = call <vscale x 16 x i32> @llvm.vp.usub.sat.nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i32> %vb, <vscale x 16 x i1> %m, i32 %evl)
+  ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @vssubu_vi_nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_nxv16i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 16 x i32> poison, i32 -1, i32 0
+  %vb = shufflevector <vscale x 16 x i32> %elt.head, <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
+  %v = call <vscale x 16 x i32> @llvm.vp.usub.sat.nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i32> %vb, <vscale x 16 x i1> %m, i32 %evl)
+  ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @vssubu_vi_nxv16i32_unmasked(<vscale x 16 x i32> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_nxv16i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 16 x i32> poison, i32 -1, i32 0
+  %vb = shufflevector <vscale x 16 x i32> %elt.head, <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
+  %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
+  %v = call <vscale x 16 x i32> @llvm.vp.usub.sat.nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i32> %vb, <vscale x 16 x i1> %m, i32 %evl)
+  ret <vscale x 16 x i32> %v
+}
+
+; Test that split-legalization works then the mask needs manual splitting.
+
+declare <vscale x 32 x i32> @llvm.vp.usub.sat.nxv32i32(<vscale x 32 x i32>, <vscale x 32 x i32>, <vscale x 32 x i1>, i32)
+
+define <vscale x 32 x i32> @vssubu_vi_nxv32i32(<vscale x 32 x i32> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_nxv32i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vmv1r.v v24, v0
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    srli a1, a2, 2
+; CHECK-NEXT:    vsetvli a3, zero, e8, mf2, ta, ma
+; CHECK-NEXT:    vslidedown.vx v0, v0, a1
+; CHECK-NEXT:    slli a2, a2, 1
+; CHECK-NEXT:    sub a1, a0, a2
+; CHECK-NEXT:    sltu a3, a0, a1
+; CHECK-NEXT:    addi a3, a3, -1
+; CHECK-NEXT:    and a3, a3, a1
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a3, e32, m8, ta, ma
+; CHECK-NEXT:    vssubu.vx v16, v16, a1, v0.t
+; CHECK-NEXT:    bltu a0, a2, .LBB118_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    mv a0, a2
+; CHECK-NEXT:  .LBB118_2:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT:    vmv1r.v v0, v24
+; CHECK-NEXT:    vssubu.vx v8, v8, a1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 32 x i32> poison, i32 -1, i32 0
+  %vb = shufflevector <vscale x 32 x i32> %elt.head, <vscale x 32 x i32> poison, <vscale x 32 x i32> zeroinitializer
+  %v = call <vscale x 32 x i32> @llvm.vp.usub.sat.nxv32i32(<vscale x 32 x i32> %va, <vscale x 32 x i32> %vb, <vscale x 32 x i1> %m, i32 %evl)
+  ret <vscale x 32 x i32> %v
+}
+
+define <vscale x 32 x i32> @vssubu_vi_nxv32i32_unmasked(<vscale x 32 x i32> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_nxv32i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    slli a1, a1, 1
+; CHECK-NEXT:    sub a2, a0, a1
+; CHECK-NEXT:    sltu a3, a0, a2
+; CHECK-NEXT:    addi a3, a3, -1
+; CHECK-NEXT:    and a3, a3, a2
+; CHECK-NEXT:    li a2, -1
+; CHECK-NEXT:    vsetvli zero, a3, e32, m8, ta, ma
+; CHECK-NEXT:    vssubu.vx v16, v16, a2
+; CHECK-NEXT:    bltu a0, a1, .LBB119_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    mv a0, a1
+; CHECK-NEXT:  .LBB119_2:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a2
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 32 x i32> poison, i32 -1, i32 0
+  %vb = shufflevector <vscale x 32 x i32> %elt.head, <vscale x 32 x i32> poison, <vscale x 32 x i32> zeroinitializer
+  %head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
+  %v = call <vscale x 32 x i32> @llvm.vp.usub.sat.nxv32i32(<vscale x 32 x i32> %va, <vscale x 32 x i32> %vb, <vscale x 32 x i1> %m, i32 %evl)
+  ret <vscale x 32 x i32> %v
+}
+
+declare <vscale x 1 x i64> @llvm.vp.usub.sat.nxv1i64(<vscale x 1 x i64>, <vscale x 1 x i64>, <vscale x 1 x i1>, i32)
+
+define <vscale x 1 x i64> @vssubu_vv_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_nxv1i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT:    vssubu.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    ret
+  %v = call <vscale x 1 x i64> @llvm.vp.usub.sat.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %b, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i64> %v
+}
+
+define <vscale x 1 x i64> @vssubu_vv_nxv1i64_unmasked(<vscale x 1 x i64> %va, <vscale x 1 x i64> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_nxv1i64_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT:    vssubu.vv v8, v8, v9
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x i64> @llvm.vp.usub.sat.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %b, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i64> %v
+}
+
+define <vscale x 1 x i64> @vssubu_vx_nxv1i64(<vscale x 1 x i64> %va, i64 %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vssubu_vx_nxv1i64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    sw a1, 12(sp)
+; RV32-NEXT:    sw a0, 8(sp)
+; RV32-NEXT:    addi a0, sp, 8
+; RV32-NEXT:    vsetvli a1, zero, e64, m1, ta, ma
+; RV32-NEXT:    vlse64.v v9, (a0), zero
+; RV32-NEXT:    vsetvli zero, a2, e64, m1, ta, ma
+; RV32-NEXT:    vssubu.vv v8, v8, v9, v0.t
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vssubu_vx_nxv1i64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli zero, a1, e64, m1, ta, ma
+; RV64-NEXT:    vssubu.vx v8, v8, a0, v0.t
+; RV64-NEXT:    ret
+  %elt.head = insertelement <vscale x 1 x i64> poison, i64 %b, i32 0
+  %vb = shufflevector <vscale x 1 x i64> %elt.head, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x i64> @llvm.vp.usub.sat.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %vb, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i64> %v
+}
+
+define <vscale x 1 x i64> @vssubu_vx_nxv1i64_unmasked(<vscale x 1 x i64> %va, i64 %b, i32 zeroext %evl) {
+; RV32-LABEL: vssubu_vx_nxv1i64_unmasked:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    sw a1, 12(sp)
+; RV32-NEXT:    sw a0, 8(sp)
+; RV32-NEXT:    addi a0, sp, 8
+; RV32-NEXT:    vsetvli a1, zero, e64, m1, ta, ma
+; RV32-NEXT:    vlse64.v v9, (a0), zero
+; RV32-NEXT:    vsetvli zero, a2, e64, m1, ta, ma
+; RV32-NEXT:    vssubu.vv v8, v8, v9
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vssubu_vx_nxv1i64_unmasked:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli zero, a1, e64, m1, ta, ma
+; RV64-NEXT:    vssubu.vx v8, v8, a0
+; RV64-NEXT:    ret
+  %elt.head = insertelement <vscale x 1 x i64> poison, i64 %b, i32 0
+  %vb = shufflevector <vscale x 1 x i64> %elt.head, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+  %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x i64> @llvm.vp.usub.sat.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %vb, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i64> %v
+}
+
+define <vscale x 1 x i64> @vssubu_vi_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_nxv1i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 1 x i64> poison, i64 -1, i32 0
+  %vb = shufflevector <vscale x 1 x i64> %elt.head, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x i64> @llvm.vp.usub.sat.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %vb, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i64> %v
+}
+
+define <vscale x 1 x i64> @vssubu_vi_nxv1i64_unmasked(<vscale x 1 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_nxv1i64_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 1 x i64> poison, i64 -1, i32 0
+  %vb = shufflevector <vscale x 1 x i64> %elt.head, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+  %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x i64> @llvm.vp.usub.sat.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %vb, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i64> %v
+}
+
+declare <vscale x 2 x i64> @llvm.vp.usub.sat.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i1>, i32)
+
+define <vscale x 2 x i64> @vssubu_vv_nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i64> %b, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_nxv2i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-NEXT:    vssubu.vv v8, v8, v10, v0.t
+; CHECK-NEXT:    ret
+  %v = call <vscale x 2 x i64> @llvm.vp.usub.sat.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i64> %b, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x i64> %v
+}
+
+define <vscale x 2 x i64> @vssubu_vv_nxv2i64_unmasked(<vscale x 2 x i64> %va, <vscale x 2 x i64> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_nxv2i64_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-NEXT:    vssubu.vv v8, v8, v10
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+  %v = call <vscale x 2 x i64> @llvm.vp.usub.sat.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i64> %b, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x i64> %v
+}
+
+define <vscale x 2 x i64> @vssubu_vx_nxv2i64(<vscale x 2 x i64> %va, i64 %b, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vssubu_vx_nxv2i64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    sw a1, 12(sp)
+; RV32-NEXT:    sw a0, 8(sp)
+; RV32-NEXT:    addi a0, sp, 8
+; RV32-NEXT:    vsetvli a1, zero, e64, m2, ta, ma
+; RV32-NEXT:    vlse64.v v10, (a0), zero
+; RV32-NEXT:    vsetvli zero, a2, e64, m2, ta, ma
+; RV32-NEXT:    vssubu.vv v8, v8, v10, v0.t
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vssubu_vx_nxv2i64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli zero, a1, e64, m2, ta, ma
+; RV64-NEXT:    vssubu.vx v8, v8, a0, v0.t
+; RV64-NEXT:    ret
+  %elt.head = insertelement <vscale x 2 x i64> poison, i64 %b, i32 0
+  %vb = shufflevector <vscale x 2 x i64> %elt.head, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+  %v = call <vscale x 2 x i64> @llvm.vp.usub.sat.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i64> %vb, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x i64> %v
+}
+
+define <vscale x 2 x i64> @vssubu_vx_nxv2i64_unmasked(<vscale x 2 x i64> %va, i64 %b, i32 zeroext %evl) {
+; RV32-LABEL: vssubu_vx_nxv2i64_unmasked:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    sw a1, 12(sp)
+; RV32-NEXT:    sw a0, 8(sp)
+; RV32-NEXT:    addi a0, sp, 8
+; RV32-NEXT:    vsetvli a1, zero, e64, m2, ta, ma
+; RV32-NEXT:    vlse64.v v10, (a0), zero
+; RV32-NEXT:    vsetvli zero, a2, e64, m2, ta, ma
+; RV32-NEXT:    vssubu.vv v8, v8, v10
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vssubu_vx_nxv2i64_unmasked:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli zero, a1, e64, m2, ta, ma
+; RV64-NEXT:    vssubu.vx v8, v8, a0
+; RV64-NEXT:    ret
+  %elt.head = insertelement <vscale x 2 x i64> poison, i64 %b, i32 0
+  %vb = shufflevector <vscale x 2 x i64> %elt.head, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+  %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+  %v = call <vscale x 2 x i64> @llvm.vp.usub.sat.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i64> %vb, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x i64> %v
+}
+
+define <vscale x 2 x i64> @vssubu_vi_nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_nxv2i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 2 x i64> poison, i64 -1, i32 0
+  %vb = shufflevector <vscale x 2 x i64> %elt.head, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+  %v = call <vscale x 2 x i64> @llvm.vp.usub.sat.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i64> %vb, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x i64> %v
+}
+
+define <vscale x 2 x i64> @vssubu_vi_nxv2i64_unmasked(<vscale x 2 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_nxv2i64_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 2 x i64> poison, i64 -1, i32 0
+  %vb = shufflevector <vscale x 2 x i64> %elt.head, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+  %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+  %v = call <vscale x 2 x i64> @llvm.vp.usub.sat.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i64> %vb, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x i64> %v
+}
+
+declare <vscale x 4 x i64> @llvm.vp.usub.sat.nxv4i64(<vscale x 4 x i64>, <vscale x 4 x i64>, <vscale x 4 x i1>, i32)
+
+define <vscale x 4 x i64> @vssubu_vv_nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i64> %b, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_nxv4i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT:    vssubu.vv v8, v8, v12, v0.t
+; CHECK-NEXT:    ret
+  %v = call <vscale x 4 x i64> @llvm.vp.usub.sat.nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i64> %b, <vscale x 4 x i1> %m, i32 %evl)
+  ret <vscale x 4 x i64> %v
+}
+
+define <vscale x 4 x i64> @vssubu_vv_nxv4i64_unmasked(<vscale x 4 x i64> %va, <vscale x 4 x i64> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_nxv4i64_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT:    vssubu.vv v8, v8, v12
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
+  %v = call <vscale x 4 x i64> @llvm.vp.usub.sat.nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i64> %b, <vscale x 4 x i1> %m, i32 %evl)
+  ret <vscale x 4 x i64> %v
+}
+
+define <vscale x 4 x i64> @vssubu_vx_nxv4i64(<vscale x 4 x i64> %va, i64 %b, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vssubu_vx_nxv4i64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    sw a1, 12(sp)
+; RV32-NEXT:    sw a0, 8(sp)
+; RV32-NEXT:    addi a0, sp, 8
+; RV32-NEXT:    vsetvli a1, zero, e64, m4, ta, ma
+; RV32-NEXT:    vlse64.v v12, (a0), zero
+; RV32-NEXT:    vsetvli zero, a2, e64, m4, ta, ma
+; RV32-NEXT:    vssubu.vv v8, v8, v12, v0.t
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vssubu_vx_nxv4i64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
+; RV64-NEXT:    vssubu.vx v8, v8, a0, v0.t
+; RV64-NEXT:    ret
+  %elt.head = insertelement <vscale x 4 x i64> poison, i64 %b, i32 0
+  %vb = shufflevector <vscale x 4 x i64> %elt.head, <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
+  %v = call <vscale x 4 x i64> @llvm.vp.usub.sat.nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i64> %vb, <vscale x 4 x i1> %m, i32 %evl)
+  ret <vscale x 4 x i64> %v
+}
+
+define <vscale x 4 x i64> @vssubu_vx_nxv4i64_unmasked(<vscale x 4 x i64> %va, i64 %b, i32 zeroext %evl) {
+; RV32-LABEL: vssubu_vx_nxv4i64_unmasked:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    sw a1, 12(sp)
+; RV32-NEXT:    sw a0, 8(sp)
+; RV32-NEXT:    addi a0, sp, 8
+; RV32-NEXT:    vsetvli a1, zero, e64, m4, ta, ma
+; RV32-NEXT:    vlse64.v v12, (a0), zero
+; RV32-NEXT:    vsetvli zero, a2, e64, m4, ta, ma
+; RV32-NEXT:    vssubu.vv v8, v8, v12
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vssubu_vx_nxv4i64_unmasked:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
+; RV64-NEXT:    vssubu.vx v8, v8, a0
+; RV64-NEXT:    ret
+  %elt.head = insertelement <vscale x 4 x i64> poison, i64 %b, i32 0
+  %vb = shufflevector <vscale x 4 x i64> %elt.head, <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
+  %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
+  %v = call <vscale x 4 x i64> @llvm.vp.usub.sat.nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i64> %vb, <vscale x 4 x i1> %m, i32 %evl)
+  ret <vscale x 4 x i64> %v
+}
+
+define <vscale x 4 x i64> @vssubu_vi_nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_nxv4i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 4 x i64> poison, i64 -1, i32 0
+  %vb = shufflevector <vscale x 4 x i64> %elt.head, <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
+  %v = call <vscale x 4 x i64> @llvm.vp.usub.sat.nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i64> %vb, <vscale x 4 x i1> %m, i32 %evl)
+  ret <vscale x 4 x i64> %v
+}
+
+define <vscale x 4 x i64> @vssubu_vi_nxv4i64_unmasked(<vscale x 4 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_nxv4i64_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 4 x i64> poison, i64 -1, i32 0
+  %vb = shufflevector <vscale x 4 x i64> %elt.head, <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
+  %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
+  %v = call <vscale x 4 x i64> @llvm.vp.usub.sat.nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i64> %vb, <vscale x 4 x i1> %m, i32 %evl)
+  ret <vscale x 4 x i64> %v
+}
+
+declare <vscale x 8 x i64> @llvm.vp.usub.sat.nxv8i64(<vscale x 8 x i64>, <vscale x 8 x i64>, <vscale x 8 x i1>, i32)
+
+define <vscale x 8 x i64> @vssubu_vv_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_nxv8i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT:    vssubu.vv v8, v8, v16, v0.t
+; CHECK-NEXT:    ret
+  %v = call <vscale x 8 x i64> @llvm.vp.usub.sat.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %b, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i64> %v
+}
+
+define <vscale x 8 x i64> @vssubu_vv_nxv8i64_unmasked(<vscale x 8 x i64> %va, <vscale x 8 x i64> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_nxv8i64_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT:    vssubu.vv v8, v8, v16
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+  %v = call <vscale x 8 x i64> @llvm.vp.usub.sat.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %b, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i64> %v
+}
+
+define <vscale x 8 x i64> @vssubu_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vssubu_vx_nxv8i64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    sw a1, 12(sp)
+; RV32-NEXT:    sw a0, 8(sp)
+; RV32-NEXT:    addi a0, sp, 8
+; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
+; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
+; RV32-NEXT:    vssubu.vv v8, v8, v16, v0.t
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vssubu_vx_nxv8i64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vssubu.vx v8, v8, a0, v0.t
+; RV64-NEXT:    ret
+  %elt.head = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0
+  %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+  %v = call <vscale x 8 x i64> @llvm.vp.usub.sat.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i64> %v
+}
+
+define <vscale x 8 x i64> @vssubu_vx_nxv8i64_unmasked(<vscale x 8 x i64> %va, i64 %b, i32 zeroext %evl) {
+; RV32-LABEL: vssubu_vx_nxv8i64_unmasked:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    sw a1, 12(sp)
+; RV32-NEXT:    sw a0, 8(sp)
+; RV32-NEXT:    addi a0, sp, 8
+; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
+; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
+; RV32-NEXT:    vssubu.vv v8, v8, v16
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vssubu_vx_nxv8i64_unmasked:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vssubu.vx v8, v8, a0
+; RV64-NEXT:    ret
+  %elt.head = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0
+  %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+  %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+  %v = call <vscale x 8 x i64> @llvm.vp.usub.sat.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i64> %v
+}
+
+define <vscale x 8 x i64> @vssubu_vi_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_nxv8i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 8 x i64> poison, i64 -1, i32 0
+  %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+  %v = call <vscale x 8 x i64> @llvm.vp.usub.sat.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i64> %v
+}
+
+define <vscale x 8 x i64> @vssubu_vi_nxv8i64_unmasked(<vscale x 8 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_nxv8i64_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT:    vssubu.vx v8, v8, a1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 8 x i64> poison, i64 -1, i32 0
+  %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+  %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+  %v = call <vscale x 8 x i64> @llvm.vp.usub.sat.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i64> %v
+}

diff  --git a/llvm/unittests/IR/VPIntrinsicTest.cpp b/llvm/unittests/IR/VPIntrinsicTest.cpp
index 7a9d91cc76b34a..e3462f0f33f118 100644
--- a/llvm/unittests/IR/VPIntrinsicTest.cpp
+++ b/llvm/unittests/IR/VPIntrinsicTest.cpp
@@ -163,6 +163,14 @@ class VPIntrinsicTest : public testing::Test {
         << "(<8 x i16>, i1 immarg, <8 x i1>, i32) ";
     Str << " declare <8 x i16> @llvm.vp.cttz.v8i16"
         << "(<8 x i16>, i1 immarg, <8 x i1>, i32) ";
+    Str << " declare <8 x i16> @llvm.vp.sadd.sat.v8i16"
+        << "(<8 x i16>, <8 x i16>, <8 x i1>, i32) ";
+    Str << " declare <8 x i16> @llvm.vp.uadd.sat.v8i16"
+        << "(<8 x i16>, <8 x i16>, <8 x i1>, i32) ";
+    Str << " declare <8 x i16> @llvm.vp.ssub.sat.v8i16"
+        << "(<8 x i16>, <8 x i16>, <8 x i1>, i32) ";
+    Str << " declare <8 x i16> @llvm.vp.usub.sat.v8i16"
+        << "(<8 x i16>, <8 x i16>, <8 x i1>, i32) ";
     Str << " declare <8 x i16> @llvm.vp.fshl.v8i16"
         << "(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i1>, i32) ";
     Str << " declare <8 x i16> @llvm.vp.fshr.v8i16"


        


More information about the llvm-commits mailing list