[clang] dfbb9a0 - [RISCV] Implement intrinsics for XAndesVDot (#141441)
via cfe-commits
cfe-commits at lists.llvm.org
Fri May 30 23:12:05 PDT 2025
Author: Jim Lin
Date: 2025-05-31T14:12:01+08:00
New Revision: dfbb9a0e3065332411f1b56de5aced3164fe69f1
URL: https://github.com/llvm/llvm-project/commit/dfbb9a0e3065332411f1b56de5aced3164fe69f1
DIFF: https://github.com/llvm/llvm-project/commit/dfbb9a0e3065332411f1b56de5aced3164fe69f1.diff
LOG: [RISCV] Implement intrinsics for XAndesVDot (#141441)
This patch implements clang intrinsic support for XAndesVDot.
The documentation for the intrinsics can be found at:
https://github.com/andestech/andes-vector-intrinsic-doc/blob/ast-v5_4_0-release-v5/auto-generated/andes-v5/intrinsic_funcs.adoc#andes-vector-dot-product-extensionxandesvdot
and, for the policy variants, at:
https://github.com/andestech/andes-vector-intrinsic-doc/blob/ast-v5_4_0-release-v5/auto-generated/andes-v5/policy_funcs/intrinsic_funcs.adoc#andes-vector-dot-product-extensionxandesvdot
Co-authored-by: Tony Chuan-Yue Yuan <yuan593 at andestech.com>
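
For context, the new intrinsics follow the usual RVV accumulate form: each
destination element of vd accumulates a 4-element dot product of the
quarter-width source elements in vs1 and vs2. A minimal usage sketch
(illustrative only, not part of the patch; it assumes a toolchain built with
this change and compiled with -target-feature +xandesvdot):

#include <andes_vector.h>
#include <stddef.h>

// Accumulate signed 8-bit 4-element dot products into 32-bit sums:
// each 32-bit element vd[i] gains vs1[4i..4i+3] . vs2[4i..4i+3].
vint32m1_t dot_step(vint32m1_t vd, vint8m1_t vs1, vint8m1_t vs2,
                    size_t vl) {
  return __riscv_nds_vd4dots_vv_i32m1(vd, vs1, vs2, vl);
}

The overloaded spelling __riscv_nds_vd4dots(vd, vs1, vs2, vl) and the
masked/policy variants exercised by the tests below behave analogously.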
Added:
clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/nds_vd4dots.c
clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/nds_vd4dotsu.c
clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/nds_vd4dotu.c
clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/nds_vd4dots.c
clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/nds_vd4dotsu.c
clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/nds_vd4dotu.c
clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/policy/non-overloaded/nds_vd4dots.c
clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/policy/non-overloaded/nds_vd4dotsu.c
clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/policy/non-overloaded/nds_vd4dotu.c
clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/policy/overloaded/nds_vd4dots.c
clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/policy/overloaded/nds_vd4dotsu.c
clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/policy/overloaded/nds_vd4dotu.c
Modified:
clang/include/clang/Basic/riscv_andes_vector.td
clang/include/clang/Support/RISCVVIntrinsicUtils.h
clang/lib/Sema/SemaRISCV.cpp
clang/lib/Support/RISCVVIntrinsicUtils.cpp
clang/utils/TableGen/RISCVVEmitter.cpp
Removed:
################################################################################
diff --git a/clang/include/clang/Basic/riscv_andes_vector.td b/clang/include/clang/Basic/riscv_andes_vector.td
index 8c202a8b6ff41..1498ce2dcdf9e 100644
--- a/clang/include/clang/Basic/riscv_andes_vector.td
+++ b/clang/include/clang/Basic/riscv_andes_vector.td
@@ -81,3 +81,24 @@ let ManualCodegen = [{
defm nds_vfpmadb : RVVFPMAD;
}
}
+
+// Andes Vector Dot Product Extension (XAndesVDot)
+
+multiclass RVVD4DOT<list<list<string>> i_suffixes_prototypes,
+ list<list<string>> l_suffixes_prototypes> {
+ let RequiredFeatures = ["Xandesvdot"],
+ UnMaskedPolicyScheme = HasPolicyOperand,
+ HasMaskedOffOperand = false,
+ Log2LMUL = [-1, 0, 1, 2, 3],
+ OverloadedName = NAME in {
+ defm NAME : RVVOutOp1Op2BuiltinSet<NAME, "i", i_suffixes_prototypes>;
+ defm NAME : RVVOutOp1Op2BuiltinSet<NAME, "l", l_suffixes_prototypes>;
+ }
+}
+
+defm nds_vd4dots : RVVD4DOT<[["vv", "v", "vv(FixedSEW:8)v(FixedSEW:8)v"]],
+ [["vv", "v", "vv(FixedSEW:16)v(FixedSEW:16)v"]]>;
+defm nds_vd4dotu : RVVD4DOT<[["vv", "Uv", "UvUv(FixedSEW:8)Uv(FixedSEW:8)Uv"]],
+ [["vv", "Uv", "UvUv(FixedSEW:16)Uv(FixedSEW:16)Uv"]]>;
+defm nds_vd4dotsu : RVVD4DOT<[["vv", "v", "vv(FixedSEW:8)v(FixedSEW:8)Uv"]],
+ [["vv", "v", "vv(FixedSEW:16)v(FixedSEW:16)Uv"]]>;
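Reading the prototype triples above: "v" denotes a vector of the base SEW for
the type range ("i" for 32-bit, "l" for 64-bit), "Uv" its unsigned
counterpart, and "(FixedSEW:8)v"/"(FixedSEW:16)v" pin the two source operands
to quarter-width elements. For the signed "i" entry this expands to
signatures such as the following, as exercised by the tests added below:

vint32m1_t __riscv_nds_vd4dots_vv_i32m1(vint32m1_t vd, vint8m1_t vs1,
                                        vint8m1_t vs2, size_t vl);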
diff --git a/clang/include/clang/Support/RISCVVIntrinsicUtils.h b/clang/include/clang/Support/RISCVVIntrinsicUtils.h
index bbcf16dc36e71..ddb527597c71c 100644
--- a/clang/include/clang/Support/RISCVVIntrinsicUtils.h
+++ b/clang/include/clang/Support/RISCVVIntrinsicUtils.h
@@ -489,6 +489,7 @@ class RVVIntrinsic {
enum RVVRequire {
RVV_REQ_RV64,
RVV_REQ_Zvfhmin,
+ RVV_REQ_Xandesvdot,
RVV_REQ_Xandesvpackfph,
RVV_REQ_Xsfvcp,
RVV_REQ_Xsfvfnrclipxfqf,
diff --git a/clang/lib/Sema/SemaRISCV.cpp b/clang/lib/Sema/SemaRISCV.cpp
index 481bf8bd22cc1..830df05c9f042 100644
--- a/clang/lib/Sema/SemaRISCV.cpp
+++ b/clang/lib/Sema/SemaRISCV.cpp
@@ -225,6 +225,7 @@ void RISCVIntrinsicManagerImpl::ConstructRVVIntrinsics(
const TargetInfo &TI = Context.getTargetInfo();
static const std::pair<const char *, unsigned> FeatureCheckList[] = {
{"64bit", RVV_REQ_RV64},
+ {"xandesvdot", RVV_REQ_Xandesvdot},
{"xandesvpackfph", RVV_REQ_Xandesvpackfph},
{"xsfvcp", RVV_REQ_Xsfvcp},
{"xsfvfnrclipxfqf", RVV_REQ_Xsfvfnrclipxfqf},
diff --git a/clang/lib/Support/RISCVVIntrinsicUtils.cpp b/clang/lib/Support/RISCVVIntrinsicUtils.cpp
index d954c1617ae1a..37f95411af195 100644
--- a/clang/lib/Support/RISCVVIntrinsicUtils.cpp
+++ b/clang/lib/Support/RISCVVIntrinsicUtils.cpp
@@ -1214,6 +1214,7 @@ llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, enum RVVRequire Require) {
switch (Require) {
STRINGIFY(RVV_REQ_RV64)
STRINGIFY(RVV_REQ_Zvfhmin)
+ STRINGIFY(RVV_REQ_Xandesvdot)
STRINGIFY(RVV_REQ_Xandesvpackfph)
STRINGIFY(RVV_REQ_Xsfvcp)
STRINGIFY(RVV_REQ_Xsfvfnrclipxfqf)
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/nds_vd4dots.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/nds_vd4dots.c
new file mode 100644
index 0000000000000..0b4fcc742b996
--- /dev/null
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/nds_vd4dots.c
@@ -0,0 +1,170 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: riscv-registered-target
+// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \
+// RUN: -target-feature +xandesvdot -disable-O0-optnone \
+// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \
+// RUN: FileCheck --check-prefix=CHECK-RV64 %s
+
+#include <andes_vector.h>
+
+// CHECK-RV64-LABEL: @test_nds_vd4dots_vv_i32mf2(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.nds.vd4dots.nxv1i32.nxv4i8.nxv4i8.i64(<vscale x 1 x i32> [[VD:%.*]], <vscale x 4 x i8> [[VS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 1 x i32> [[TMP0]]
+//
+vint32mf2_t test_nds_vd4dots_vv_i32mf2(vint32mf2_t vd, vint8mf2_t vs1, vint8mf2_t vs2, size_t vl) {
+ return __riscv_nds_vd4dots_vv_i32mf2(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dots_vv_i32m1(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.nds.vd4dots.nxv2i32.nxv8i8.nxv8i8.i64(<vscale x 2 x i32> [[VD:%.*]], <vscale x 8 x i8> [[VS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 2 x i32> [[TMP0]]
+//
+vint32m1_t test_nds_vd4dots_vv_i32m1(vint32m1_t vd, vint8m1_t vs1, vint8m1_t vs2, size_t vl) {
+ return __riscv_nds_vd4dots_vv_i32m1(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dots_vv_i32m2(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.nds.vd4dots.nxv4i32.nxv16i8.nxv16i8.i64(<vscale x 4 x i32> [[VD:%.*]], <vscale x 16 x i8> [[VS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 4 x i32> [[TMP0]]
+//
+vint32m2_t test_nds_vd4dots_vv_i32m2(vint32m2_t vd, vint8m2_t vs1, vint8m2_t vs2, size_t vl) {
+ return __riscv_nds_vd4dots_vv_i32m2(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dots_vv_i32m4(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.nds.vd4dots.nxv8i32.nxv32i8.nxv32i8.i64(<vscale x 8 x i32> [[VD:%.*]], <vscale x 32 x i8> [[VS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 8 x i32> [[TMP0]]
+//
+vint32m4_t test_nds_vd4dots_vv_i32m4(vint32m4_t vd, vint8m4_t vs1, vint8m4_t vs2, size_t vl) {
+ return __riscv_nds_vd4dots_vv_i32m4(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dots_vv_i32m8(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.nds.vd4dots.nxv16i32.nxv64i8.nxv64i8.i64(<vscale x 16 x i32> [[VD:%.*]], <vscale x 64 x i8> [[VS1:%.*]], <vscale x 64 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 16 x i32> [[TMP0]]
+//
+vint32m8_t test_nds_vd4dots_vv_i32m8(vint32m8_t vd, vint8m8_t vs1, vint8m8_t vs2, size_t vl) {
+ return __riscv_nds_vd4dots_vv_i32m8(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dots_vv_i64m1(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.nds.vd4dots.nxv1i64.nxv4i16.nxv4i16.i64(<vscale x 1 x i64> [[VD:%.*]], <vscale x 4 x i16> [[VS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 1 x i64> [[TMP0]]
+//
+vint64m1_t test_nds_vd4dots_vv_i64m1(vint64m1_t vd, vint16m1_t vs1, vint16m1_t vs2, size_t vl) {
+ return __riscv_nds_vd4dots_vv_i64m1(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dots_vv_i64m2(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.nds.vd4dots.nxv2i64.nxv8i16.nxv8i16.i64(<vscale x 2 x i64> [[VD:%.*]], <vscale x 8 x i16> [[VS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 2 x i64> [[TMP0]]
+//
+vint64m2_t test_nds_vd4dots_vv_i64m2(vint64m2_t vd, vint16m2_t vs1, vint16m2_t vs2, size_t vl) {
+ return __riscv_nds_vd4dots_vv_i64m2(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dots_vv_i64m4(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.nds.vd4dots.nxv4i64.nxv16i16.nxv16i16.i64(<vscale x 4 x i64> [[VD:%.*]], <vscale x 16 x i16> [[VS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 4 x i64> [[TMP0]]
+//
+vint64m4_t test_nds_vd4dots_vv_i64m4(vint64m4_t vd, vint16m4_t vs1, vint16m4_t vs2, size_t vl) {
+ return __riscv_nds_vd4dots_vv_i64m4(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dots_vv_i64m8(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.nds.vd4dots.nxv8i64.nxv32i16.nxv32i16.i64(<vscale x 8 x i64> [[VD:%.*]], <vscale x 32 x i16> [[VS1:%.*]], <vscale x 32 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 8 x i64> [[TMP0]]
+//
+vint64m8_t test_nds_vd4dots_vv_i64m8(vint64m8_t vd, vint16m8_t vs1, vint16m8_t vs2, size_t vl) {
+ return __riscv_nds_vd4dots_vv_i64m8(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dots_vv_i32mf2_m(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.nds.vd4dots.mask.nxv1i32.nxv4i8.nxv4i8.i64(<vscale x 1 x i32> [[VD:%.*]], <vscale x 4 x i8> [[VS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 1 x i32> [[TMP0]]
+//
+vint32mf2_t test_nds_vd4dots_vv_i32mf2_m(vbool64_t mask, vint32mf2_t vd, vint8mf2_t vs1, vint8mf2_t vs2, size_t vl) {
+ return __riscv_nds_vd4dots_vv_i32mf2_m(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dots_vv_i32m1_m(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.nds.vd4dots.mask.nxv2i32.nxv8i8.nxv8i8.i64(<vscale x 2 x i32> [[VD:%.*]], <vscale x 8 x i8> [[VS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 2 x i32> [[TMP0]]
+//
+vint32m1_t test_nds_vd4dots_vv_i32m1_m(vbool32_t mask, vint32m1_t vd, vint8m1_t vs1, vint8m1_t vs2, size_t vl) {
+ return __riscv_nds_vd4dots_vv_i32m1_m(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dots_vv_i32m2_m(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.nds.vd4dots.mask.nxv4i32.nxv16i8.nxv16i8.i64(<vscale x 4 x i32> [[VD:%.*]], <vscale x 16 x i8> [[VS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 4 x i32> [[TMP0]]
+//
+vint32m2_t test_nds_vd4dots_vv_i32m2_m(vbool16_t mask, vint32m2_t vd, vint8m2_t vs1, vint8m2_t vs2, size_t vl) {
+ return __riscv_nds_vd4dots_vv_i32m2_m(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dots_vv_i32m4_m(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.nds.vd4dots.mask.nxv8i32.nxv32i8.nxv32i8.i64(<vscale x 8 x i32> [[VD:%.*]], <vscale x 32 x i8> [[VS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 8 x i32> [[TMP0]]
+//
+vint32m4_t test_nds_vd4dots_vv_i32m4_m(vbool8_t mask, vint32m4_t vd, vint8m4_t vs1, vint8m4_t vs2, size_t vl) {
+ return __riscv_nds_vd4dots_vv_i32m4_m(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dots_vv_i32m8_m(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.nds.vd4dots.mask.nxv16i32.nxv64i8.nxv64i8.i64(<vscale x 16 x i32> [[VD:%.*]], <vscale x 64 x i8> [[VS1:%.*]], <vscale x 64 x i8> [[VS2:%.*]], <vscale x 16 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 16 x i32> [[TMP0]]
+//
+vint32m8_t test_nds_vd4dots_vv_i32m8_m(vbool4_t mask, vint32m8_t vd, vint8m8_t vs1, vint8m8_t vs2, size_t vl) {
+ return __riscv_nds_vd4dots_vv_i32m8_m(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dots_vv_i64m1_m(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.nds.vd4dots.mask.nxv1i64.nxv4i16.nxv4i16.i64(<vscale x 1 x i64> [[VD:%.*]], <vscale x 4 x i16> [[VS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 1 x i64> [[TMP0]]
+//
+vint64m1_t test_nds_vd4dots_vv_i64m1_m(vbool64_t mask, vint64m1_t vd, vint16m1_t vs1, vint16m1_t vs2, size_t vl) {
+ return __riscv_nds_vd4dots_vv_i64m1_m(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dots_vv_i64m2_m(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.nds.vd4dots.mask.nxv2i64.nxv8i16.nxv8i16.i64(<vscale x 2 x i64> [[VD:%.*]], <vscale x 8 x i16> [[VS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 2 x i64> [[TMP0]]
+//
+vint64m2_t test_nds_vd4dots_vv_i64m2_m(vbool32_t mask, vint64m2_t vd, vint16m2_t vs1, vint16m2_t vs2, size_t vl) {
+ return __riscv_nds_vd4dots_vv_i64m2_m(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dots_vv_i64m4_m(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.nds.vd4dots.mask.nxv4i64.nxv16i16.nxv16i16.i64(<vscale x 4 x i64> [[VD:%.*]], <vscale x 16 x i16> [[VS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 4 x i64> [[TMP0]]
+//
+vint64m4_t test_nds_vd4dots_vv_i64m4_m(vbool16_t mask, vint64m4_t vd, vint16m4_t vs1, vint16m4_t vs2, size_t vl) {
+ return __riscv_nds_vd4dots_vv_i64m4_m(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dots_vv_i64m8_m(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.nds.vd4dots.mask.nxv8i64.nxv32i16.nxv32i16.i64(<vscale x 8 x i64> [[VD:%.*]], <vscale x 32 x i16> [[VS1:%.*]], <vscale x 32 x i16> [[VS2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 8 x i64> [[TMP0]]
+//
+vint64m8_t test_nds_vd4dots_vv_i64m8_m(vbool8_t mask, vint64m8_t vd, vint16m8_t vs1, vint16m8_t vs2, size_t vl) {
+ return __riscv_nds_vd4dots_vv_i64m8_m(mask, vd, vs1, vs2, vl);
+}
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/nds_vd4dotsu.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/nds_vd4dotsu.c
new file mode 100644
index 0000000000000..7446f7e10cf4b
--- /dev/null
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/nds_vd4dotsu.c
@@ -0,0 +1,170 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: riscv-registered-target
+// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \
+// RUN: -target-feature +xandesvdot -disable-O0-optnone \
+// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \
+// RUN: FileCheck --check-prefix=CHECK-RV64 %s
+
+#include <andes_vector.h>
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotsu_vv_i32mf2(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.nds.vd4dotsu.nxv1i32.nxv4i8.nxv4i8.i64(<vscale x 1 x i32> [[VD:%.*]], <vscale x 4 x i8> [[VS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 1 x i32> [[TMP0]]
+//
+vint32mf2_t test_nds_vd4dotsu_vv_i32mf2(vint32mf2_t vd, vint8mf2_t vs1, vuint8mf2_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotsu_vv_i32mf2(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotsu_vv_i32m1(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.nds.vd4dotsu.nxv2i32.nxv8i8.nxv8i8.i64(<vscale x 2 x i32> [[VD:%.*]], <vscale x 8 x i8> [[VS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 2 x i32> [[TMP0]]
+//
+vint32m1_t test_nds_vd4dotsu_vv_i32m1(vint32m1_t vd, vint8m1_t vs1, vuint8m1_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotsu_vv_i32m1(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotsu_vv_i32m2(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.nds.vd4dotsu.nxv4i32.nxv16i8.nxv16i8.i64(<vscale x 4 x i32> [[VD:%.*]], <vscale x 16 x i8> [[VS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 4 x i32> [[TMP0]]
+//
+vint32m2_t test_nds_vd4dotsu_vv_i32m2(vint32m2_t vd, vint8m2_t vs1, vuint8m2_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotsu_vv_i32m2(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotsu_vv_i32m4(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.nds.vd4dotsu.nxv8i32.nxv32i8.nxv32i8.i64(<vscale x 8 x i32> [[VD:%.*]], <vscale x 32 x i8> [[VS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 8 x i32> [[TMP0]]
+//
+vint32m4_t test_nds_vd4dotsu_vv_i32m4(vint32m4_t vd, vint8m4_t vs1, vuint8m4_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotsu_vv_i32m4(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotsu_vv_i32m8(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.nds.vd4dotsu.nxv16i32.nxv64i8.nxv64i8.i64(<vscale x 16 x i32> [[VD:%.*]], <vscale x 64 x i8> [[VS1:%.*]], <vscale x 64 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 16 x i32> [[TMP0]]
+//
+vint32m8_t test_nds_vd4dotsu_vv_i32m8(vint32m8_t vd, vint8m8_t vs1, vuint8m8_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotsu_vv_i32m8(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotsu_vv_i64m1(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.nds.vd4dotsu.nxv1i64.nxv4i16.nxv4i16.i64(<vscale x 1 x i64> [[VD:%.*]], <vscale x 4 x i16> [[VS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 1 x i64> [[TMP0]]
+//
+vint64m1_t test_nds_vd4dotsu_vv_i64m1(vint64m1_t vd, vint16m1_t vs1, vuint16m1_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotsu_vv_i64m1(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotsu_vv_i64m2(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.nds.vd4dotsu.nxv2i64.nxv8i16.nxv8i16.i64(<vscale x 2 x i64> [[VD:%.*]], <vscale x 8 x i16> [[VS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 2 x i64> [[TMP0]]
+//
+vint64m2_t test_nds_vd4dotsu_vv_i64m2(vint64m2_t vd, vint16m2_t vs1, vuint16m2_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotsu_vv_i64m2(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotsu_vv_i64m4(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.nds.vd4dotsu.nxv4i64.nxv16i16.nxv16i16.i64(<vscale x 4 x i64> [[VD:%.*]], <vscale x 16 x i16> [[VS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 4 x i64> [[TMP0]]
+//
+vint64m4_t test_nds_vd4dotsu_vv_i64m4(vint64m4_t vd, vint16m4_t vs1, vuint16m4_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotsu_vv_i64m4(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotsu_vv_i64m8(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.nds.vd4dotsu.nxv8i64.nxv32i16.nxv32i16.i64(<vscale x 8 x i64> [[VD:%.*]], <vscale x 32 x i16> [[VS1:%.*]], <vscale x 32 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 8 x i64> [[TMP0]]
+//
+vint64m8_t test_nds_vd4dotsu_vv_i64m8(vint64m8_t vd, vint16m8_t vs1, vuint16m8_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotsu_vv_i64m8(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotsu_vv_i32mf2_m(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.nds.vd4dotsu.mask.nxv1i32.nxv4i8.nxv4i8.i64(<vscale x 1 x i32> [[VD:%.*]], <vscale x 4 x i8> [[VS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 1 x i32> [[TMP0]]
+//
+vint32mf2_t test_nds_vd4dotsu_vv_i32mf2_m(vbool64_t mask, vint32mf2_t vd, vint8mf2_t vs1, vuint8mf2_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotsu_vv_i32mf2_m(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotsu_vv_i32m1_m(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.nds.vd4dotsu.mask.nxv2i32.nxv8i8.nxv8i8.i64(<vscale x 2 x i32> [[VD:%.*]], <vscale x 8 x i8> [[VS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 2 x i32> [[TMP0]]
+//
+vint32m1_t test_nds_vd4dotsu_vv_i32m1_m(vbool32_t mask, vint32m1_t vd, vint8m1_t vs1, vuint8m1_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotsu_vv_i32m1_m(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotsu_vv_i32m2_m(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.nds.vd4dotsu.mask.nxv4i32.nxv16i8.nxv16i8.i64(<vscale x 4 x i32> [[VD:%.*]], <vscale x 16 x i8> [[VS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 4 x i32> [[TMP0]]
+//
+vint32m2_t test_nds_vd4dotsu_vv_i32m2_m(vbool16_t mask, vint32m2_t vd, vint8m2_t vs1, vuint8m2_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotsu_vv_i32m2_m(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotsu_vv_i32m4_m(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.nds.vd4dotsu.mask.nxv8i32.nxv32i8.nxv32i8.i64(<vscale x 8 x i32> [[VD:%.*]], <vscale x 32 x i8> [[VS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 8 x i32> [[TMP0]]
+//
+vint32m4_t test_nds_vd4dotsu_vv_i32m4_m(vbool8_t mask, vint32m4_t vd, vint8m4_t vs1, vuint8m4_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotsu_vv_i32m4_m(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotsu_vv_i32m8_m(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.nds.vd4dotsu.mask.nxv16i32.nxv64i8.nxv64i8.i64(<vscale x 16 x i32> [[VD:%.*]], <vscale x 64 x i8> [[VS1:%.*]], <vscale x 64 x i8> [[VS2:%.*]], <vscale x 16 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 16 x i32> [[TMP0]]
+//
+vint32m8_t test_nds_vd4dotsu_vv_i32m8_m(vbool4_t mask, vint32m8_t vd, vint8m8_t vs1, vuint8m8_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotsu_vv_i32m8_m(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotsu_vv_i64m1_m(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.nds.vd4dotsu.mask.nxv1i64.nxv4i16.nxv4i16.i64(<vscale x 1 x i64> [[VD:%.*]], <vscale x 4 x i16> [[VS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 1 x i64> [[TMP0]]
+//
+vint64m1_t test_nds_vd4dotsu_vv_i64m1_m(vbool64_t mask, vint64m1_t vd, vint16m1_t vs1, vuint16m1_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotsu_vv_i64m1_m(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotsu_vv_i64m2_m(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.nds.vd4dotsu.mask.nxv2i64.nxv8i16.nxv8i16.i64(<vscale x 2 x i64> [[VD:%.*]], <vscale x 8 x i16> [[VS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 2 x i64> [[TMP0]]
+//
+vint64m2_t test_nds_vd4dotsu_vv_i64m2_m(vbool32_t mask, vint64m2_t vd, vint16m2_t vs1, vuint16m2_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotsu_vv_i64m2_m(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotsu_vv_i64m4_m(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.nds.vd4dotsu.mask.nxv4i64.nxv16i16.nxv16i16.i64(<vscale x 4 x i64> [[VD:%.*]], <vscale x 16 x i16> [[VS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 4 x i64> [[TMP0]]
+//
+vint64m4_t test_nds_vd4dotsu_vv_i64m4_m(vbool16_t mask, vint64m4_t vd, vint16m4_t vs1, vuint16m4_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotsu_vv_i64m4_m(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotsu_vv_i64m8_m(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.nds.vd4dotsu.mask.nxv8i64.nxv32i16.nxv32i16.i64(<vscale x 8 x i64> [[VD:%.*]], <vscale x 32 x i16> [[VS1:%.*]], <vscale x 32 x i16> [[VS2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 8 x i64> [[TMP0]]
+//
+vint64m8_t test_nds_vd4dotsu_vv_i64m8_m(vbool8_t mask, vint64m8_t vd, vint16m8_t vs1, vuint16m8_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotsu_vv_i64m8_m(mask, vd, vs1, vs2, vl);
+}
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/nds_vd4dotu.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/nds_vd4dotu.c
new file mode 100644
index 0000000000000..960bc46814aa6
--- /dev/null
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/nds_vd4dotu.c
@@ -0,0 +1,170 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: riscv-registered-target
+// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \
+// RUN: -target-feature +xandesvdot -disable-O0-optnone \
+// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \
+// RUN: FileCheck --check-prefix=CHECK-RV64 %s
+
+#include <andes_vector.h>
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotu_vv_u32mf2(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.nds.vd4dotu.nxv1i32.nxv4i8.nxv4i8.i64(<vscale x 1 x i32> [[VD:%.*]], <vscale x 4 x i8> [[VS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 1 x i32> [[TMP0]]
+//
+vuint32mf2_t test_nds_vd4dotu_vv_u32mf2(vuint32mf2_t vd, vuint8mf2_t vs1, vuint8mf2_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotu_vv_u32mf2(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotu_vv_u32m1(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.nds.vd4dotu.nxv2i32.nxv8i8.nxv8i8.i64(<vscale x 2 x i32> [[VD:%.*]], <vscale x 8 x i8> [[VS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 2 x i32> [[TMP0]]
+//
+vuint32m1_t test_nds_vd4dotu_vv_u32m1(vuint32m1_t vd, vuint8m1_t vs1, vuint8m1_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotu_vv_u32m1(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotu_vv_u32m2(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.nds.vd4dotu.nxv4i32.nxv16i8.nxv16i8.i64(<vscale x 4 x i32> [[VD:%.*]], <vscale x 16 x i8> [[VS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 4 x i32> [[TMP0]]
+//
+vuint32m2_t test_nds_vd4dotu_vv_u32m2(vuint32m2_t vd, vuint8m2_t vs1, vuint8m2_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotu_vv_u32m2(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotu_vv_u32m4(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.nds.vd4dotu.nxv8i32.nxv32i8.nxv32i8.i64(<vscale x 8 x i32> [[VD:%.*]], <vscale x 32 x i8> [[VS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 8 x i32> [[TMP0]]
+//
+vuint32m4_t test_nds_vd4dotu_vv_u32m4(vuint32m4_t vd, vuint8m4_t vs1, vuint8m4_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotu_vv_u32m4(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotu_vv_u32m8(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.nds.vd4dotu.nxv16i32.nxv64i8.nxv64i8.i64(<vscale x 16 x i32> [[VD:%.*]], <vscale x 64 x i8> [[VS1:%.*]], <vscale x 64 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 16 x i32> [[TMP0]]
+//
+vuint32m8_t test_nds_vd4dotu_vv_u32m8(vuint32m8_t vd, vuint8m8_t vs1, vuint8m8_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotu_vv_u32m8(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotu_vv_u64m1(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.nds.vd4dotu.nxv1i64.nxv4i16.nxv4i16.i64(<vscale x 1 x i64> [[VD:%.*]], <vscale x 4 x i16> [[VS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 1 x i64> [[TMP0]]
+//
+vuint64m1_t test_nds_vd4dotu_vv_u64m1(vuint64m1_t vd, vuint16m1_t vs1, vuint16m1_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotu_vv_u64m1(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotu_vv_u64m2(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.nds.vd4dotu.nxv2i64.nxv8i16.nxv8i16.i64(<vscale x 2 x i64> [[VD:%.*]], <vscale x 8 x i16> [[VS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 2 x i64> [[TMP0]]
+//
+vuint64m2_t test_nds_vd4dotu_vv_u64m2(vuint64m2_t vd, vuint16m2_t vs1, vuint16m2_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotu_vv_u64m2(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotu_vv_u64m4(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.nds.vd4dotu.nxv4i64.nxv16i16.nxv16i16.i64(<vscale x 4 x i64> [[VD:%.*]], <vscale x 16 x i16> [[VS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 4 x i64> [[TMP0]]
+//
+vuint64m4_t test_nds_vd4dotu_vv_u64m4(vuint64m4_t vd, vuint16m4_t vs1, vuint16m4_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotu_vv_u64m4(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotu_vv_u64m8(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.nds.vd4dotu.nxv8i64.nxv32i16.nxv32i16.i64(<vscale x 8 x i64> [[VD:%.*]], <vscale x 32 x i16> [[VS1:%.*]], <vscale x 32 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 8 x i64> [[TMP0]]
+//
+vuint64m8_t test_nds_vd4dotu_vv_u64m8(vuint64m8_t vd, vuint16m8_t vs1, vuint16m8_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotu_vv_u64m8(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotu_vv_u32mf2_m(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.nds.vd4dotu.mask.nxv1i32.nxv4i8.nxv4i8.i64(<vscale x 1 x i32> [[VD:%.*]], <vscale x 4 x i8> [[VS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 1 x i32> [[TMP0]]
+//
+vuint32mf2_t test_nds_vd4dotu_vv_u32mf2_m(vbool64_t mask, vuint32mf2_t vd, vuint8mf2_t vs1, vuint8mf2_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotu_vv_u32mf2_m(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotu_vv_u32m1_m(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.nds.vd4dotu.mask.nxv2i32.nxv8i8.nxv8i8.i64(<vscale x 2 x i32> [[VD:%.*]], <vscale x 8 x i8> [[VS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 2 x i32> [[TMP0]]
+//
+vuint32m1_t test_nds_vd4dotu_vv_u32m1_m(vbool32_t mask, vuint32m1_t vd, vuint8m1_t vs1, vuint8m1_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotu_vv_u32m1_m(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotu_vv_u32m2_m(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.nds.vd4dotu.mask.nxv4i32.nxv16i8.nxv16i8.i64(<vscale x 4 x i32> [[VD:%.*]], <vscale x 16 x i8> [[VS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 4 x i32> [[TMP0]]
+//
+vuint32m2_t test_nds_vd4dotu_vv_u32m2_m(vbool16_t mask, vuint32m2_t vd, vuint8m2_t vs1, vuint8m2_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotu_vv_u32m2_m(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotu_vv_u32m4_m(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.nds.vd4dotu.mask.nxv8i32.nxv32i8.nxv32i8.i64(<vscale x 8 x i32> [[VD:%.*]], <vscale x 32 x i8> [[VS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 8 x i32> [[TMP0]]
+//
+vuint32m4_t test_nds_vd4dotu_vv_u32m4_m(vbool8_t mask, vuint32m4_t vd, vuint8m4_t vs1, vuint8m4_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotu_vv_u32m4_m(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotu_vv_u32m8_m(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.nds.vd4dotu.mask.nxv16i32.nxv64i8.nxv64i8.i64(<vscale x 16 x i32> [[VD:%.*]], <vscale x 64 x i8> [[VS1:%.*]], <vscale x 64 x i8> [[VS2:%.*]], <vscale x 16 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 16 x i32> [[TMP0]]
+//
+vuint32m8_t test_nds_vd4dotu_vv_u32m8_m(vbool4_t mask, vuint32m8_t vd, vuint8m8_t vs1, vuint8m8_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotu_vv_u32m8_m(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotu_vv_u64m1_m(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.nds.vd4dotu.mask.nxv1i64.nxv4i16.nxv4i16.i64(<vscale x 1 x i64> [[VD:%.*]], <vscale x 4 x i16> [[VS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 1 x i64> [[TMP0]]
+//
+vuint64m1_t test_nds_vd4dotu_vv_u64m1_m(vbool64_t mask, vuint64m1_t vd, vuint16m1_t vs1, vuint16m1_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotu_vv_u64m1_m(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotu_vv_u64m2_m(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.nds.vd4dotu.mask.nxv2i64.nxv8i16.nxv8i16.i64(<vscale x 2 x i64> [[VD:%.*]], <vscale x 8 x i16> [[VS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 2 x i64> [[TMP0]]
+//
+vuint64m2_t test_nds_vd4dotu_vv_u64m2_m(vbool32_t mask, vuint64m2_t vd, vuint16m2_t vs1, vuint16m2_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotu_vv_u64m2_m(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotu_vv_u64m4_m(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.nds.vd4dotu.mask.nxv4i64.nxv16i16.nxv16i16.i64(<vscale x 4 x i64> [[VD:%.*]], <vscale x 16 x i16> [[VS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 4 x i64> [[TMP0]]
+//
+vuint64m4_t test_nds_vd4dotu_vv_u64m4_m(vbool16_t mask, vuint64m4_t vd, vuint16m4_t vs1, vuint16m4_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotu_vv_u64m4_m(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotu_vv_u64m8_m(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.nds.vd4dotu.mask.nxv8i64.nxv32i16.nxv32i16.i64(<vscale x 8 x i64> [[VD:%.*]], <vscale x 32 x i16> [[VS1:%.*]], <vscale x 32 x i16> [[VS2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 8 x i64> [[TMP0]]
+//
+vuint64m8_t test_nds_vd4dotu_vv_u64m8_m(vbool8_t mask, vuint64m8_t vd, vuint16m8_t vs1, vuint16m8_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotu_vv_u64m8_m(mask, vd, vs1, vs2, vl);
+}
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/nds_vd4dots.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/nds_vd4dots.c
new file mode 100644
index 0000000000000..43caa2dec6e6c
--- /dev/null
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/nds_vd4dots.c
@@ -0,0 +1,170 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: riscv-registered-target
+// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \
+// RUN: -target-feature +xandesvdot -disable-O0-optnone \
+// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \
+// RUN: FileCheck --check-prefix=CHECK-RV64 %s
+
+#include <andes_vector.h>
+
+// CHECK-RV64-LABEL: @test_nds_vd4dots_vv_i32mf2(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.nds.vd4dots.nxv1i32.nxv4i8.nxv4i8.i64(<vscale x 1 x i32> [[VD:%.*]], <vscale x 4 x i8> [[VS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 1 x i32> [[TMP0]]
+//
+vint32mf2_t test_nds_vd4dots_vv_i32mf2(vint32mf2_t vd, vint8mf2_t vs1, vint8mf2_t vs2, size_t vl) {
+ return __riscv_nds_vd4dots(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dots_vv_i32m1(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.nds.vd4dots.nxv2i32.nxv8i8.nxv8i8.i64(<vscale x 2 x i32> [[VD:%.*]], <vscale x 8 x i8> [[VS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 2 x i32> [[TMP0]]
+//
+vint32m1_t test_nds_vd4dots_vv_i32m1(vint32m1_t vd, vint8m1_t vs1, vint8m1_t vs2, size_t vl) {
+ return __riscv_nds_vd4dots(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dots_vv_i32m2(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.nds.vd4dots.nxv4i32.nxv16i8.nxv16i8.i64(<vscale x 4 x i32> [[VD:%.*]], <vscale x 16 x i8> [[VS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 4 x i32> [[TMP0]]
+//
+vint32m2_t test_nds_vd4dots_vv_i32m2(vint32m2_t vd, vint8m2_t vs1, vint8m2_t vs2, size_t vl) {
+ return __riscv_nds_vd4dots(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dots_vv_i32m4(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.nds.vd4dots.nxv8i32.nxv32i8.nxv32i8.i64(<vscale x 8 x i32> [[VD:%.*]], <vscale x 32 x i8> [[VS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 8 x i32> [[TMP0]]
+//
+vint32m4_t test_nds_vd4dots_vv_i32m4(vint32m4_t vd, vint8m4_t vs1, vint8m4_t vs2, size_t vl) {
+ return __riscv_nds_vd4dots(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dots_vv_i32m8(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.nds.vd4dots.nxv16i32.nxv64i8.nxv64i8.i64(<vscale x 16 x i32> [[VD:%.*]], <vscale x 64 x i8> [[VS1:%.*]], <vscale x 64 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 16 x i32> [[TMP0]]
+//
+vint32m8_t test_nds_vd4dots_vv_i32m8(vint32m8_t vd, vint8m8_t vs1, vint8m8_t vs2, size_t vl) {
+ return __riscv_nds_vd4dots(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dots_vv_i64m1(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.nds.vd4dots.nxv1i64.nxv4i16.nxv4i16.i64(<vscale x 1 x i64> [[VD:%.*]], <vscale x 4 x i16> [[VS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 1 x i64> [[TMP0]]
+//
+vint64m1_t test_nds_vd4dots_vv_i64m1(vint64m1_t vd, vint16m1_t vs1, vint16m1_t vs2, size_t vl) {
+ return __riscv_nds_vd4dots(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dots_vv_i64m2(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.nds.vd4dots.nxv2i64.nxv8i16.nxv8i16.i64(<vscale x 2 x i64> [[VD:%.*]], <vscale x 8 x i16> [[VS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 2 x i64> [[TMP0]]
+//
+vint64m2_t test_nds_vd4dots_vv_i64m2(vint64m2_t vd, vint16m2_t vs1, vint16m2_t vs2, size_t vl) {
+ return __riscv_nds_vd4dots(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dots_vv_i64m4(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.nds.vd4dots.nxv4i64.nxv16i16.nxv16i16.i64(<vscale x 4 x i64> [[VD:%.*]], <vscale x 16 x i16> [[VS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 4 x i64> [[TMP0]]
+//
+vint64m4_t test_nds_vd4dots_vv_i64m4(vint64m4_t vd, vint16m4_t vs1, vint16m4_t vs2, size_t vl) {
+ return __riscv_nds_vd4dots(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dots_vv_i64m8(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.nds.vd4dots.nxv8i64.nxv32i16.nxv32i16.i64(<vscale x 8 x i64> [[VD:%.*]], <vscale x 32 x i16> [[VS1:%.*]], <vscale x 32 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 8 x i64> [[TMP0]]
+//
+vint64m8_t test_nds_vd4dots_vv_i64m8(vint64m8_t vd, vint16m8_t vs1, vint16m8_t vs2, size_t vl) {
+ return __riscv_nds_vd4dots(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dots_vv_i32mf2_m(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.nds.vd4dots.mask.nxv1i32.nxv4i8.nxv4i8.i64(<vscale x 1 x i32> [[VD:%.*]], <vscale x 4 x i8> [[VS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 1 x i32> [[TMP0]]
+//
+vint32mf2_t test_nds_vd4dots_vv_i32mf2_m(vbool64_t mask, vint32mf2_t vd, vint8mf2_t vs1, vint8mf2_t vs2, size_t vl) {
+ return __riscv_nds_vd4dots(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dots_vv_i32m1_m(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.nds.vd4dots.mask.nxv2i32.nxv8i8.nxv8i8.i64(<vscale x 2 x i32> [[VD:%.*]], <vscale x 8 x i8> [[VS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 2 x i32> [[TMP0]]
+//
+vint32m1_t test_nds_vd4dots_vv_i32m1_m(vbool32_t mask, vint32m1_t vd, vint8m1_t vs1, vint8m1_t vs2, size_t vl) {
+ return __riscv_nds_vd4dots(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dots_vv_i32m2_m(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.nds.vd4dots.mask.nxv4i32.nxv16i8.nxv16i8.i64(<vscale x 4 x i32> [[VD:%.*]], <vscale x 16 x i8> [[VS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 4 x i32> [[TMP0]]
+//
+vint32m2_t test_nds_vd4dots_vv_i32m2_m(vbool16_t mask, vint32m2_t vd, vint8m2_t vs1, vint8m2_t vs2, size_t vl) {
+ return __riscv_nds_vd4dots(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dots_vv_i32m4_m(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.nds.vd4dots.mask.nxv8i32.nxv32i8.nxv32i8.i64(<vscale x 8 x i32> [[VD:%.*]], <vscale x 32 x i8> [[VS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 8 x i32> [[TMP0]]
+//
+vint32m4_t test_nds_vd4dots_vv_i32m4_m(vbool8_t mask, vint32m4_t vd, vint8m4_t vs1, vint8m4_t vs2, size_t vl) {
+ return __riscv_nds_vd4dots(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dots_vv_i32m8_m(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.nds.vd4dots.mask.nxv16i32.nxv64i8.nxv64i8.i64(<vscale x 16 x i32> [[VD:%.*]], <vscale x 64 x i8> [[VS1:%.*]], <vscale x 64 x i8> [[VS2:%.*]], <vscale x 16 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 16 x i32> [[TMP0]]
+//
+vint32m8_t test_nds_vd4dots_vv_i32m8_m(vbool4_t mask, vint32m8_t vd, vint8m8_t vs1, vint8m8_t vs2, size_t vl) {
+ return __riscv_nds_vd4dots(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dots_vv_i64m1_m(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.nds.vd4dots.mask.nxv1i64.nxv4i16.nxv4i16.i64(<vscale x 1 x i64> [[VD:%.*]], <vscale x 4 x i16> [[VS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 1 x i64> [[TMP0]]
+//
+vint64m1_t test_nds_vd4dots_vv_i64m1_m(vbool64_t mask, vint64m1_t vd, vint16m1_t vs1, vint16m1_t vs2, size_t vl) {
+ return __riscv_nds_vd4dots(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dots_vv_i64m2_m(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.nds.vd4dots.mask.nxv2i64.nxv8i16.nxv8i16.i64(<vscale x 2 x i64> [[VD:%.*]], <vscale x 8 x i16> [[VS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 2 x i64> [[TMP0]]
+//
+vint64m2_t test_nds_vd4dots_vv_i64m2_m(vbool32_t mask, vint64m2_t vd, vint16m2_t vs1, vint16m2_t vs2, size_t vl) {
+ return __riscv_nds_vd4dots(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dots_vv_i64m4_m(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.nds.vd4dots.mask.nxv4i64.nxv16i16.nxv16i16.i64(<vscale x 4 x i64> [[VD:%.*]], <vscale x 16 x i16> [[VS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 4 x i64> [[TMP0]]
+//
+vint64m4_t test_nds_vd4dots_vv_i64m4_m(vbool16_t mask, vint64m4_t vd, vint16m4_t vs1, vint16m4_t vs2, size_t vl) {
+ return __riscv_nds_vd4dots(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dots_vv_i64m8_m(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.nds.vd4dots.mask.nxv8i64.nxv32i16.nxv32i16.i64(<vscale x 8 x i64> [[VD:%.*]], <vscale x 32 x i16> [[VS1:%.*]], <vscale x 32 x i16> [[VS2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 8 x i64> [[TMP0]]
+//
+vint64m8_t test_nds_vd4dots_vv_i64m8_m(vbool8_t mask, vint64m8_t vd, vint16m8_t vs1, vint16m8_t vs2, size_t vl) {
+ return __riscv_nds_vd4dots(mask, vd, vs1, vs2, vl);
+}
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/nds_vd4dotsu.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/nds_vd4dotsu.c
new file mode 100644
index 0000000000000..36a30b0c67cc3
--- /dev/null
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/nds_vd4dotsu.c
@@ -0,0 +1,170 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: riscv-registered-target
+// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \
+// RUN: -target-feature +xandesvdot -disable-O0-optnone \
+// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \
+// RUN: FileCheck --check-prefix=CHECK-RV64 %s
+
+#include <andes_vector.h>
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotsu_vv_i32mf2(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.nds.vd4dotsu.nxv1i32.nxv4i8.nxv4i8.i64(<vscale x 1 x i32> [[VD:%.*]], <vscale x 4 x i8> [[VS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 1 x i32> [[TMP0]]
+//
+vint32mf2_t test_nds_vd4dotsu_vv_i32mf2(vint32mf2_t vd, vint8mf2_t vs1, vuint8mf2_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotsu(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotsu_vv_i32m1(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.nds.vd4dotsu.nxv2i32.nxv8i8.nxv8i8.i64(<vscale x 2 x i32> [[VD:%.*]], <vscale x 8 x i8> [[VS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 2 x i32> [[TMP0]]
+//
+vint32m1_t test_nds_vd4dotsu_vv_i32m1(vint32m1_t vd, vint8m1_t vs1, vuint8m1_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotsu(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotsu_vv_i32m2(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.nds.vd4dotsu.nxv4i32.nxv16i8.nxv16i8.i64(<vscale x 4 x i32> [[VD:%.*]], <vscale x 16 x i8> [[VS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 4 x i32> [[TMP0]]
+//
+vint32m2_t test_nds_vd4dotsu_vv_i32m2(vint32m2_t vd, vint8m2_t vs1, vuint8m2_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotsu(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotsu_vv_i32m4(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.nds.vd4dotsu.nxv8i32.nxv32i8.nxv32i8.i64(<vscale x 8 x i32> [[VD:%.*]], <vscale x 32 x i8> [[VS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 8 x i32> [[TMP0]]
+//
+vint32m4_t test_nds_vd4dotsu_vv_i32m4(vint32m4_t vd, vint8m4_t vs1, vuint8m4_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotsu(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotsu_vv_i32m8(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.nds.vd4dotsu.nxv16i32.nxv64i8.nxv64i8.i64(<vscale x 16 x i32> [[VD:%.*]], <vscale x 64 x i8> [[VS1:%.*]], <vscale x 64 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 16 x i32> [[TMP0]]
+//
+vint32m8_t test_nds_vd4dotsu_vv_i32m8(vint32m8_t vd, vint8m8_t vs1, vuint8m8_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotsu(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotsu_vv_i64m1(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.nds.vd4dotsu.nxv1i64.nxv4i16.nxv4i16.i64(<vscale x 1 x i64> [[VD:%.*]], <vscale x 4 x i16> [[VS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 1 x i64> [[TMP0]]
+//
+vint64m1_t test_nds_vd4dotsu_vv_i64m1(vint64m1_t vd, vint16m1_t vs1, vuint16m1_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotsu(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotsu_vv_i64m2(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.nds.vd4dotsu.nxv2i64.nxv8i16.nxv8i16.i64(<vscale x 2 x i64> [[VD:%.*]], <vscale x 8 x i16> [[VS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 2 x i64> [[TMP0]]
+//
+vint64m2_t test_nds_vd4dotsu_vv_i64m2(vint64m2_t vd, vint16m2_t vs1, vuint16m2_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotsu(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotsu_vv_i64m4(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.nds.vd4dotsu.nxv4i64.nxv16i16.nxv16i16.i64(<vscale x 4 x i64> [[VD:%.*]], <vscale x 16 x i16> [[VS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 4 x i64> [[TMP0]]
+//
+vint64m4_t test_nds_vd4dotsu_vv_i64m4(vint64m4_t vd, vint16m4_t vs1, vuint16m4_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotsu(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotsu_vv_i64m8(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.nds.vd4dotsu.nxv8i64.nxv32i16.nxv32i16.i64(<vscale x 8 x i64> [[VD:%.*]], <vscale x 32 x i16> [[VS1:%.*]], <vscale x 32 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 8 x i64> [[TMP0]]
+//
+vint64m8_t test_nds_vd4dotsu_vv_i64m8(vint64m8_t vd, vint16m8_t vs1, vuint16m8_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotsu(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotsu_vv_i32mf2_m(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.nds.vd4dotsu.mask.nxv1i32.nxv4i8.nxv4i8.i64(<vscale x 1 x i32> [[VD:%.*]], <vscale x 4 x i8> [[VS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 1 x i32> [[TMP0]]
+//
+vint32mf2_t test_nds_vd4dotsu_vv_i32mf2_m(vbool64_t mask, vint32mf2_t vd, vint8mf2_t vs1, vuint8mf2_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotsu(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotsu_vv_i32m1_m(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.nds.vd4dotsu.mask.nxv2i32.nxv8i8.nxv8i8.i64(<vscale x 2 x i32> [[VD:%.*]], <vscale x 8 x i8> [[VS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 2 x i32> [[TMP0]]
+//
+vint32m1_t test_nds_vd4dotsu_vv_i32m1_m(vbool32_t mask, vint32m1_t vd, vint8m1_t vs1, vuint8m1_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotsu(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotsu_vv_i32m2_m(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.nds.vd4dotsu.mask.nxv4i32.nxv16i8.nxv16i8.i64(<vscale x 4 x i32> [[VD:%.*]], <vscale x 16 x i8> [[VS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 4 x i32> [[TMP0]]
+//
+vint32m2_t test_nds_vd4dotsu_vv_i32m2_m(vbool16_t mask, vint32m2_t vd, vint8m2_t vs1, vuint8m2_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotsu(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotsu_vv_i32m4_m(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.nds.vd4dotsu.mask.nxv8i32.nxv32i8.nxv32i8.i64(<vscale x 8 x i32> [[VD:%.*]], <vscale x 32 x i8> [[VS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 8 x i32> [[TMP0]]
+//
+vint32m4_t test_nds_vd4dotsu_vv_i32m4_m(vbool8_t mask, vint32m4_t vd, vint8m4_t vs1, vuint8m4_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotsu(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotsu_vv_i32m8_m(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.nds.vd4dotsu.mask.nxv16i32.nxv64i8.nxv64i8.i64(<vscale x 16 x i32> [[VD:%.*]], <vscale x 64 x i8> [[VS1:%.*]], <vscale x 64 x i8> [[VS2:%.*]], <vscale x 16 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 16 x i32> [[TMP0]]
+//
+vint32m8_t test_nds_vd4dotsu_vv_i32m8_m(vbool4_t mask, vint32m8_t vd, vint8m8_t vs1, vuint8m8_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotsu(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotsu_vv_i64m1_m(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.nds.vd4dotsu.mask.nxv1i64.nxv4i16.nxv4i16.i64(<vscale x 1 x i64> [[VD:%.*]], <vscale x 4 x i16> [[VS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 1 x i64> [[TMP0]]
+//
+vint64m1_t test_nds_vd4dotsu_vv_i64m1_m(vbool64_t mask, vint64m1_t vd, vint16m1_t vs1, vuint16m1_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotsu(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotsu_vv_i64m2_m(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.nds.vd4dotsu.mask.nxv2i64.nxv8i16.nxv8i16.i64(<vscale x 2 x i64> [[VD:%.*]], <vscale x 8 x i16> [[VS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 2 x i64> [[TMP0]]
+//
+vint64m2_t test_nds_vd4dotsu_vv_i64m2_m(vbool32_t mask, vint64m2_t vd, vint16m2_t vs1, vuint16m2_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotsu(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotsu_vv_i64m4_m(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.nds.vd4dotsu.mask.nxv4i64.nxv16i16.nxv16i16.i64(<vscale x 4 x i64> [[VD:%.*]], <vscale x 16 x i16> [[VS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 4 x i64> [[TMP0]]
+//
+vint64m4_t test_nds_vd4dotsu_vv_i64m4_m(vbool16_t mask, vint64m4_t vd, vint16m4_t vs1, vuint16m4_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotsu(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotsu_vv_i64m8_m(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.nds.vd4dotsu.mask.nxv8i64.nxv32i16.nxv32i16.i64(<vscale x 8 x i64> [[VD:%.*]], <vscale x 32 x i16> [[VS1:%.*]], <vscale x 32 x i16> [[VS2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 8 x i64> [[TMP0]]
+//
+vint64m8_t test_nds_vd4dotsu_vv_i64m8_m(vbool8_t mask, vint64m8_t vd, vint16m8_t vs1, vuint16m8_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotsu(mask, vd, vs1, vs2, vl);
+}
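
As the overloaded tests above show, __riscv_nds_vd4dotsu resolves to the correct variant from its operand types: the accumulator vd stays signed while vs2 is unsigned, and the masked form simply takes the vbool mask as its first argument. A minimal usage sketch (illustrative only; the helper name is not part of this patch):

#include <andes_vector.h>

// Accumulate groups of four signed-by-unsigned 8-bit products into each
// 32-bit lane of acc; lanes past vl follow the default tail policy.
vint32m1_t dot4su(vint32m1_t acc, vint8m1_t a, vuint8m1_t b, size_t vl) {
  return __riscv_nds_vd4dotsu(acc, a, b, vl);
}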
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/nds_vd4dotu.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/nds_vd4dotu.c
new file mode 100644
index 0000000000000..3d4583c8f05e5
--- /dev/null
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/nds_vd4dotu.c
@@ -0,0 +1,170 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: riscv-registered-target
+// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \
+// RUN: -target-feature +xandesvdot -disable-O0-optnone \
+// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \
+// RUN: FileCheck --check-prefix=CHECK-RV64 %s
+
+#include <andes_vector.h>
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotu_vv_u32mf2(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.nds.vd4dotu.nxv1i32.nxv4i8.nxv4i8.i64(<vscale x 1 x i32> [[VD:%.*]], <vscale x 4 x i8> [[VS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 1 x i32> [[TMP0]]
+//
+vuint32mf2_t test_nds_vd4dotu_vv_u32mf2(vuint32mf2_t vd, vuint8mf2_t vs1, vuint8mf2_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotu(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotu_vv_u32m1(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.nds.vd4dotu.nxv2i32.nxv8i8.nxv8i8.i64(<vscale x 2 x i32> [[VD:%.*]], <vscale x 8 x i8> [[VS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 2 x i32> [[TMP0]]
+//
+vuint32m1_t test_nds_vd4dotu_vv_u32m1(vuint32m1_t vd, vuint8m1_t vs1, vuint8m1_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotu(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotu_vv_u32m2(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.nds.vd4dotu.nxv4i32.nxv16i8.nxv16i8.i64(<vscale x 4 x i32> [[VD:%.*]], <vscale x 16 x i8> [[VS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 4 x i32> [[TMP0]]
+//
+vuint32m2_t test_nds_vd4dotu_vv_u32m2(vuint32m2_t vd, vuint8m2_t vs1, vuint8m2_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotu(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotu_vv_u32m4(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.nds.vd4dotu.nxv8i32.nxv32i8.nxv32i8.i64(<vscale x 8 x i32> [[VD:%.*]], <vscale x 32 x i8> [[VS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 8 x i32> [[TMP0]]
+//
+vuint32m4_t test_nds_vd4dotu_vv_u32m4(vuint32m4_t vd, vuint8m4_t vs1, vuint8m4_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotu(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotu_vv_u32m8(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.nds.vd4dotu.nxv16i32.nxv64i8.nxv64i8.i64(<vscale x 16 x i32> [[VD:%.*]], <vscale x 64 x i8> [[VS1:%.*]], <vscale x 64 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 16 x i32> [[TMP0]]
+//
+vuint32m8_t test_nds_vd4dotu_vv_u32m8(vuint32m8_t vd, vuint8m8_t vs1, vuint8m8_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotu(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotu_vv_u64m1(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.nds.vd4dotu.nxv1i64.nxv4i16.nxv4i16.i64(<vscale x 1 x i64> [[VD:%.*]], <vscale x 4 x i16> [[VS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 1 x i64> [[TMP0]]
+//
+vuint64m1_t test_nds_vd4dotu_vv_u64m1(vuint64m1_t vd, vuint16m1_t vs1, vuint16m1_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotu(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotu_vv_u64m2(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.nds.vd4dotu.nxv2i64.nxv8i16.nxv8i16.i64(<vscale x 2 x i64> [[VD:%.*]], <vscale x 8 x i16> [[VS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 2 x i64> [[TMP0]]
+//
+vuint64m2_t test_nds_vd4dotu_vv_u64m2(vuint64m2_t vd, vuint16m2_t vs1, vuint16m2_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotu(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotu_vv_u64m4(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.nds.vd4dotu.nxv4i64.nxv16i16.nxv16i16.i64(<vscale x 4 x i64> [[VD:%.*]], <vscale x 16 x i16> [[VS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 4 x i64> [[TMP0]]
+//
+vuint64m4_t test_nds_vd4dotu_vv_u64m4(vuint64m4_t vd, vuint16m4_t vs1, vuint16m4_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotu(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotu_vv_u64m8(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.nds.vd4dotu.nxv8i64.nxv32i16.nxv32i16.i64(<vscale x 8 x i64> [[VD:%.*]], <vscale x 32 x i16> [[VS1:%.*]], <vscale x 32 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 8 x i64> [[TMP0]]
+//
+vuint64m8_t test_nds_vd4dotu_vv_u64m8(vuint64m8_t vd, vuint16m8_t vs1, vuint16m8_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotu(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotu_vv_u32mf2_m(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.nds.vd4dotu.mask.nxv1i32.nxv4i8.nxv4i8.i64(<vscale x 1 x i32> [[VD:%.*]], <vscale x 4 x i8> [[VS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 1 x i32> [[TMP0]]
+//
+vuint32mf2_t test_nds_vd4dotu_vv_u32mf2_m(vbool64_t mask, vuint32mf2_t vd, vuint8mf2_t vs1, vuint8mf2_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotu(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotu_vv_u32m1_m(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.nds.vd4dotu.mask.nxv2i32.nxv8i8.nxv8i8.i64(<vscale x 2 x i32> [[VD:%.*]], <vscale x 8 x i8> [[VS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 2 x i32> [[TMP0]]
+//
+vuint32m1_t test_nds_vd4dotu_vv_u32m1_m(vbool32_t mask, vuint32m1_t vd, vuint8m1_t vs1, vuint8m1_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotu(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotu_vv_u32m2_m(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.nds.vd4dotu.mask.nxv4i32.nxv16i8.nxv16i8.i64(<vscale x 4 x i32> [[VD:%.*]], <vscale x 16 x i8> [[VS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 4 x i32> [[TMP0]]
+//
+vuint32m2_t test_nds_vd4dotu_vv_u32m2_m(vbool16_t mask, vuint32m2_t vd, vuint8m2_t vs1, vuint8m2_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotu(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotu_vv_u32m4_m(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.nds.vd4dotu.mask.nxv8i32.nxv32i8.nxv32i8.i64(<vscale x 8 x i32> [[VD:%.*]], <vscale x 32 x i8> [[VS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 8 x i32> [[TMP0]]
+//
+vuint32m4_t test_nds_vd4dotu_vv_u32m4_m(vbool8_t mask, vuint32m4_t vd, vuint8m4_t vs1, vuint8m4_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotu(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotu_vv_u32m8_m(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.nds.vd4dotu.mask.nxv16i32.nxv64i8.nxv64i8.i64(<vscale x 16 x i32> [[VD:%.*]], <vscale x 64 x i8> [[VS1:%.*]], <vscale x 64 x i8> [[VS2:%.*]], <vscale x 16 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 16 x i32> [[TMP0]]
+//
+vuint32m8_t test_nds_vd4dotu_vv_u32m8_m(vbool4_t mask, vuint32m8_t vd, vuint8m8_t vs1, vuint8m8_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotu(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotu_vv_u64m1_m(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.nds.vd4dotu.mask.nxv1i64.nxv4i16.nxv4i16.i64(<vscale x 1 x i64> [[VD:%.*]], <vscale x 4 x i16> [[VS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 1 x i64> [[TMP0]]
+//
+vuint64m1_t test_nds_vd4dotu_vv_u64m1_m(vbool64_t mask, vuint64m1_t vd, vuint16m1_t vs1, vuint16m1_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotu(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotu_vv_u64m2_m(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.nds.vd4dotu.mask.nxv2i64.nxv8i16.nxv8i16.i64(<vscale x 2 x i64> [[VD:%.*]], <vscale x 8 x i16> [[VS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 2 x i64> [[TMP0]]
+//
+vuint64m2_t test_nds_vd4dotu_vv_u64m2_m(vbool32_t mask, vuint64m2_t vd, vuint16m2_t vs1, vuint16m2_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotu(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotu_vv_u64m4_m(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.nds.vd4dotu.mask.nxv4i64.nxv16i16.nxv16i16.i64(<vscale x 4 x i64> [[VD:%.*]], <vscale x 16 x i16> [[VS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 4 x i64> [[TMP0]]
+//
+vuint64m4_t test_nds_vd4dotu_vv_u64m4_m(vbool16_t mask, vuint64m4_t vd, vuint16m4_t vs1, vuint16m4_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotu(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotu_vv_u64m8_m(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.nds.vd4dotu.mask.nxv8i64.nxv32i16.nxv32i16.i64(<vscale x 8 x i64> [[VD:%.*]], <vscale x 32 x i16> [[VS1:%.*]], <vscale x 32 x i16> [[VS2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 8 x i64> [[TMP0]]
+//
+vuint64m8_t test_nds_vd4dotu_vv_u64m8_m(vbool8_t mask, vuint64m8_t vd, vuint16m8_t vs1, vuint16m8_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotu(mask, vd, vs1, vs2, vl);
+}
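
The policy tests that follow cover the usual four suffixes, and the trailing i64 immediate in each CHECK line encodes the policy: 2 for _tu and _tum, 0 for _tumu, 1 for _mu, with 3 being the all-agnostic default used by the non-policy tests above (bit 0 set means tail agnostic, bit 1 set means mask agnostic). A tail-undisturbed sketch (illustrative; the helper name is an assumption):

// Lanes of acc at index >= vl keep their previous contents instead of
// being left agnostic.
vint32m1_t dot4_keep_tail(vint32m1_t acc, vint8m1_t a, vint8m1_t b,
                          size_t vl) {
  return __riscv_nds_vd4dots_vv_i32m1_tu(acc, a, b, vl);
}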
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/policy/non-overloaded/nds_vd4dots.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/policy/non-overloaded/nds_vd4dots.c
new file mode 100644
index 0000000000000..7398b90c8897d
--- /dev/null
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/policy/non-overloaded/nds_vd4dots.c
@@ -0,0 +1,332 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: riscv-registered-target
+// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \
+// RUN: -target-feature +xandesvdot -disable-O0-optnone \
+// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \
+// RUN: FileCheck --check-prefix=CHECK-RV64 %s
+
+#include <andes_vector.h>
+
+// CHECK-RV64-LABEL: @test_nds_vd4dots_vv_i32mf2_tu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.nds.vd4dots.nxv1i32.nxv4i8.nxv4i8.i64(<vscale x 1 x i32> [[VD:%.*]], <vscale x 4 x i8> [[VS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 1 x i32> [[TMP0]]
+//
+vint32mf2_t test_nds_vd4dots_vv_i32mf2_tu(vint32mf2_t vd, vint8mf2_t vs1, vint8mf2_t vs2, size_t vl) {
+ return __riscv_nds_vd4dots_vv_i32mf2_tu(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dots_vv_i32m1_tu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.nds.vd4dots.nxv2i32.nxv8i8.nxv8i8.i64(<vscale x 2 x i32> [[VD:%.*]], <vscale x 8 x i8> [[VS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 2 x i32> [[TMP0]]
+//
+vint32m1_t test_nds_vd4dots_vv_i32m1_tu(vint32m1_t vd, vint8m1_t vs1, vint8m1_t vs2, size_t vl) {
+ return __riscv_nds_vd4dots_vv_i32m1_tu(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dots_vv_i32m2_tu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.nds.vd4dots.nxv4i32.nxv16i8.nxv16i8.i64(<vscale x 4 x i32> [[VD:%.*]], <vscale x 16 x i8> [[VS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 4 x i32> [[TMP0]]
+//
+vint32m2_t test_nds_vd4dots_vv_i32m2_tu(vint32m2_t vd, vint8m2_t vs1, vint8m2_t vs2, size_t vl) {
+ return __riscv_nds_vd4dots_vv_i32m2_tu(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dots_vv_i32m4_tu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.nds.vd4dots.nxv8i32.nxv32i8.nxv32i8.i64(<vscale x 8 x i32> [[VD:%.*]], <vscale x 32 x i8> [[VS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 8 x i32> [[TMP0]]
+//
+vint32m4_t test_nds_vd4dots_vv_i32m4_tu(vint32m4_t vd, vint8m4_t vs1, vint8m4_t vs2, size_t vl) {
+ return __riscv_nds_vd4dots_vv_i32m4_tu(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dots_vv_i32m8_tu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.nds.vd4dots.nxv16i32.nxv64i8.nxv64i8.i64(<vscale x 16 x i32> [[VD:%.*]], <vscale x 64 x i8> [[VS1:%.*]], <vscale x 64 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 16 x i32> [[TMP0]]
+//
+vint32m8_t test_nds_vd4dots_vv_i32m8_tu(vint32m8_t vd, vint8m8_t vs1, vint8m8_t vs2, size_t vl) {
+ return __riscv_nds_vd4dots_vv_i32m8_tu(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dots_vv_i64m1_tu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.nds.vd4dots.nxv1i64.nxv4i16.nxv4i16.i64(<vscale x 1 x i64> [[VD:%.*]], <vscale x 4 x i16> [[VS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 1 x i64> [[TMP0]]
+//
+vint64m1_t test_nds_vd4dots_vv_i64m1_tu(vint64m1_t vd, vint16m1_t vs1, vint16m1_t vs2, size_t vl) {
+ return __riscv_nds_vd4dots_vv_i64m1_tu(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dots_vv_i64m2_tu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.nds.vd4dots.nxv2i64.nxv8i16.nxv8i16.i64(<vscale x 2 x i64> [[VD:%.*]], <vscale x 8 x i16> [[VS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 2 x i64> [[TMP0]]
+//
+vint64m2_t test_nds_vd4dots_vv_i64m2_tu(vint64m2_t vd, vint16m2_t vs1, vint16m2_t vs2, size_t vl) {
+ return __riscv_nds_vd4dots_vv_i64m2_tu(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dots_vv_i64m4_tu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.nds.vd4dots.nxv4i64.nxv16i16.nxv16i16.i64(<vscale x 4 x i64> [[VD:%.*]], <vscale x 16 x i16> [[VS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 4 x i64> [[TMP0]]
+//
+vint64m4_t test_nds_vd4dots_vv_i64m4_tu(vint64m4_t vd, vint16m4_t vs1, vint16m4_t vs2, size_t vl) {
+ return __riscv_nds_vd4dots_vv_i64m4_tu(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dots_vv_i64m8_tu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.nds.vd4dots.nxv8i64.nxv32i16.nxv32i16.i64(<vscale x 8 x i64> [[VD:%.*]], <vscale x 32 x i16> [[VS1:%.*]], <vscale x 32 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 8 x i64> [[TMP0]]
+//
+vint64m8_t test_nds_vd4dots_vv_i64m8_tu(vint64m8_t vd, vint16m8_t vs1, vint16m8_t vs2, size_t vl) {
+ return __riscv_nds_vd4dots_vv_i64m8_tu(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dots_vv_i32mf2_tum(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.nds.vd4dots.mask.nxv1i32.nxv4i8.nxv4i8.i64(<vscale x 1 x i32> [[VD:%.*]], <vscale x 4 x i8> [[VS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 1 x i32> [[TMP0]]
+//
+vint32mf2_t test_nds_vd4dots_vv_i32mf2_tum(vbool64_t mask, vint32mf2_t vd, vint8mf2_t vs1, vint8mf2_t vs2, size_t vl) {
+ return __riscv_nds_vd4dots_vv_i32mf2_tum(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dots_vv_i32m1_tum(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.nds.vd4dots.mask.nxv2i32.nxv8i8.nxv8i8.i64(<vscale x 2 x i32> [[VD:%.*]], <vscale x 8 x i8> [[VS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 2 x i32> [[TMP0]]
+//
+vint32m1_t test_nds_vd4dots_vv_i32m1_tum(vbool32_t mask, vint32m1_t vd, vint8m1_t vs1, vint8m1_t vs2, size_t vl) {
+ return __riscv_nds_vd4dots_vv_i32m1_tum(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dots_vv_i32m2_tum(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.nds.vd4dots.mask.nxv4i32.nxv16i8.nxv16i8.i64(<vscale x 4 x i32> [[VD:%.*]], <vscale x 16 x i8> [[VS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 4 x i32> [[TMP0]]
+//
+vint32m2_t test_nds_vd4dots_vv_i32m2_tum(vbool16_t mask, vint32m2_t vd, vint8m2_t vs1, vint8m2_t vs2, size_t vl) {
+ return __riscv_nds_vd4dots_vv_i32m2_tum(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dots_vv_i32m4_tum(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.nds.vd4dots.mask.nxv8i32.nxv32i8.nxv32i8.i64(<vscale x 8 x i32> [[VD:%.*]], <vscale x 32 x i8> [[VS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 8 x i32> [[TMP0]]
+//
+vint32m4_t test_nds_vd4dots_vv_i32m4_tum(vbool8_t mask, vint32m4_t vd, vint8m4_t vs1, vint8m4_t vs2, size_t vl) {
+ return __riscv_nds_vd4dots_vv_i32m4_tum(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dots_vv_i32m8_tum(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.nds.vd4dots.mask.nxv16i32.nxv64i8.nxv64i8.i64(<vscale x 16 x i32> [[VD:%.*]], <vscale x 64 x i8> [[VS1:%.*]], <vscale x 64 x i8> [[VS2:%.*]], <vscale x 16 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 16 x i32> [[TMP0]]
+//
+vint32m8_t test_nds_vd4dots_vv_i32m8_tum(vbool4_t mask, vint32m8_t vd, vint8m8_t vs1, vint8m8_t vs2, size_t vl) {
+ return __riscv_nds_vd4dots_vv_i32m8_tum(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dots_vv_i64m1_tum(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.nds.vd4dots.mask.nxv1i64.nxv4i16.nxv4i16.i64(<vscale x 1 x i64> [[VD:%.*]], <vscale x 4 x i16> [[VS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 1 x i64> [[TMP0]]
+//
+vint64m1_t test_nds_vd4dots_vv_i64m1_tum(vbool64_t mask, vint64m1_t vd, vint16m1_t vs1, vint16m1_t vs2, size_t vl) {
+ return __riscv_nds_vd4dots_vv_i64m1_tum(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dots_vv_i64m2_tum(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.nds.vd4dots.mask.nxv2i64.nxv8i16.nxv8i16.i64(<vscale x 2 x i64> [[VD:%.*]], <vscale x 8 x i16> [[VS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 2 x i64> [[TMP0]]
+//
+vint64m2_t test_nds_vd4dots_vv_i64m2_tum(vbool32_t mask, vint64m2_t vd, vint16m2_t vs1, vint16m2_t vs2, size_t vl) {
+ return __riscv_nds_vd4dots_vv_i64m2_tum(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dots_vv_i64m4_tum(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.nds.vd4dots.mask.nxv4i64.nxv16i16.nxv16i16.i64(<vscale x 4 x i64> [[VD:%.*]], <vscale x 16 x i16> [[VS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 4 x i64> [[TMP0]]
+//
+vint64m4_t test_nds_vd4dots_vv_i64m4_tum(vbool16_t mask, vint64m4_t vd, vint16m4_t vs1, vint16m4_t vs2, size_t vl) {
+ return __riscv_nds_vd4dots_vv_i64m4_tum(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dots_vv_i64m8_tum(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.nds.vd4dots.mask.nxv8i64.nxv32i16.nxv32i16.i64(<vscale x 8 x i64> [[VD:%.*]], <vscale x 32 x i16> [[VS1:%.*]], <vscale x 32 x i16> [[VS2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 8 x i64> [[TMP0]]
+//
+vint64m8_t test_nds_vd4dots_vv_i64m8_tum(vbool8_t mask, vint64m8_t vd, vint16m8_t vs1, vint16m8_t vs2, size_t vl) {
+ return __riscv_nds_vd4dots_vv_i64m8_tum(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dots_vv_i32mf2_tumu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.nds.vd4dots.mask.nxv1i32.nxv4i8.nxv4i8.i64(<vscale x 1 x i32> [[VD:%.*]], <vscale x 4 x i8> [[VS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT: ret <vscale x 1 x i32> [[TMP0]]
+//
+vint32mf2_t test_nds_vd4dots_vv_i32mf2_tumu(vbool64_t mask, vint32mf2_t vd, vint8mf2_t vs1, vint8mf2_t vs2, size_t vl) {
+ return __riscv_nds_vd4dots_vv_i32mf2_tumu(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dots_vv_i32m1_tumu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.nds.vd4dots.mask.nxv2i32.nxv8i8.nxv8i8.i64(<vscale x 2 x i32> [[VD:%.*]], <vscale x 8 x i8> [[VS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT: ret <vscale x 2 x i32> [[TMP0]]
+//
+vint32m1_t test_nds_vd4dots_vv_i32m1_tumu(vbool32_t mask, vint32m1_t vd, vint8m1_t vs1, vint8m1_t vs2, size_t vl) {
+ return __riscv_nds_vd4dots_vv_i32m1_tumu(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dots_vv_i32m2_tumu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.nds.vd4dots.mask.nxv4i32.nxv16i8.nxv16i8.i64(<vscale x 4 x i32> [[VD:%.*]], <vscale x 16 x i8> [[VS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT: ret <vscale x 4 x i32> [[TMP0]]
+//
+vint32m2_t test_nds_vd4dots_vv_i32m2_tumu(vbool16_t mask, vint32m2_t vd, vint8m2_t vs1, vint8m2_t vs2, size_t vl) {
+ return __riscv_nds_vd4dots_vv_i32m2_tumu(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dots_vv_i32m4_tumu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.nds.vd4dots.mask.nxv8i32.nxv32i8.nxv32i8.i64(<vscale x 8 x i32> [[VD:%.*]], <vscale x 32 x i8> [[VS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT: ret <vscale x 8 x i32> [[TMP0]]
+//
+vint32m4_t test_nds_vd4dots_vv_i32m4_tumu(vbool8_t mask, vint32m4_t vd, vint8m4_t vs1, vint8m4_t vs2, size_t vl) {
+ return __riscv_nds_vd4dots_vv_i32m4_tumu(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dots_vv_i32m8_tumu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.nds.vd4dots.mask.nxv16i32.nxv64i8.nxv64i8.i64(<vscale x 16 x i32> [[VD:%.*]], <vscale x 64 x i8> [[VS1:%.*]], <vscale x 64 x i8> [[VS2:%.*]], <vscale x 16 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT: ret <vscale x 16 x i32> [[TMP0]]
+//
+vint32m8_t test_nds_vd4dots_vv_i32m8_tumu(vbool4_t mask, vint32m8_t vd, vint8m8_t vs1, vint8m8_t vs2, size_t vl) {
+ return __riscv_nds_vd4dots_vv_i32m8_tumu(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dots_vv_i64m1_tumu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.nds.vd4dots.mask.nxv1i64.nxv4i16.nxv4i16.i64(<vscale x 1 x i64> [[VD:%.*]], <vscale x 4 x i16> [[VS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT: ret <vscale x 1 x i64> [[TMP0]]
+//
+vint64m1_t test_nds_vd4dots_vv_i64m1_tumu(vbool64_t mask, vint64m1_t vd, vint16m1_t vs1, vint16m1_t vs2, size_t vl) {
+ return __riscv_nds_vd4dots_vv_i64m1_tumu(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dots_vv_i64m2_tumu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.nds.vd4dots.mask.nxv2i64.nxv8i16.nxv8i16.i64(<vscale x 2 x i64> [[VD:%.*]], <vscale x 8 x i16> [[VS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT: ret <vscale x 2 x i64> [[TMP0]]
+//
+vint64m2_t test_nds_vd4dots_vv_i64m2_tumu(vbool32_t mask, vint64m2_t vd, vint16m2_t vs1, vint16m2_t vs2, size_t vl) {
+ return __riscv_nds_vd4dots_vv_i64m2_tumu(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dots_vv_i64m4_tumu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.nds.vd4dots.mask.nxv4i64.nxv16i16.nxv16i16.i64(<vscale x 4 x i64> [[VD:%.*]], <vscale x 16 x i16> [[VS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT: ret <vscale x 4 x i64> [[TMP0]]
+//
+vint64m4_t test_nds_vd4dots_vv_i64m4_tumu(vbool16_t mask, vint64m4_t vd, vint16m4_t vs1, vint16m4_t vs2, size_t vl) {
+ return __riscv_nds_vd4dots_vv_i64m4_tumu(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dots_vv_i64m8_tumu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.nds.vd4dots.mask.nxv8i64.nxv32i16.nxv32i16.i64(<vscale x 8 x i64> [[VD:%.*]], <vscale x 32 x i16> [[VS1:%.*]], <vscale x 32 x i16> [[VS2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT: ret <vscale x 8 x i64> [[TMP0]]
+//
+vint64m8_t test_nds_vd4dots_vv_i64m8_tumu(vbool8_t mask, vint64m8_t vd, vint16m8_t vs1, vint16m8_t vs2, size_t vl) {
+ return __riscv_nds_vd4dots_vv_i64m8_tumu(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dots_vv_i32mf2_mu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.nds.vd4dots.mask.nxv1i32.nxv4i8.nxv4i8.i64(<vscale x 1 x i32> [[VD:%.*]], <vscale x 4 x i8> [[VS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
+// CHECK-RV64-NEXT: ret <vscale x 1 x i32> [[TMP0]]
+//
+vint32mf2_t test_nds_vd4dots_vv_i32mf2_mu(vbool64_t mask, vint32mf2_t vd, vint8mf2_t vs1, vint8mf2_t vs2, size_t vl) {
+ return __riscv_nds_vd4dots_vv_i32mf2_mu(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dots_vv_i32m1_mu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.nds.vd4dots.mask.nxv2i32.nxv8i8.nxv8i8.i64(<vscale x 2 x i32> [[VD:%.*]], <vscale x 8 x i8> [[VS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
+// CHECK-RV64-NEXT: ret <vscale x 2 x i32> [[TMP0]]
+//
+vint32m1_t test_nds_vd4dots_vv_i32m1_mu(vbool32_t mask, vint32m1_t vd, vint8m1_t vs1, vint8m1_t vs2, size_t vl) {
+ return __riscv_nds_vd4dots_vv_i32m1_mu(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dots_vv_i32m2_mu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.nds.vd4dots.mask.nxv4i32.nxv16i8.nxv16i8.i64(<vscale x 4 x i32> [[VD:%.*]], <vscale x 16 x i8> [[VS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
+// CHECK-RV64-NEXT: ret <vscale x 4 x i32> [[TMP0]]
+//
+vint32m2_t test_nds_vd4dots_vv_i32m2_mu(vbool16_t mask, vint32m2_t vd, vint8m2_t vs1, vint8m2_t vs2, size_t vl) {
+ return __riscv_nds_vd4dots_vv_i32m2_mu(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dots_vv_i32m4_mu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.nds.vd4dots.mask.nxv8i32.nxv32i8.nxv32i8.i64(<vscale x 8 x i32> [[VD:%.*]], <vscale x 32 x i8> [[VS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
+// CHECK-RV64-NEXT: ret <vscale x 8 x i32> [[TMP0]]
+//
+vint32m4_t test_nds_vd4dots_vv_i32m4_mu(vbool8_t mask, vint32m4_t vd, vint8m4_t vs1, vint8m4_t vs2, size_t vl) {
+ return __riscv_nds_vd4dots_vv_i32m4_mu(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dots_vv_i32m8_mu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.nds.vd4dots.mask.nxv16i32.nxv64i8.nxv64i8.i64(<vscale x 16 x i32> [[VD:%.*]], <vscale x 64 x i8> [[VS1:%.*]], <vscale x 64 x i8> [[VS2:%.*]], <vscale x 16 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
+// CHECK-RV64-NEXT: ret <vscale x 16 x i32> [[TMP0]]
+//
+vint32m8_t test_nds_vd4dots_vv_i32m8_mu(vbool4_t mask, vint32m8_t vd, vint8m8_t vs1, vint8m8_t vs2, size_t vl) {
+ return __riscv_nds_vd4dots_vv_i32m8_mu(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dots_vv_i64m1_mu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.nds.vd4dots.mask.nxv1i64.nxv4i16.nxv4i16.i64(<vscale x 1 x i64> [[VD:%.*]], <vscale x 4 x i16> [[VS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
+// CHECK-RV64-NEXT: ret <vscale x 1 x i64> [[TMP0]]
+//
+vint64m1_t test_nds_vd4dots_vv_i64m1_mu(vbool64_t mask, vint64m1_t vd, vint16m1_t vs1, vint16m1_t vs2, size_t vl) {
+ return __riscv_nds_vd4dots_vv_i64m1_mu(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dots_vv_i64m2_mu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.nds.vd4dots.mask.nxv2i64.nxv8i16.nxv8i16.i64(<vscale x 2 x i64> [[VD:%.*]], <vscale x 8 x i16> [[VS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
+// CHECK-RV64-NEXT: ret <vscale x 2 x i64> [[TMP0]]
+//
+vint64m2_t test_nds_vd4dots_vv_i64m2_mu(vbool32_t mask, vint64m2_t vd, vint16m2_t vs1, vint16m2_t vs2, size_t vl) {
+ return __riscv_nds_vd4dots_vv_i64m2_mu(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dots_vv_i64m4_mu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.nds.vd4dots.mask.nxv4i64.nxv16i16.nxv16i16.i64(<vscale x 4 x i64> [[VD:%.*]], <vscale x 16 x i16> [[VS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
+// CHECK-RV64-NEXT: ret <vscale x 4 x i64> [[TMP0]]
+//
+vint64m4_t test_nds_vd4dots_vv_i64m4_mu(vbool16_t mask, vint64m4_t vd, vint16m4_t vs1, vint16m4_t vs2, size_t vl) {
+ return __riscv_nds_vd4dots_vv_i64m4_mu(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dots_vv_i64m8_mu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.nds.vd4dots.mask.nxv8i64.nxv32i16.nxv32i16.i64(<vscale x 8 x i64> [[VD:%.*]], <vscale x 32 x i16> [[VS1:%.*]], <vscale x 32 x i16> [[VS2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
+// CHECK-RV64-NEXT: ret <vscale x 8 x i64> [[TMP0]]
+//
+vint64m8_t test_nds_vd4dots_vv_i64m8_mu(vbool8_t mask, vint64m8_t vd, vint16m8_t vs1, vint16m8_t vs2, size_t vl) {
+ return __riscv_nds_vd4dots_vv_i64m8_mu(mask, vd, vs1, vs2, vl);
+}
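
For the masked policy variants the mask again comes first, and codegen selects the .mask. intrinsic with the matching policy immediate: _tum keeps the tail but leaves inactive lanes mask-agnostic, while _tumu preserves both. A masked sketch (illustrative helper, mirroring the i32m1 test above):

// Tail lanes of acc are preserved; masked-off lanes follow the
// mask-agnostic policy (immediate 2 in the IR above).
vint32m1_t dot4_tum(vbool32_t m, vint32m1_t acc, vint8m1_t a,
                    vint8m1_t b, size_t vl) {
  return __riscv_nds_vd4dots_vv_i32m1_tum(m, acc, a, b, vl);
}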
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/policy/non-overloaded/nds_vd4dotsu.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/policy/non-overloaded/nds_vd4dotsu.c
new file mode 100644
index 0000000000000..4909e6b642c39
--- /dev/null
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/policy/non-overloaded/nds_vd4dotsu.c
@@ -0,0 +1,332 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: riscv-registered-target
+// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \
+// RUN: -target-feature +xandesvdot -disable-O0-optnone \
+// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \
+// RUN: FileCheck --check-prefix=CHECK-RV64 %s
+
+#include <andes_vector.h>
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotsu_vv_i32mf2_tu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.nds.vd4dotsu.nxv1i32.nxv4i8.nxv4i8.i64(<vscale x 1 x i32> [[VD:%.*]], <vscale x 4 x i8> [[VS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 1 x i32> [[TMP0]]
+//
+vint32mf2_t test_nds_vd4dotsu_vv_i32mf2_tu(vint32mf2_t vd, vint8mf2_t vs1, vuint8mf2_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotsu_vv_i32mf2_tu(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotsu_vv_i32m1_tu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.nds.vd4dotsu.nxv2i32.nxv8i8.nxv8i8.i64(<vscale x 2 x i32> [[VD:%.*]], <vscale x 8 x i8> [[VS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 2 x i32> [[TMP0]]
+//
+vint32m1_t test_nds_vd4dotsu_vv_i32m1_tu(vint32m1_t vd, vint8m1_t vs1, vuint8m1_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotsu_vv_i32m1_tu(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotsu_vv_i32m2_tu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.nds.vd4dotsu.nxv4i32.nxv16i8.nxv16i8.i64(<vscale x 4 x i32> [[VD:%.*]], <vscale x 16 x i8> [[VS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 4 x i32> [[TMP0]]
+//
+vint32m2_t test_nds_vd4dotsu_vv_i32m2_tu(vint32m2_t vd, vint8m2_t vs1, vuint8m2_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotsu_vv_i32m2_tu(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotsu_vv_i32m4_tu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.nds.vd4dotsu.nxv8i32.nxv32i8.nxv32i8.i64(<vscale x 8 x i32> [[VD:%.*]], <vscale x 32 x i8> [[VS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 8 x i32> [[TMP0]]
+//
+vint32m4_t test_nds_vd4dotsu_vv_i32m4_tu(vint32m4_t vd, vint8m4_t vs1, vuint8m4_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotsu_vv_i32m4_tu(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotsu_vv_i32m8_tu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.nds.vd4dotsu.nxv16i32.nxv64i8.nxv64i8.i64(<vscale x 16 x i32> [[VD:%.*]], <vscale x 64 x i8> [[VS1:%.*]], <vscale x 64 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 16 x i32> [[TMP0]]
+//
+vint32m8_t test_nds_vd4dotsu_vv_i32m8_tu(vint32m8_t vd, vint8m8_t vs1, vuint8m8_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotsu_vv_i32m8_tu(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotsu_vv_i64m1_tu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.nds.vd4dotsu.nxv1i64.nxv4i16.nxv4i16.i64(<vscale x 1 x i64> [[VD:%.*]], <vscale x 4 x i16> [[VS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 1 x i64> [[TMP0]]
+//
+vint64m1_t test_nds_vd4dotsu_vv_i64m1_tu(vint64m1_t vd, vint16m1_t vs1, vuint16m1_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotsu_vv_i64m1_tu(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotsu_vv_i64m2_tu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.nds.vd4dotsu.nxv2i64.nxv8i16.nxv8i16.i64(<vscale x 2 x i64> [[VD:%.*]], <vscale x 8 x i16> [[VS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 2 x i64> [[TMP0]]
+//
+vint64m2_t test_nds_vd4dotsu_vv_i64m2_tu(vint64m2_t vd, vint16m2_t vs1, vuint16m2_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotsu_vv_i64m2_tu(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotsu_vv_i64m4_tu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.nds.vd4dotsu.nxv4i64.nxv16i16.nxv16i16.i64(<vscale x 4 x i64> [[VD:%.*]], <vscale x 16 x i16> [[VS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 4 x i64> [[TMP0]]
+//
+vint64m4_t test_nds_vd4dotsu_vv_i64m4_tu(vint64m4_t vd, vint16m4_t vs1, vuint16m4_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotsu_vv_i64m4_tu(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotsu_vv_i64m8_tu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.nds.vd4dotsu.nxv8i64.nxv32i16.nxv32i16.i64(<vscale x 8 x i64> [[VD:%.*]], <vscale x 32 x i16> [[VS1:%.*]], <vscale x 32 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 8 x i64> [[TMP0]]
+//
+vint64m8_t test_nds_vd4dotsu_vv_i64m8_tu(vint64m8_t vd, vint16m8_t vs1, vuint16m8_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotsu_vv_i64m8_tu(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotsu_vv_i32mf2_tum(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.nds.vd4dotsu.mask.nxv1i32.nxv4i8.nxv4i8.i64(<vscale x 1 x i32> [[VD:%.*]], <vscale x 4 x i8> [[VS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 1 x i32> [[TMP0]]
+//
+vint32mf2_t test_nds_vd4dotsu_vv_i32mf2_tum(vbool64_t mask, vint32mf2_t vd, vint8mf2_t vs1, vuint8mf2_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotsu_vv_i32mf2_tum(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotsu_vv_i32m1_tum(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.nds.vd4dotsu.mask.nxv2i32.nxv8i8.nxv8i8.i64(<vscale x 2 x i32> [[VD:%.*]], <vscale x 8 x i8> [[VS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 2 x i32> [[TMP0]]
+//
+vint32m1_t test_nds_vd4dotsu_vv_i32m1_tum(vbool32_t mask, vint32m1_t vd, vint8m1_t vs1, vuint8m1_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotsu_vv_i32m1_tum(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotsu_vv_i32m2_tum(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.nds.vd4dotsu.mask.nxv4i32.nxv16i8.nxv16i8.i64(<vscale x 4 x i32> [[VD:%.*]], <vscale x 16 x i8> [[VS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 4 x i32> [[TMP0]]
+//
+vint32m2_t test_nds_vd4dotsu_vv_i32m2_tum(vbool16_t mask, vint32m2_t vd, vint8m2_t vs1, vuint8m2_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotsu_vv_i32m2_tum(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotsu_vv_i32m4_tum(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.nds.vd4dotsu.mask.nxv8i32.nxv32i8.nxv32i8.i64(<vscale x 8 x i32> [[VD:%.*]], <vscale x 32 x i8> [[VS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 8 x i32> [[TMP0]]
+//
+vint32m4_t test_nds_vd4dotsu_vv_i32m4_tum(vbool8_t mask, vint32m4_t vd, vint8m4_t vs1, vuint8m4_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotsu_vv_i32m4_tum(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotsu_vv_i32m8_tum(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.nds.vd4dotsu.mask.nxv16i32.nxv64i8.nxv64i8.i64(<vscale x 16 x i32> [[VD:%.*]], <vscale x 64 x i8> [[VS1:%.*]], <vscale x 64 x i8> [[VS2:%.*]], <vscale x 16 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 16 x i32> [[TMP0]]
+//
+vint32m8_t test_nds_vd4dotsu_vv_i32m8_tum(vbool4_t mask, vint32m8_t vd, vint8m8_t vs1, vuint8m8_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotsu_vv_i32m8_tum(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotsu_vv_i64m1_tum(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.nds.vd4dotsu.mask.nxv1i64.nxv4i16.nxv4i16.i64(<vscale x 1 x i64> [[VD:%.*]], <vscale x 4 x i16> [[VS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 1 x i64> [[TMP0]]
+//
+vint64m1_t test_nds_vd4dotsu_vv_i64m1_tum(vbool64_t mask, vint64m1_t vd, vint16m1_t vs1, vuint16m1_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotsu_vv_i64m1_tum(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotsu_vv_i64m2_tum(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.nds.vd4dotsu.mask.nxv2i64.nxv8i16.nxv8i16.i64(<vscale x 2 x i64> [[VD:%.*]], <vscale x 8 x i16> [[VS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 2 x i64> [[TMP0]]
+//
+vint64m2_t test_nds_vd4dotsu_vv_i64m2_tum(vbool32_t mask, vint64m2_t vd, vint16m2_t vs1, vuint16m2_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotsu_vv_i64m2_tum(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotsu_vv_i64m4_tum(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.nds.vd4dotsu.mask.nxv4i64.nxv16i16.nxv16i16.i64(<vscale x 4 x i64> [[VD:%.*]], <vscale x 16 x i16> [[VS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 4 x i64> [[TMP0]]
+//
+vint64m4_t test_nds_vd4dotsu_vv_i64m4_tum(vbool16_t mask, vint64m4_t vd, vint16m4_t vs1, vuint16m4_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotsu_vv_i64m4_tum(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotsu_vv_i64m8_tum(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.nds.vd4dotsu.mask.nxv8i64.nxv32i16.nxv32i16.i64(<vscale x 8 x i64> [[VD:%.*]], <vscale x 32 x i16> [[VS1:%.*]], <vscale x 32 x i16> [[VS2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 8 x i64> [[TMP0]]
+//
+vint64m8_t test_nds_vd4dotsu_vv_i64m8_tum(vbool8_t mask, vint64m8_t vd, vint16m8_t vs1, vuint16m8_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotsu_vv_i64m8_tum(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotsu_vv_i32mf2_tumu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.nds.vd4dotsu.mask.nxv1i32.nxv4i8.nxv4i8.i64(<vscale x 1 x i32> [[VD:%.*]], <vscale x 4 x i8> [[VS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT: ret <vscale x 1 x i32> [[TMP0]]
+//
+vint32mf2_t test_nds_vd4dotsu_vv_i32mf2_tumu(vbool64_t mask, vint32mf2_t vd, vint8mf2_t vs1, vuint8mf2_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotsu_vv_i32mf2_tumu(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotsu_vv_i32m1_tumu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.nds.vd4dotsu.mask.nxv2i32.nxv8i8.nxv8i8.i64(<vscale x 2 x i32> [[VD:%.*]], <vscale x 8 x i8> [[VS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT: ret <vscale x 2 x i32> [[TMP0]]
+//
+vint32m1_t test_nds_vd4dotsu_vv_i32m1_tumu(vbool32_t mask, vint32m1_t vd, vint8m1_t vs1, vuint8m1_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotsu_vv_i32m1_tumu(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotsu_vv_i32m2_tumu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.nds.vd4dotsu.mask.nxv4i32.nxv16i8.nxv16i8.i64(<vscale x 4 x i32> [[VD:%.*]], <vscale x 16 x i8> [[VS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT: ret <vscale x 4 x i32> [[TMP0]]
+//
+vint32m2_t test_nds_vd4dotsu_vv_i32m2_tumu(vbool16_t mask, vint32m2_t vd, vint8m2_t vs1, vuint8m2_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotsu_vv_i32m2_tumu(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotsu_vv_i32m4_tumu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.nds.vd4dotsu.mask.nxv8i32.nxv32i8.nxv32i8.i64(<vscale x 8 x i32> [[VD:%.*]], <vscale x 32 x i8> [[VS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT: ret <vscale x 8 x i32> [[TMP0]]
+//
+vint32m4_t test_nds_vd4dotsu_vv_i32m4_tumu(vbool8_t mask, vint32m4_t vd, vint8m4_t vs1, vuint8m4_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotsu_vv_i32m4_tumu(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotsu_vv_i32m8_tumu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.nds.vd4dotsu.mask.nxv16i32.nxv64i8.nxv64i8.i64(<vscale x 16 x i32> [[VD:%.*]], <vscale x 64 x i8> [[VS1:%.*]], <vscale x 64 x i8> [[VS2:%.*]], <vscale x 16 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT: ret <vscale x 16 x i32> [[TMP0]]
+//
+vint32m8_t test_nds_vd4dotsu_vv_i32m8_tumu(vbool4_t mask, vint32m8_t vd, vint8m8_t vs1, vuint8m8_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotsu_vv_i32m8_tumu(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotsu_vv_i64m1_tumu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.nds.vd4dotsu.mask.nxv1i64.nxv4i16.nxv4i16.i64(<vscale x 1 x i64> [[VD:%.*]], <vscale x 4 x i16> [[VS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT: ret <vscale x 1 x i64> [[TMP0]]
+//
+vint64m1_t test_nds_vd4dotsu_vv_i64m1_tumu(vbool64_t mask, vint64m1_t vd, vint16m1_t vs1, vuint16m1_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotsu_vv_i64m1_tumu(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotsu_vv_i64m2_tumu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.nds.vd4dotsu.mask.nxv2i64.nxv8i16.nxv8i16.i64(<vscale x 2 x i64> [[VD:%.*]], <vscale x 8 x i16> [[VS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT: ret <vscale x 2 x i64> [[TMP0]]
+//
+vint64m2_t test_nds_vd4dotsu_vv_i64m2_tumu(vbool32_t mask, vint64m2_t vd, vint16m2_t vs1, vuint16m2_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotsu_vv_i64m2_tumu(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotsu_vv_i64m4_tumu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.nds.vd4dotsu.mask.nxv4i64.nxv16i16.nxv16i16.i64(<vscale x 4 x i64> [[VD:%.*]], <vscale x 16 x i16> [[VS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT: ret <vscale x 4 x i64> [[TMP0]]
+//
+vint64m4_t test_nds_vd4dotsu_vv_i64m4_tumu(vbool16_t mask, vint64m4_t vd, vint16m4_t vs1, vuint16m4_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotsu_vv_i64m4_tumu(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotsu_vv_i64m8_tumu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.nds.vd4dotsu.mask.nxv8i64.nxv32i16.nxv32i16.i64(<vscale x 8 x i64> [[VD:%.*]], <vscale x 32 x i16> [[VS1:%.*]], <vscale x 32 x i16> [[VS2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT: ret <vscale x 8 x i64> [[TMP0]]
+//
+vint64m8_t test_nds_vd4dotsu_vv_i64m8_tumu(vbool8_t mask, vint64m8_t vd, vint16m8_t vs1, vuint16m8_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotsu_vv_i64m8_tumu(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotsu_vv_i32mf2_mu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.nds.vd4dotsu.mask.nxv1i32.nxv4i8.nxv4i8.i64(<vscale x 1 x i32> [[VD:%.*]], <vscale x 4 x i8> [[VS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
+// CHECK-RV64-NEXT: ret <vscale x 1 x i32> [[TMP0]]
+//
+vint32mf2_t test_nds_vd4dotsu_vv_i32mf2_mu(vbool64_t mask, vint32mf2_t vd, vint8mf2_t vs1, vuint8mf2_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotsu_vv_i32mf2_mu(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotsu_vv_i32m1_mu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.nds.vd4dotsu.mask.nxv2i32.nxv8i8.nxv8i8.i64(<vscale x 2 x i32> [[VD:%.*]], <vscale x 8 x i8> [[VS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
+// CHECK-RV64-NEXT: ret <vscale x 2 x i32> [[TMP0]]
+//
+vint32m1_t test_nds_vd4dotsu_vv_i32m1_mu(vbool32_t mask, vint32m1_t vd, vint8m1_t vs1, vuint8m1_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotsu_vv_i32m1_mu(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotsu_vv_i32m2_mu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.nds.vd4dotsu.mask.nxv4i32.nxv16i8.nxv16i8.i64(<vscale x 4 x i32> [[VD:%.*]], <vscale x 16 x i8> [[VS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
+// CHECK-RV64-NEXT: ret <vscale x 4 x i32> [[TMP0]]
+//
+vint32m2_t test_nds_vd4dotsu_vv_i32m2_mu(vbool16_t mask, vint32m2_t vd, vint8m2_t vs1, vuint8m2_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotsu_vv_i32m2_mu(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotsu_vv_i32m4_mu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.nds.vd4dotsu.mask.nxv8i32.nxv32i8.nxv32i8.i64(<vscale x 8 x i32> [[VD:%.*]], <vscale x 32 x i8> [[VS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
+// CHECK-RV64-NEXT: ret <vscale x 8 x i32> [[TMP0]]
+//
+vint32m4_t test_nds_vd4dotsu_vv_i32m4_mu(vbool8_t mask, vint32m4_t vd, vint8m4_t vs1, vuint8m4_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotsu_vv_i32m4_mu(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotsu_vv_i32m8_mu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.nds.vd4dotsu.mask.nxv16i32.nxv64i8.nxv64i8.i64(<vscale x 16 x i32> [[VD:%.*]], <vscale x 64 x i8> [[VS1:%.*]], <vscale x 64 x i8> [[VS2:%.*]], <vscale x 16 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
+// CHECK-RV64-NEXT: ret <vscale x 16 x i32> [[TMP0]]
+//
+vint32m8_t test_nds_vd4dotsu_vv_i32m8_mu(vbool4_t mask, vint32m8_t vd, vint8m8_t vs1, vuint8m8_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotsu_vv_i32m8_mu(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotsu_vv_i64m1_mu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.nds.vd4dotsu.mask.nxv1i64.nxv4i16.nxv4i16.i64(<vscale x 1 x i64> [[VD:%.*]], <vscale x 4 x i16> [[VS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
+// CHECK-RV64-NEXT: ret <vscale x 1 x i64> [[TMP0]]
+//
+vint64m1_t test_nds_vd4dotsu_vv_i64m1_mu(vbool64_t mask, vint64m1_t vd, vint16m1_t vs1, vuint16m1_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotsu_vv_i64m1_mu(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotsu_vv_i64m2_mu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.nds.vd4dotsu.mask.nxv2i64.nxv8i16.nxv8i16.i64(<vscale x 2 x i64> [[VD:%.*]], <vscale x 8 x i16> [[VS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
+// CHECK-RV64-NEXT: ret <vscale x 2 x i64> [[TMP0]]
+//
+vint64m2_t test_nds_vd4dotsu_vv_i64m2_mu(vbool32_t mask, vint64m2_t vd, vint16m2_t vs1, vuint16m2_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotsu_vv_i64m2_mu(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotsu_vv_i64m4_mu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.nds.vd4dotsu.mask.nxv4i64.nxv16i16.nxv16i16.i64(<vscale x 4 x i64> [[VD:%.*]], <vscale x 16 x i16> [[VS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
+// CHECK-RV64-NEXT: ret <vscale x 4 x i64> [[TMP0]]
+//
+vint64m4_t test_nds_vd4dotsu_vv_i64m4_mu(vbool16_t mask, vint64m4_t vd, vint16m4_t vs1, vuint16m4_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotsu_vv_i64m4_mu(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotsu_vv_i64m8_mu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.nds.vd4dotsu.mask.nxv8i64.nxv32i16.nxv32i16.i64(<vscale x 8 x i64> [[VD:%.*]], <vscale x 32 x i16> [[VS1:%.*]], <vscale x 32 x i16> [[VS2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
+// CHECK-RV64-NEXT: ret <vscale x 8 x i64> [[TMP0]]
+//
+vint64m8_t test_nds_vd4dotsu_vv_i64m8_mu(vbool8_t mask, vint64m8_t vd, vint16m8_t vs1, vuint16m8_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotsu_vv_i64m8_mu(mask, vd, vs1, vs2, vl);
+}
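
Throughout these tests the element grouping is visible in the IR types: a destination with n lanes of SEW 32 or 64 consumes source vectors with 4n lanes of SEW 8 or 16 at the same LMUL. Assuming the 4-element dot-product semantics described in the Andes intrinsic documentation, a plausible scalar reference model for one i32 lane of nds_vd4dotsu would be (hedged sketch, not part of the patch):

#include <stdint.h>

// One destination lane: d accumulates four signed * unsigned 8-bit
// products, each widened to 32 bits before the addition.
static int32_t vd4dotsu_lane(int32_t d, const int8_t a[4],
                             const uint8_t b[4]) {
  for (int k = 0; k < 4; ++k)
    d += (int32_t)a[k] * (int32_t)b[k];
  return d;
}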
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/policy/non-overloaded/nds_vd4dotu.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/policy/non-overloaded/nds_vd4dotu.c
new file mode 100644
index 0000000000000..5f2f4b9b348c9
--- /dev/null
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/policy/non-overloaded/nds_vd4dotu.c
@@ -0,0 +1,332 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: riscv-registered-target
+// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \
+// RUN: -target-feature +xandesvdot -disable-O0-optnone \
+// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \
+// RUN: FileCheck --check-prefix=CHECK-RV64 %s
+
+#include <andes_vector.h>
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotu_vv_u32mf2_tu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.nds.vd4dotu.nxv1i32.nxv4i8.nxv4i8.i64(<vscale x 1 x i32> [[VD:%.*]], <vscale x 4 x i8> [[VS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 1 x i32> [[TMP0]]
+//
+vuint32mf2_t test_nds_vd4dotu_vv_u32mf2_tu(vuint32mf2_t vd, vuint8mf2_t vs1, vuint8mf2_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotu_vv_u32mf2_tu(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotu_vv_u32m1_tu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.nds.vd4dotu.nxv2i32.nxv8i8.nxv8i8.i64(<vscale x 2 x i32> [[VD:%.*]], <vscale x 8 x i8> [[VS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 2 x i32> [[TMP0]]
+//
+vuint32m1_t test_nds_vd4dotu_vv_u32m1_tu(vuint32m1_t vd, vuint8m1_t vs1, vuint8m1_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotu_vv_u32m1_tu(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotu_vv_u32m2_tu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.nds.vd4dotu.nxv4i32.nxv16i8.nxv16i8.i64(<vscale x 4 x i32> [[VD:%.*]], <vscale x 16 x i8> [[VS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 4 x i32> [[TMP0]]
+//
+vuint32m2_t test_nds_vd4dotu_vv_u32m2_tu(vuint32m2_t vd, vuint8m2_t vs1, vuint8m2_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotu_vv_u32m2_tu(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotu_vv_u32m4_tu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.nds.vd4dotu.nxv8i32.nxv32i8.nxv32i8.i64(<vscale x 8 x i32> [[VD:%.*]], <vscale x 32 x i8> [[VS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 8 x i32> [[TMP0]]
+//
+vuint32m4_t test_nds_vd4dotu_vv_u32m4_tu(vuint32m4_t vd, vuint8m4_t vs1, vuint8m4_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotu_vv_u32m4_tu(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotu_vv_u32m8_tu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.nds.vd4dotu.nxv16i32.nxv64i8.nxv64i8.i64(<vscale x 16 x i32> [[VD:%.*]], <vscale x 64 x i8> [[VS1:%.*]], <vscale x 64 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 16 x i32> [[TMP0]]
+//
+vuint32m8_t test_nds_vd4dotu_vv_u32m8_tu(vuint32m8_t vd, vuint8m8_t vs1, vuint8m8_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotu_vv_u32m8_tu(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotu_vv_u64m1_tu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.nds.vd4dotu.nxv1i64.nxv4i16.nxv4i16.i64(<vscale x 1 x i64> [[VD:%.*]], <vscale x 4 x i16> [[VS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 1 x i64> [[TMP0]]
+//
+vuint64m1_t test_nds_vd4dotu_vv_u64m1_tu(vuint64m1_t vd, vuint16m1_t vs1, vuint16m1_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotu_vv_u64m1_tu(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotu_vv_u64m2_tu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.nds.vd4dotu.nxv2i64.nxv8i16.nxv8i16.i64(<vscale x 2 x i64> [[VD:%.*]], <vscale x 8 x i16> [[VS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 2 x i64> [[TMP0]]
+//
+vuint64m2_t test_nds_vd4dotu_vv_u64m2_tu(vuint64m2_t vd, vuint16m2_t vs1, vuint16m2_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotu_vv_u64m2_tu(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotu_vv_u64m4_tu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.nds.vd4dotu.nxv4i64.nxv16i16.nxv16i16.i64(<vscale x 4 x i64> [[VD:%.*]], <vscale x 16 x i16> [[VS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 4 x i64> [[TMP0]]
+//
+vuint64m4_t test_nds_vd4dotu_vv_u64m4_tu(vuint64m4_t vd, vuint16m4_t vs1, vuint16m4_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotu_vv_u64m4_tu(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotu_vv_u64m8_tu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.nds.vd4dotu.nxv8i64.nxv32i16.nxv32i16.i64(<vscale x 8 x i64> [[VD:%.*]], <vscale x 32 x i16> [[VS1:%.*]], <vscale x 32 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 8 x i64> [[TMP0]]
+//
+vuint64m8_t test_nds_vd4dotu_vv_u64m8_tu(vuint64m8_t vd, vuint16m8_t vs1, vuint16m8_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotu_vv_u64m8_tu(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotu_vv_u32mf2_tum(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.nds.vd4dotu.mask.nxv1i32.nxv4i8.nxv4i8.i64(<vscale x 1 x i32> [[VD:%.*]], <vscale x 4 x i8> [[VS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 1 x i32> [[TMP0]]
+//
+vuint32mf2_t test_nds_vd4dotu_vv_u32mf2_tum(vbool64_t mask, vuint32mf2_t vd, vuint8mf2_t vs1, vuint8mf2_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotu_vv_u32mf2_tum(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotu_vv_u32m1_tum(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.nds.vd4dotu.mask.nxv2i32.nxv8i8.nxv8i8.i64(<vscale x 2 x i32> [[VD:%.*]], <vscale x 8 x i8> [[VS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 2 x i32> [[TMP0]]
+//
+vuint32m1_t test_nds_vd4dotu_vv_u32m1_tum(vbool32_t mask, vuint32m1_t vd, vuint8m1_t vs1, vuint8m1_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotu_vv_u32m1_tum(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotu_vv_u32m2_tum(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.nds.vd4dotu.mask.nxv4i32.nxv16i8.nxv16i8.i64(<vscale x 4 x i32> [[VD:%.*]], <vscale x 16 x i8> [[VS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 4 x i32> [[TMP0]]
+//
+vuint32m2_t test_nds_vd4dotu_vv_u32m2_tum(vbool16_t mask, vuint32m2_t vd, vuint8m2_t vs1, vuint8m2_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotu_vv_u32m2_tum(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotu_vv_u32m4_tum(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.nds.vd4dotu.mask.nxv8i32.nxv32i8.nxv32i8.i64(<vscale x 8 x i32> [[VD:%.*]], <vscale x 32 x i8> [[VS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 8 x i32> [[TMP0]]
+//
+vuint32m4_t test_nds_vd4dotu_vv_u32m4_tum(vbool8_t mask, vuint32m4_t vd, vuint8m4_t vs1, vuint8m4_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotu_vv_u32m4_tum(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotu_vv_u32m8_tum(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.nds.vd4dotu.mask.nxv16i32.nxv64i8.nxv64i8.i64(<vscale x 16 x i32> [[VD:%.*]], <vscale x 64 x i8> [[VS1:%.*]], <vscale x 64 x i8> [[VS2:%.*]], <vscale x 16 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 16 x i32> [[TMP0]]
+//
+vuint32m8_t test_nds_vd4dotu_vv_u32m8_tum(vbool4_t mask, vuint32m8_t vd, vuint8m8_t vs1, vuint8m8_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotu_vv_u32m8_tum(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotu_vv_u64m1_tum(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.nds.vd4dotu.mask.nxv1i64.nxv4i16.nxv4i16.i64(<vscale x 1 x i64> [[VD:%.*]], <vscale x 4 x i16> [[VS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 1 x i64> [[TMP0]]
+//
+vuint64m1_t test_nds_vd4dotu_vv_u64m1_tum(vbool64_t mask, vuint64m1_t vd, vuint16m1_t vs1, vuint16m1_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotu_vv_u64m1_tum(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotu_vv_u64m2_tum(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.nds.vd4dotu.mask.nxv2i64.nxv8i16.nxv8i16.i64(<vscale x 2 x i64> [[VD:%.*]], <vscale x 8 x i16> [[VS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 2 x i64> [[TMP0]]
+//
+vuint64m2_t test_nds_vd4dotu_vv_u64m2_tum(vbool32_t mask, vuint64m2_t vd, vuint16m2_t vs1, vuint16m2_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotu_vv_u64m2_tum(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotu_vv_u64m4_tum(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.nds.vd4dotu.mask.nxv4i64.nxv16i16.nxv16i16.i64(<vscale x 4 x i64> [[VD:%.*]], <vscale x 16 x i16> [[VS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 4 x i64> [[TMP0]]
+//
+vuint64m4_t test_nds_vd4dotu_vv_u64m4_tum(vbool16_t mask, vuint64m4_t vd, vuint16m4_t vs1, vuint16m4_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotu_vv_u64m4_tum(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotu_vv_u64m8_tum(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.nds.vd4dotu.mask.nxv8i64.nxv32i16.nxv32i16.i64(<vscale x 8 x i64> [[VD:%.*]], <vscale x 32 x i16> [[VS1:%.*]], <vscale x 32 x i16> [[VS2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 8 x i64> [[TMP0]]
+//
+vuint64m8_t test_nds_vd4dotu_vv_u64m8_tum(vbool8_t mask, vuint64m8_t vd, vuint16m8_t vs1, vuint16m8_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotu_vv_u64m8_tum(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotu_vv_u32mf2_tumu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.nds.vd4dotu.mask.nxv1i32.nxv4i8.nxv4i8.i64(<vscale x 1 x i32> [[VD:%.*]], <vscale x 4 x i8> [[VS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT: ret <vscale x 1 x i32> [[TMP0]]
+//
+vuint32mf2_t test_nds_vd4dotu_vv_u32mf2_tumu(vbool64_t mask, vuint32mf2_t vd, vuint8mf2_t vs1, vuint8mf2_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotu_vv_u32mf2_tumu(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotu_vv_u32m1_tumu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.nds.vd4dotu.mask.nxv2i32.nxv8i8.nxv8i8.i64(<vscale x 2 x i32> [[VD:%.*]], <vscale x 8 x i8> [[VS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT: ret <vscale x 2 x i32> [[TMP0]]
+//
+vuint32m1_t test_nds_vd4dotu_vv_u32m1_tumu(vbool32_t mask, vuint32m1_t vd, vuint8m1_t vs1, vuint8m1_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotu_vv_u32m1_tumu(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotu_vv_u32m2_tumu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.nds.vd4dotu.mask.nxv4i32.nxv16i8.nxv16i8.i64(<vscale x 4 x i32> [[VD:%.*]], <vscale x 16 x i8> [[VS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT: ret <vscale x 4 x i32> [[TMP0]]
+//
+vuint32m2_t test_nds_vd4dotu_vv_u32m2_tumu(vbool16_t mask, vuint32m2_t vd, vuint8m2_t vs1, vuint8m2_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotu_vv_u32m2_tumu(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotu_vv_u32m4_tumu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.nds.vd4dotu.mask.nxv8i32.nxv32i8.nxv32i8.i64(<vscale x 8 x i32> [[VD:%.*]], <vscale x 32 x i8> [[VS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT: ret <vscale x 8 x i32> [[TMP0]]
+//
+vuint32m4_t test_nds_vd4dotu_vv_u32m4_tumu(vbool8_t mask, vuint32m4_t vd, vuint8m4_t vs1, vuint8m4_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotu_vv_u32m4_tumu(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotu_vv_u32m8_tumu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.nds.vd4dotu.mask.nxv16i32.nxv64i8.nxv64i8.i64(<vscale x 16 x i32> [[VD:%.*]], <vscale x 64 x i8> [[VS1:%.*]], <vscale x 64 x i8> [[VS2:%.*]], <vscale x 16 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT: ret <vscale x 16 x i32> [[TMP0]]
+//
+vuint32m8_t test_nds_vd4dotu_vv_u32m8_tumu(vbool4_t mask, vuint32m8_t vd, vuint8m8_t vs1, vuint8m8_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotu_vv_u32m8_tumu(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotu_vv_u64m1_tumu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.nds.vd4dotu.mask.nxv1i64.nxv4i16.nxv4i16.i64(<vscale x 1 x i64> [[VD:%.*]], <vscale x 4 x i16> [[VS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT: ret <vscale x 1 x i64> [[TMP0]]
+//
+vuint64m1_t test_nds_vd4dotu_vv_u64m1_tumu(vbool64_t mask, vuint64m1_t vd, vuint16m1_t vs1, vuint16m1_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotu_vv_u64m1_tumu(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotu_vv_u64m2_tumu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.nds.vd4dotu.mask.nxv2i64.nxv8i16.nxv8i16.i64(<vscale x 2 x i64> [[VD:%.*]], <vscale x 8 x i16> [[VS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT: ret <vscale x 2 x i64> [[TMP0]]
+//
+vuint64m2_t test_nds_vd4dotu_vv_u64m2_tumu(vbool32_t mask, vuint64m2_t vd, vuint16m2_t vs1, vuint16m2_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotu_vv_u64m2_tumu(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotu_vv_u64m4_tumu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.nds.vd4dotu.mask.nxv4i64.nxv16i16.nxv16i16.i64(<vscale x 4 x i64> [[VD:%.*]], <vscale x 16 x i16> [[VS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT: ret <vscale x 4 x i64> [[TMP0]]
+//
+vuint64m4_t test_nds_vd4dotu_vv_u64m4_tumu(vbool16_t mask, vuint64m4_t vd, vuint16m4_t vs1, vuint16m4_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotu_vv_u64m4_tumu(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotu_vv_u64m8_tumu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.nds.vd4dotu.mask.nxv8i64.nxv32i16.nxv32i16.i64(<vscale x 8 x i64> [[VD:%.*]], <vscale x 32 x i16> [[VS1:%.*]], <vscale x 32 x i16> [[VS2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT: ret <vscale x 8 x i64> [[TMP0]]
+//
+vuint64m8_t test_nds_vd4dotu_vv_u64m8_tumu(vbool8_t mask, vuint64m8_t vd, vuint16m8_t vs1, vuint16m8_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotu_vv_u64m8_tumu(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotu_vv_u32mf2_mu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.nds.vd4dotu.mask.nxv1i32.nxv4i8.nxv4i8.i64(<vscale x 1 x i32> [[VD:%.*]], <vscale x 4 x i8> [[VS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
+// CHECK-RV64-NEXT: ret <vscale x 1 x i32> [[TMP0]]
+//
+vuint32mf2_t test_nds_vd4dotu_vv_u32mf2_mu(vbool64_t mask, vuint32mf2_t vd, vuint8mf2_t vs1, vuint8mf2_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotu_vv_u32mf2_mu(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotu_vv_u32m1_mu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.nds.vd4dotu.mask.nxv2i32.nxv8i8.nxv8i8.i64(<vscale x 2 x i32> [[VD:%.*]], <vscale x 8 x i8> [[VS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
+// CHECK-RV64-NEXT: ret <vscale x 2 x i32> [[TMP0]]
+//
+vuint32m1_t test_nds_vd4dotu_vv_u32m1_mu(vbool32_t mask, vuint32m1_t vd, vuint8m1_t vs1, vuint8m1_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotu_vv_u32m1_mu(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotu_vv_u32m2_mu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.nds.vd4dotu.mask.nxv4i32.nxv16i8.nxv16i8.i64(<vscale x 4 x i32> [[VD:%.*]], <vscale x 16 x i8> [[VS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
+// CHECK-RV64-NEXT: ret <vscale x 4 x i32> [[TMP0]]
+//
+vuint32m2_t test_nds_vd4dotu_vv_u32m2_mu(vbool16_t mask, vuint32m2_t vd, vuint8m2_t vs1, vuint8m2_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotu_vv_u32m2_mu(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotu_vv_u32m4_mu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.nds.vd4dotu.mask.nxv8i32.nxv32i8.nxv32i8.i64(<vscale x 8 x i32> [[VD:%.*]], <vscale x 32 x i8> [[VS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
+// CHECK-RV64-NEXT: ret <vscale x 8 x i32> [[TMP0]]
+//
+vuint32m4_t test_nds_vd4dotu_vv_u32m4_mu(vbool8_t mask, vuint32m4_t vd, vuint8m4_t vs1, vuint8m4_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotu_vv_u32m4_mu(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotu_vv_u32m8_mu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.nds.vd4dotu.mask.nxv16i32.nxv64i8.nxv64i8.i64(<vscale x 16 x i32> [[VD:%.*]], <vscale x 64 x i8> [[VS1:%.*]], <vscale x 64 x i8> [[VS2:%.*]], <vscale x 16 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
+// CHECK-RV64-NEXT: ret <vscale x 16 x i32> [[TMP0]]
+//
+vuint32m8_t test_nds_vd4dotu_vv_u32m8_mu(vbool4_t mask, vuint32m8_t vd, vuint8m8_t vs1, vuint8m8_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotu_vv_u32m8_mu(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotu_vv_u64m1_mu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.nds.vd4dotu.mask.nxv1i64.nxv4i16.nxv4i16.i64(<vscale x 1 x i64> [[VD:%.*]], <vscale x 4 x i16> [[VS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
+// CHECK-RV64-NEXT: ret <vscale x 1 x i64> [[TMP0]]
+//
+vuint64m1_t test_nds_vd4dotu_vv_u64m1_mu(vbool64_t mask, vuint64m1_t vd, vuint16m1_t vs1, vuint16m1_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotu_vv_u64m1_mu(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotu_vv_u64m2_mu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.nds.vd4dotu.mask.nxv2i64.nxv8i16.nxv8i16.i64(<vscale x 2 x i64> [[VD:%.*]], <vscale x 8 x i16> [[VS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
+// CHECK-RV64-NEXT: ret <vscale x 2 x i64> [[TMP0]]
+//
+vuint64m2_t test_nds_vd4dotu_vv_u64m2_mu(vbool32_t mask, vuint64m2_t vd, vuint16m2_t vs1, vuint16m2_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotu_vv_u64m2_mu(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotu_vv_u64m4_mu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.nds.vd4dotu.mask.nxv4i64.nxv16i16.nxv16i16.i64(<vscale x 4 x i64> [[VD:%.*]], <vscale x 16 x i16> [[VS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
+// CHECK-RV64-NEXT: ret <vscale x 4 x i64> [[TMP0]]
+//
+vuint64m4_t test_nds_vd4dotu_vv_u64m4_mu(vbool16_t mask, vuint64m4_t vd, vuint16m4_t vs1, vuint16m4_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotu_vv_u64m4_mu(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotu_vv_u64m8_mu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.nds.vd4dotu.mask.nxv8i64.nxv32i16.nxv32i16.i64(<vscale x 8 x i64> [[VD:%.*]], <vscale x 32 x i16> [[VS1:%.*]], <vscale x 32 x i16> [[VS2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
+// CHECK-RV64-NEXT: ret <vscale x 8 x i64> [[TMP0]]
+//
+vuint64m8_t test_nds_vd4dotu_vv_u64m8_mu(vbool8_t mask, vuint64m8_t vd, vuint16m8_t vs1, vuint16m8_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotu_vv_u64m8_mu(mask, vd, vs1, vs2, vl);
+}
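[Aside, not part of the patch: a minimal usage sketch for the non-overloaded policy intrinsics exercised above. The signature is copied verbatim from the u32m1 _tum test; judging from the element counts in the checks (e.g. nxv8i8 sources feeding an nxv2i32 destination), each 32-bit accumulator lane consumes four 8-bit elements from each source. vl handling is left to the caller, since the tests do not pin down whether it counts source or destination elements.]

#include <andes_vector.h>
#include <stddef.h>

// One masked, tail-undisturbed (_tum) accumulation step: inactive and
// tail lanes of acc keep their previous values.
vuint32m1_t dot4_step_u8(vbool32_t m, vuint32m1_t acc, vuint8m1_t va,
                         vuint8m1_t vb, size_t vl) {
  return __riscv_nds_vd4dotu_vv_u32m1_tum(m, acc, va, vb, vl);
}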
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/policy/overloaded/nds_vd4dots.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/policy/overloaded/nds_vd4dots.c
new file mode 100644
index 0000000000000..fe9d7d7febb6d
--- /dev/null
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/policy/overloaded/nds_vd4dots.c
@@ -0,0 +1,332 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: riscv-registered-target
+// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \
+// RUN: -target-feature +xandesvdot -disable-O0-optnone \
+// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \
+// RUN: FileCheck --check-prefix=CHECK-RV64 %s
+
+#include <andes_vector.h>
+
+// CHECK-RV64-LABEL: @test_nds_vd4dots_vv_i32mf2_tu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.nds.vd4dots.nxv1i32.nxv4i8.nxv4i8.i64(<vscale x 1 x i32> [[VD:%.*]], <vscale x 4 x i8> [[VS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 1 x i32> [[TMP0]]
+//
+vint32mf2_t test_nds_vd4dots_vv_i32mf2_tu(vint32mf2_t vd, vint8mf2_t vs1, vint8mf2_t vs2, size_t vl) {
+ return __riscv_nds_vd4dots_tu(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dots_vv_i32m1_tu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.nds.vd4dots.nxv2i32.nxv8i8.nxv8i8.i64(<vscale x 2 x i32> [[VD:%.*]], <vscale x 8 x i8> [[VS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 2 x i32> [[TMP0]]
+//
+vint32m1_t test_nds_vd4dots_vv_i32m1_tu(vint32m1_t vd, vint8m1_t vs1, vint8m1_t vs2, size_t vl) {
+ return __riscv_nds_vd4dots_tu(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dots_vv_i32m2_tu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.nds.vd4dots.nxv4i32.nxv16i8.nxv16i8.i64(<vscale x 4 x i32> [[VD:%.*]], <vscale x 16 x i8> [[VS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 4 x i32> [[TMP0]]
+//
+vint32m2_t test_nds_vd4dots_vv_i32m2_tu(vint32m2_t vd, vint8m2_t vs1, vint8m2_t vs2, size_t vl) {
+ return __riscv_nds_vd4dots_tu(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dots_vv_i32m4_tu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.nds.vd4dots.nxv8i32.nxv32i8.nxv32i8.i64(<vscale x 8 x i32> [[VD:%.*]], <vscale x 32 x i8> [[VS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 8 x i32> [[TMP0]]
+//
+vint32m4_t test_nds_vd4dots_vv_i32m4_tu(vint32m4_t vd, vint8m4_t vs1, vint8m4_t vs2, size_t vl) {
+ return __riscv_nds_vd4dots_tu(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dots_vv_i32m8_tu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.nds.vd4dots.nxv16i32.nxv64i8.nxv64i8.i64(<vscale x 16 x i32> [[VD:%.*]], <vscale x 64 x i8> [[VS1:%.*]], <vscale x 64 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 16 x i32> [[TMP0]]
+//
+vint32m8_t test_nds_vd4dots_vv_i32m8_tu(vint32m8_t vd, vint8m8_t vs1, vint8m8_t vs2, size_t vl) {
+ return __riscv_nds_vd4dots_tu(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dots_vv_i64m1_tu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.nds.vd4dots.nxv1i64.nxv4i16.nxv4i16.i64(<vscale x 1 x i64> [[VD:%.*]], <vscale x 4 x i16> [[VS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 1 x i64> [[TMP0]]
+//
+vint64m1_t test_nds_vd4dots_vv_i64m1_tu(vint64m1_t vd, vint16m1_t vs1, vint16m1_t vs2, size_t vl) {
+ return __riscv_nds_vd4dots_tu(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dots_vv_i64m2_tu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.nds.vd4dots.nxv2i64.nxv8i16.nxv8i16.i64(<vscale x 2 x i64> [[VD:%.*]], <vscale x 8 x i16> [[VS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 2 x i64> [[TMP0]]
+//
+vint64m2_t test_nds_vd4dots_vv_i64m2_tu(vint64m2_t vd, vint16m2_t vs1, vint16m2_t vs2, size_t vl) {
+ return __riscv_nds_vd4dots_tu(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dots_vv_i64m4_tu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.nds.vd4dots.nxv4i64.nxv16i16.nxv16i16.i64(<vscale x 4 x i64> [[VD:%.*]], <vscale x 16 x i16> [[VS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 4 x i64> [[TMP0]]
+//
+vint64m4_t test_nds_vd4dots_vv_i64m4_tu(vint64m4_t vd, vint16m4_t vs1, vint16m4_t vs2, size_t vl) {
+ return __riscv_nds_vd4dots_tu(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dots_vv_i64m8_tu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.nds.vd4dots.nxv8i64.nxv32i16.nxv32i16.i64(<vscale x 8 x i64> [[VD:%.*]], <vscale x 32 x i16> [[VS1:%.*]], <vscale x 32 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 8 x i64> [[TMP0]]
+//
+vint64m8_t test_nds_vd4dots_vv_i64m8_tu(vint64m8_t vd, vint16m8_t vs1, vint16m8_t vs2, size_t vl) {
+ return __riscv_nds_vd4dots_tu(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dots_vv_i32mf2_tum(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.nds.vd4dots.mask.nxv1i32.nxv4i8.nxv4i8.i64(<vscale x 1 x i32> [[VD:%.*]], <vscale x 4 x i8> [[VS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 1 x i32> [[TMP0]]
+//
+vint32mf2_t test_nds_vd4dots_vv_i32mf2_tum(vbool64_t mask, vint32mf2_t vd, vint8mf2_t vs1, vint8mf2_t vs2, size_t vl) {
+ return __riscv_nds_vd4dots_tum(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dots_vv_i32m1_tum(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.nds.vd4dots.mask.nxv2i32.nxv8i8.nxv8i8.i64(<vscale x 2 x i32> [[VD:%.*]], <vscale x 8 x i8> [[VS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 2 x i32> [[TMP0]]
+//
+vint32m1_t test_nds_vd4dots_vv_i32m1_tum(vbool32_t mask, vint32m1_t vd, vint8m1_t vs1, vint8m1_t vs2, size_t vl) {
+ return __riscv_nds_vd4dots_tum(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dots_vv_i32m2_tum(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.nds.vd4dots.mask.nxv4i32.nxv16i8.nxv16i8.i64(<vscale x 4 x i32> [[VD:%.*]], <vscale x 16 x i8> [[VS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 4 x i32> [[TMP0]]
+//
+vint32m2_t test_nds_vd4dots_vv_i32m2_tum(vbool16_t mask, vint32m2_t vd, vint8m2_t vs1, vint8m2_t vs2, size_t vl) {
+ return __riscv_nds_vd4dots_tum(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dots_vv_i32m4_tum(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.nds.vd4dots.mask.nxv8i32.nxv32i8.nxv32i8.i64(<vscale x 8 x i32> [[VD:%.*]], <vscale x 32 x i8> [[VS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 8 x i32> [[TMP0]]
+//
+vint32m4_t test_nds_vd4dots_vv_i32m4_tum(vbool8_t mask, vint32m4_t vd, vint8m4_t vs1, vint8m4_t vs2, size_t vl) {
+ return __riscv_nds_vd4dots_tum(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dots_vv_i32m8_tum(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.nds.vd4dots.mask.nxv16i32.nxv64i8.nxv64i8.i64(<vscale x 16 x i32> [[VD:%.*]], <vscale x 64 x i8> [[VS1:%.*]], <vscale x 64 x i8> [[VS2:%.*]], <vscale x 16 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 16 x i32> [[TMP0]]
+//
+vint32m8_t test_nds_vd4dots_vv_i32m8_tum(vbool4_t mask, vint32m8_t vd, vint8m8_t vs1, vint8m8_t vs2, size_t vl) {
+ return __riscv_nds_vd4dots_tum(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dots_vv_i64m1_tum(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.nds.vd4dots.mask.nxv1i64.nxv4i16.nxv4i16.i64(<vscale x 1 x i64> [[VD:%.*]], <vscale x 4 x i16> [[VS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 1 x i64> [[TMP0]]
+//
+vint64m1_t test_nds_vd4dots_vv_i64m1_tum(vbool64_t mask, vint64m1_t vd, vint16m1_t vs1, vint16m1_t vs2, size_t vl) {
+ return __riscv_nds_vd4dots_tum(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dots_vv_i64m2_tum(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.nds.vd4dots.mask.nxv2i64.nxv8i16.nxv8i16.i64(<vscale x 2 x i64> [[VD:%.*]], <vscale x 8 x i16> [[VS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 2 x i64> [[TMP0]]
+//
+vint64m2_t test_nds_vd4dots_vv_i64m2_tum(vbool32_t mask, vint64m2_t vd, vint16m2_t vs1, vint16m2_t vs2, size_t vl) {
+ return __riscv_nds_vd4dots_tum(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dots_vv_i64m4_tum(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.nds.vd4dots.mask.nxv4i64.nxv16i16.nxv16i16.i64(<vscale x 4 x i64> [[VD:%.*]], <vscale x 16 x i16> [[VS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 4 x i64> [[TMP0]]
+//
+vint64m4_t test_nds_vd4dots_vv_i64m4_tum(vbool16_t mask, vint64m4_t vd, vint16m4_t vs1, vint16m4_t vs2, size_t vl) {
+ return __riscv_nds_vd4dots_tum(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dots_vv_i64m8_tum(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.nds.vd4dots.mask.nxv8i64.nxv32i16.nxv32i16.i64(<vscale x 8 x i64> [[VD:%.*]], <vscale x 32 x i16> [[VS1:%.*]], <vscale x 32 x i16> [[VS2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 8 x i64> [[TMP0]]
+//
+vint64m8_t test_nds_vd4dots_vv_i64m8_tum(vbool8_t mask, vint64m8_t vd, vint16m8_t vs1, vint16m8_t vs2, size_t vl) {
+ return __riscv_nds_vd4dots_tum(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dots_vv_i32mf2_tumu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.nds.vd4dots.mask.nxv1i32.nxv4i8.nxv4i8.i64(<vscale x 1 x i32> [[VD:%.*]], <vscale x 4 x i8> [[VS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT: ret <vscale x 1 x i32> [[TMP0]]
+//
+vint32mf2_t test_nds_vd4dots_vv_i32mf2_tumu(vbool64_t mask, vint32mf2_t vd, vint8mf2_t vs1, vint8mf2_t vs2, size_t vl) {
+ return __riscv_nds_vd4dots_tumu(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dots_vv_i32m1_tumu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.nds.vd4dots.mask.nxv2i32.nxv8i8.nxv8i8.i64(<vscale x 2 x i32> [[VD:%.*]], <vscale x 8 x i8> [[VS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT: ret <vscale x 2 x i32> [[TMP0]]
+//
+vint32m1_t test_nds_vd4dots_vv_i32m1_tumu(vbool32_t mask, vint32m1_t vd, vint8m1_t vs1, vint8m1_t vs2, size_t vl) {
+ return __riscv_nds_vd4dots_tumu(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dots_vv_i32m2_tumu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.nds.vd4dots.mask.nxv4i32.nxv16i8.nxv16i8.i64(<vscale x 4 x i32> [[VD:%.*]], <vscale x 16 x i8> [[VS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT: ret <vscale x 4 x i32> [[TMP0]]
+//
+vint32m2_t test_nds_vd4dots_vv_i32m2_tumu(vbool16_t mask, vint32m2_t vd, vint8m2_t vs1, vint8m2_t vs2, size_t vl) {
+ return __riscv_nds_vd4dots_tumu(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dots_vv_i32m4_tumu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.nds.vd4dots.mask.nxv8i32.nxv32i8.nxv32i8.i64(<vscale x 8 x i32> [[VD:%.*]], <vscale x 32 x i8> [[VS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT: ret <vscale x 8 x i32> [[TMP0]]
+//
+vint32m4_t test_nds_vd4dots_vv_i32m4_tumu(vbool8_t mask, vint32m4_t vd, vint8m4_t vs1, vint8m4_t vs2, size_t vl) {
+ return __riscv_nds_vd4dots_tumu(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dots_vv_i32m8_tumu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.nds.vd4dots.mask.nxv16i32.nxv64i8.nxv64i8.i64(<vscale x 16 x i32> [[VD:%.*]], <vscale x 64 x i8> [[VS1:%.*]], <vscale x 64 x i8> [[VS2:%.*]], <vscale x 16 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT: ret <vscale x 16 x i32> [[TMP0]]
+//
+vint32m8_t test_nds_vd4dots_vv_i32m8_tumu(vbool4_t mask, vint32m8_t vd, vint8m8_t vs1, vint8m8_t vs2, size_t vl) {
+ return __riscv_nds_vd4dots_tumu(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dots_vv_i64m1_tumu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.nds.vd4dots.mask.nxv1i64.nxv4i16.nxv4i16.i64(<vscale x 1 x i64> [[VD:%.*]], <vscale x 4 x i16> [[VS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT: ret <vscale x 1 x i64> [[TMP0]]
+//
+vint64m1_t test_nds_vd4dots_vv_i64m1_tumu(vbool64_t mask, vint64m1_t vd, vint16m1_t vs1, vint16m1_t vs2, size_t vl) {
+ return __riscv_nds_vd4dots_tumu(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dots_vv_i64m2_tumu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.nds.vd4dots.mask.nxv2i64.nxv8i16.nxv8i16.i64(<vscale x 2 x i64> [[VD:%.*]], <vscale x 8 x i16> [[VS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT: ret <vscale x 2 x i64> [[TMP0]]
+//
+vint64m2_t test_nds_vd4dots_vv_i64m2_tumu(vbool32_t mask, vint64m2_t vd, vint16m2_t vs1, vint16m2_t vs2, size_t vl) {
+ return __riscv_nds_vd4dots_tumu(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dots_vv_i64m4_tumu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.nds.vd4dots.mask.nxv4i64.nxv16i16.nxv16i16.i64(<vscale x 4 x i64> [[VD:%.*]], <vscale x 16 x i16> [[VS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT: ret <vscale x 4 x i64> [[TMP0]]
+//
+vint64m4_t test_nds_vd4dots_vv_i64m4_tumu(vbool16_t mask, vint64m4_t vd, vint16m4_t vs1, vint16m4_t vs2, size_t vl) {
+ return __riscv_nds_vd4dots_tumu(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dots_vv_i64m8_tumu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.nds.vd4dots.mask.nxv8i64.nxv32i16.nxv32i16.i64(<vscale x 8 x i64> [[VD:%.*]], <vscale x 32 x i16> [[VS1:%.*]], <vscale x 32 x i16> [[VS2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT: ret <vscale x 8 x i64> [[TMP0]]
+//
+vint64m8_t test_nds_vd4dots_vv_i64m8_tumu(vbool8_t mask, vint64m8_t vd, vint16m8_t vs1, vint16m8_t vs2, size_t vl) {
+ return __riscv_nds_vd4dots_tumu(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dots_vv_i32mf2_mu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.nds.vd4dots.mask.nxv1i32.nxv4i8.nxv4i8.i64(<vscale x 1 x i32> [[VD:%.*]], <vscale x 4 x i8> [[VS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
+// CHECK-RV64-NEXT: ret <vscale x 1 x i32> [[TMP0]]
+//
+vint32mf2_t test_nds_vd4dots_vv_i32mf2_mu(vbool64_t mask, vint32mf2_t vd, vint8mf2_t vs1, vint8mf2_t vs2, size_t vl) {
+ return __riscv_nds_vd4dots_mu(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dots_vv_i32m1_mu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.nds.vd4dots.mask.nxv2i32.nxv8i8.nxv8i8.i64(<vscale x 2 x i32> [[VD:%.*]], <vscale x 8 x i8> [[VS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
+// CHECK-RV64-NEXT: ret <vscale x 2 x i32> [[TMP0]]
+//
+vint32m1_t test_nds_vd4dots_vv_i32m1_mu(vbool32_t mask, vint32m1_t vd, vint8m1_t vs1, vint8m1_t vs2, size_t vl) {
+ return __riscv_nds_vd4dots_mu(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dots_vv_i32m2_mu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.nds.vd4dots.mask.nxv4i32.nxv16i8.nxv16i8.i64(<vscale x 4 x i32> [[VD:%.*]], <vscale x 16 x i8> [[VS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
+// CHECK-RV64-NEXT: ret <vscale x 4 x i32> [[TMP0]]
+//
+vint32m2_t test_nds_vd4dots_vv_i32m2_mu(vbool16_t mask, vint32m2_t vd, vint8m2_t vs1, vint8m2_t vs2, size_t vl) {
+ return __riscv_nds_vd4dots_mu(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dots_vv_i32m4_mu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.nds.vd4dots.mask.nxv8i32.nxv32i8.nxv32i8.i64(<vscale x 8 x i32> [[VD:%.*]], <vscale x 32 x i8> [[VS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
+// CHECK-RV64-NEXT: ret <vscale x 8 x i32> [[TMP0]]
+//
+vint32m4_t test_nds_vd4dots_vv_i32m4_mu(vbool8_t mask, vint32m4_t vd, vint8m4_t vs1, vint8m4_t vs2, size_t vl) {
+ return __riscv_nds_vd4dots_mu(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dots_vv_i32m8_mu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.nds.vd4dots.mask.nxv16i32.nxv64i8.nxv64i8.i64(<vscale x 16 x i32> [[VD:%.*]], <vscale x 64 x i8> [[VS1:%.*]], <vscale x 64 x i8> [[VS2:%.*]], <vscale x 16 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
+// CHECK-RV64-NEXT: ret <vscale x 16 x i32> [[TMP0]]
+//
+vint32m8_t test_nds_vd4dots_vv_i32m8_mu(vbool4_t mask, vint32m8_t vd, vint8m8_t vs1, vint8m8_t vs2, size_t vl) {
+ return __riscv_nds_vd4dots_mu(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dots_vv_i64m1_mu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.nds.vd4dots.mask.nxv1i64.nxv4i16.nxv4i16.i64(<vscale x 1 x i64> [[VD:%.*]], <vscale x 4 x i16> [[VS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
+// CHECK-RV64-NEXT: ret <vscale x 1 x i64> [[TMP0]]
+//
+vint64m1_t test_nds_vd4dots_vv_i64m1_mu(vbool64_t mask, vint64m1_t vd, vint16m1_t vs1, vint16m1_t vs2, size_t vl) {
+ return __riscv_nds_vd4dots_mu(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dots_vv_i64m2_mu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.nds.vd4dots.mask.nxv2i64.nxv8i16.nxv8i16.i64(<vscale x 2 x i64> [[VD:%.*]], <vscale x 8 x i16> [[VS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
+// CHECK-RV64-NEXT: ret <vscale x 2 x i64> [[TMP0]]
+//
+vint64m2_t test_nds_vd4dots_vv_i64m2_mu(vbool32_t mask, vint64m2_t vd, vint16m2_t vs1, vint16m2_t vs2, size_t vl) {
+ return __riscv_nds_vd4dots_mu(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dots_vv_i64m4_mu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.nds.vd4dots.mask.nxv4i64.nxv16i16.nxv16i16.i64(<vscale x 4 x i64> [[VD:%.*]], <vscale x 16 x i16> [[VS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
+// CHECK-RV64-NEXT: ret <vscale x 4 x i64> [[TMP0]]
+//
+vint64m4_t test_nds_vd4dots_vv_i64m4_mu(vbool16_t mask, vint64m4_t vd, vint16m4_t vs1, vint16m4_t vs2, size_t vl) {
+ return __riscv_nds_vd4dots_mu(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dots_vv_i64m8_mu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.nds.vd4dots.mask.nxv8i64.nxv32i16.nxv32i16.i64(<vscale x 8 x i64> [[VD:%.*]], <vscale x 32 x i16> [[VS1:%.*]], <vscale x 32 x i16> [[VS2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
+// CHECK-RV64-NEXT: ret <vscale x 8 x i64> [[TMP0]]
+//
+vint64m8_t test_nds_vd4dots_vv_i64m8_mu(vbool8_t mask, vint64m8_t vd, vint16m8_t vs1, vint16m8_t vs2, size_t vl) {
+ return __riscv_nds_vd4dots_mu(mask, vd, vs1, vs2, vl);
+}
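[Aside, not part of the patch: the overloaded policy spellings drop the _vv_<type> infix, so one name serves every SEW/LMUL pairing in the file above and the compiler resolves the call from the argument types. A minimal sketch, mirroring the i64m2 _mu test verbatim:]

#include <andes_vector.h>
#include <stddef.h>

// Masked (_mu) step on 64-bit accumulators fed by 16-bit sources; lanes
// where m is clear keep their old acc values.
vint64m2_t dot4_step_i64(vbool32_t m, vint64m2_t acc, vint16m2_t va,
                         vint16m2_t vb, size_t vl) {
  return __riscv_nds_vd4dots_mu(m, acc, va, vb, vl);
}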
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/policy/overloaded/nds_vd4dotsu.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/policy/overloaded/nds_vd4dotsu.c
new file mode 100644
index 0000000000000..d747612856853
--- /dev/null
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/policy/overloaded/nds_vd4dotsu.c
@@ -0,0 +1,332 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: riscv-registered-target
+// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \
+// RUN: -target-feature +xandesvdot -disable-O0-optnone \
+// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \
+// RUN: FileCheck --check-prefix=CHECK-RV64 %s
+
+#include <andes_vector.h>
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotsu_vv_i32mf2_tu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.nds.vd4dotsu.nxv1i32.nxv4i8.nxv4i8.i64(<vscale x 1 x i32> [[VD:%.*]], <vscale x 4 x i8> [[VS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 1 x i32> [[TMP0]]
+//
+vint32mf2_t test_nds_vd4dotsu_vv_i32mf2_tu(vint32mf2_t vd, vint8mf2_t vs1, vuint8mf2_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotsu_tu(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotsu_vv_i32m1_tu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.nds.vd4dotsu.nxv2i32.nxv8i8.nxv8i8.i64(<vscale x 2 x i32> [[VD:%.*]], <vscale x 8 x i8> [[VS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 2 x i32> [[TMP0]]
+//
+vint32m1_t test_nds_vd4dotsu_vv_i32m1_tu(vint32m1_t vd, vint8m1_t vs1, vuint8m1_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotsu_tu(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotsu_vv_i32m2_tu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.nds.vd4dotsu.nxv4i32.nxv16i8.nxv16i8.i64(<vscale x 4 x i32> [[VD:%.*]], <vscale x 16 x i8> [[VS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 4 x i32> [[TMP0]]
+//
+vint32m2_t test_nds_vd4dotsu_vv_i32m2_tu(vint32m2_t vd, vint8m2_t vs1, vuint8m2_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotsu_tu(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotsu_vv_i32m4_tu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.nds.vd4dotsu.nxv8i32.nxv32i8.nxv32i8.i64(<vscale x 8 x i32> [[VD:%.*]], <vscale x 32 x i8> [[VS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 8 x i32> [[TMP0]]
+//
+vint32m4_t test_nds_vd4dotsu_vv_i32m4_tu(vint32m4_t vd, vint8m4_t vs1, vuint8m4_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotsu_tu(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotsu_vv_i32m8_tu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.nds.vd4dotsu.nxv16i32.nxv64i8.nxv64i8.i64(<vscale x 16 x i32> [[VD:%.*]], <vscale x 64 x i8> [[VS1:%.*]], <vscale x 64 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 16 x i32> [[TMP0]]
+//
+vint32m8_t test_nds_vd4dotsu_vv_i32m8_tu(vint32m8_t vd, vint8m8_t vs1, vuint8m8_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotsu_tu(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotsu_vv_i64m1_tu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.nds.vd4dotsu.nxv1i64.nxv4i16.nxv4i16.i64(<vscale x 1 x i64> [[VD:%.*]], <vscale x 4 x i16> [[VS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 1 x i64> [[TMP0]]
+//
+vint64m1_t test_nds_vd4dotsu_vv_i64m1_tu(vint64m1_t vd, vint16m1_t vs1, vuint16m1_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotsu_tu(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotsu_vv_i64m2_tu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.nds.vd4dotsu.nxv2i64.nxv8i16.nxv8i16.i64(<vscale x 2 x i64> [[VD:%.*]], <vscale x 8 x i16> [[VS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 2 x i64> [[TMP0]]
+//
+vint64m2_t test_nds_vd4dotsu_vv_i64m2_tu(vint64m2_t vd, vint16m2_t vs1, vuint16m2_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotsu_tu(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotsu_vv_i64m4_tu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.nds.vd4dotsu.nxv4i64.nxv16i16.nxv16i16.i64(<vscale x 4 x i64> [[VD:%.*]], <vscale x 16 x i16> [[VS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 4 x i64> [[TMP0]]
+//
+vint64m4_t test_nds_vd4dotsu_vv_i64m4_tu(vint64m4_t vd, vint16m4_t vs1, vuint16m4_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotsu_tu(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotsu_vv_i64m8_tu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.nds.vd4dotsu.nxv8i64.nxv32i16.nxv32i16.i64(<vscale x 8 x i64> [[VD:%.*]], <vscale x 32 x i16> [[VS1:%.*]], <vscale x 32 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 8 x i64> [[TMP0]]
+//
+vint64m8_t test_nds_vd4dotsu_vv_i64m8_tu(vint64m8_t vd, vint16m8_t vs1, vuint16m8_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotsu_tu(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotsu_vv_i32mf2_tum(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.nds.vd4dotsu.mask.nxv1i32.nxv4i8.nxv4i8.i64(<vscale x 1 x i32> [[VD:%.*]], <vscale x 4 x i8> [[VS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 1 x i32> [[TMP0]]
+//
+vint32mf2_t test_nds_vd4dotsu_vv_i32mf2_tum(vbool64_t mask, vint32mf2_t vd, vint8mf2_t vs1, vuint8mf2_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotsu_tum(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotsu_vv_i32m1_tum(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.nds.vd4dotsu.mask.nxv2i32.nxv8i8.nxv8i8.i64(<vscale x 2 x i32> [[VD:%.*]], <vscale x 8 x i8> [[VS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 2 x i32> [[TMP0]]
+//
+vint32m1_t test_nds_vd4dotsu_vv_i32m1_tum(vbool32_t mask, vint32m1_t vd, vint8m1_t vs1, vuint8m1_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotsu_tum(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotsu_vv_i32m2_tum(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.nds.vd4dotsu.mask.nxv4i32.nxv16i8.nxv16i8.i64(<vscale x 4 x i32> [[VD:%.*]], <vscale x 16 x i8> [[VS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 4 x i32> [[TMP0]]
+//
+vint32m2_t test_nds_vd4dotsu_vv_i32m2_tum(vbool16_t mask, vint32m2_t vd, vint8m2_t vs1, vuint8m2_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotsu_tum(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotsu_vv_i32m4_tum(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.nds.vd4dotsu.mask.nxv8i32.nxv32i8.nxv32i8.i64(<vscale x 8 x i32> [[VD:%.*]], <vscale x 32 x i8> [[VS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 8 x i32> [[TMP0]]
+//
+vint32m4_t test_nds_vd4dotsu_vv_i32m4_tum(vbool8_t mask, vint32m4_t vd, vint8m4_t vs1, vuint8m4_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotsu_tum(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotsu_vv_i32m8_tum(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.nds.vd4dotsu.mask.nxv16i32.nxv64i8.nxv64i8.i64(<vscale x 16 x i32> [[VD:%.*]], <vscale x 64 x i8> [[VS1:%.*]], <vscale x 64 x i8> [[VS2:%.*]], <vscale x 16 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 16 x i32> [[TMP0]]
+//
+vint32m8_t test_nds_vd4dotsu_vv_i32m8_tum(vbool4_t mask, vint32m8_t vd, vint8m8_t vs1, vuint8m8_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotsu_tum(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotsu_vv_i64m1_tum(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.nds.vd4dotsu.mask.nxv1i64.nxv4i16.nxv4i16.i64(<vscale x 1 x i64> [[VD:%.*]], <vscale x 4 x i16> [[VS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 1 x i64> [[TMP0]]
+//
+vint64m1_t test_nds_vd4dotsu_vv_i64m1_tum(vbool64_t mask, vint64m1_t vd, vint16m1_t vs1, vuint16m1_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotsu_tum(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotsu_vv_i64m2_tum(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.nds.vd4dotsu.mask.nxv2i64.nxv8i16.nxv8i16.i64(<vscale x 2 x i64> [[VD:%.*]], <vscale x 8 x i16> [[VS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 2 x i64> [[TMP0]]
+//
+vint64m2_t test_nds_vd4dotsu_vv_i64m2_tum(vbool32_t mask, vint64m2_t vd, vint16m2_t vs1, vuint16m2_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotsu_tum(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotsu_vv_i64m4_tum(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.nds.vd4dotsu.mask.nxv4i64.nxv16i16.nxv16i16.i64(<vscale x 4 x i64> [[VD:%.*]], <vscale x 16 x i16> [[VS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 4 x i64> [[TMP0]]
+//
+vint64m4_t test_nds_vd4dotsu_vv_i64m4_tum(vbool16_t mask, vint64m4_t vd, vint16m4_t vs1, vuint16m4_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotsu_tum(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotsu_vv_i64m8_tum(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.nds.vd4dotsu.mask.nxv8i64.nxv32i16.nxv32i16.i64(<vscale x 8 x i64> [[VD:%.*]], <vscale x 32 x i16> [[VS1:%.*]], <vscale x 32 x i16> [[VS2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 8 x i64> [[TMP0]]
+//
+vint64m8_t test_nds_vd4dotsu_vv_i64m8_tum(vbool8_t mask, vint64m8_t vd, vint16m8_t vs1, vuint16m8_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotsu_tum(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotsu_vv_i32mf2_tumu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.nds.vd4dotsu.mask.nxv1i32.nxv4i8.nxv4i8.i64(<vscale x 1 x i32> [[VD:%.*]], <vscale x 4 x i8> [[VS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT: ret <vscale x 1 x i32> [[TMP0]]
+//
+vint32mf2_t test_nds_vd4dotsu_vv_i32mf2_tumu(vbool64_t mask, vint32mf2_t vd, vint8mf2_t vs1, vuint8mf2_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotsu_tumu(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotsu_vv_i32m1_tumu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.nds.vd4dotsu.mask.nxv2i32.nxv8i8.nxv8i8.i64(<vscale x 2 x i32> [[VD:%.*]], <vscale x 8 x i8> [[VS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT: ret <vscale x 2 x i32> [[TMP0]]
+//
+vint32m1_t test_nds_vd4dotsu_vv_i32m1_tumu(vbool32_t mask, vint32m1_t vd, vint8m1_t vs1, vuint8m1_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotsu_tumu(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotsu_vv_i32m2_tumu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.nds.vd4dotsu.mask.nxv4i32.nxv16i8.nxv16i8.i64(<vscale x 4 x i32> [[VD:%.*]], <vscale x 16 x i8> [[VS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT: ret <vscale x 4 x i32> [[TMP0]]
+//
+vint32m2_t test_nds_vd4dotsu_vv_i32m2_tumu(vbool16_t mask, vint32m2_t vd, vint8m2_t vs1, vuint8m2_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotsu_tumu(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotsu_vv_i32m4_tumu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.nds.vd4dotsu.mask.nxv8i32.nxv32i8.nxv32i8.i64(<vscale x 8 x i32> [[VD:%.*]], <vscale x 32 x i8> [[VS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT: ret <vscale x 8 x i32> [[TMP0]]
+//
+vint32m4_t test_nds_vd4dotsu_vv_i32m4_tumu(vbool8_t mask, vint32m4_t vd, vint8m4_t vs1, vuint8m4_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotsu_tumu(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotsu_vv_i32m8_tumu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.nds.vd4dotsu.mask.nxv16i32.nxv64i8.nxv64i8.i64(<vscale x 16 x i32> [[VD:%.*]], <vscale x 64 x i8> [[VS1:%.*]], <vscale x 64 x i8> [[VS2:%.*]], <vscale x 16 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT: ret <vscale x 16 x i32> [[TMP0]]
+//
+vint32m8_t test_nds_vd4dotsu_vv_i32m8_tumu(vbool4_t mask, vint32m8_t vd, vint8m8_t vs1, vuint8m8_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotsu_tumu(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotsu_vv_i64m1_tumu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.nds.vd4dotsu.mask.nxv1i64.nxv4i16.nxv4i16.i64(<vscale x 1 x i64> [[VD:%.*]], <vscale x 4 x i16> [[VS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT: ret <vscale x 1 x i64> [[TMP0]]
+//
+vint64m1_t test_nds_vd4dotsu_vv_i64m1_tumu(vbool64_t mask, vint64m1_t vd, vint16m1_t vs1, vuint16m1_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotsu_tumu(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotsu_vv_i64m2_tumu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.nds.vd4dotsu.mask.nxv2i64.nxv8i16.nxv8i16.i64(<vscale x 2 x i64> [[VD:%.*]], <vscale x 8 x i16> [[VS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT: ret <vscale x 2 x i64> [[TMP0]]
+//
+vint64m2_t test_nds_vd4dotsu_vv_i64m2_tumu(vbool32_t mask, vint64m2_t vd, vint16m2_t vs1, vuint16m2_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotsu_tumu(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotsu_vv_i64m4_tumu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.nds.vd4dotsu.mask.nxv4i64.nxv16i16.nxv16i16.i64(<vscale x 4 x i64> [[VD:%.*]], <vscale x 16 x i16> [[VS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT: ret <vscale x 4 x i64> [[TMP0]]
+//
+vint64m4_t test_nds_vd4dotsu_vv_i64m4_tumu(vbool16_t mask, vint64m4_t vd, vint16m4_t vs1, vuint16m4_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotsu_tumu(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotsu_vv_i64m8_tumu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.nds.vd4dotsu.mask.nxv8i64.nxv32i16.nxv32i16.i64(<vscale x 8 x i64> [[VD:%.*]], <vscale x 32 x i16> [[VS1:%.*]], <vscale x 32 x i16> [[VS2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT: ret <vscale x 8 x i64> [[TMP0]]
+//
+vint64m8_t test_nds_vd4dotsu_vv_i64m8_tumu(vbool8_t mask, vint64m8_t vd, vint16m8_t vs1, vuint16m8_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotsu_tumu(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotsu_vv_i32mf2_mu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.nds.vd4dotsu.mask.nxv1i32.nxv4i8.nxv4i8.i64(<vscale x 1 x i32> [[VD:%.*]], <vscale x 4 x i8> [[VS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
+// CHECK-RV64-NEXT: ret <vscale x 1 x i32> [[TMP0]]
+//
+vint32mf2_t test_nds_vd4dotsu_vv_i32mf2_mu(vbool64_t mask, vint32mf2_t vd, vint8mf2_t vs1, vuint8mf2_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotsu_mu(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotsu_vv_i32m1_mu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.nds.vd4dotsu.mask.nxv2i32.nxv8i8.nxv8i8.i64(<vscale x 2 x i32> [[VD:%.*]], <vscale x 8 x i8> [[VS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
+// CHECK-RV64-NEXT: ret <vscale x 2 x i32> [[TMP0]]
+//
+vint32m1_t test_nds_vd4dotsu_vv_i32m1_mu(vbool32_t mask, vint32m1_t vd, vint8m1_t vs1, vuint8m1_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotsu_mu(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotsu_vv_i32m2_mu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.nds.vd4dotsu.mask.nxv4i32.nxv16i8.nxv16i8.i64(<vscale x 4 x i32> [[VD:%.*]], <vscale x 16 x i8> [[VS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
+// CHECK-RV64-NEXT: ret <vscale x 4 x i32> [[TMP0]]
+//
+vint32m2_t test_nds_vd4dotsu_vv_i32m2_mu(vbool16_t mask, vint32m2_t vd, vint8m2_t vs1, vuint8m2_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotsu_mu(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotsu_vv_i32m4_mu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.nds.vd4dotsu.mask.nxv8i32.nxv32i8.nxv32i8.i64(<vscale x 8 x i32> [[VD:%.*]], <vscale x 32 x i8> [[VS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
+// CHECK-RV64-NEXT: ret <vscale x 8 x i32> [[TMP0]]
+//
+vint32m4_t test_nds_vd4dotsu_vv_i32m4_mu(vbool8_t mask, vint32m4_t vd, vint8m4_t vs1, vuint8m4_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotsu_mu(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotsu_vv_i32m8_mu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.nds.vd4dotsu.mask.nxv16i32.nxv64i8.nxv64i8.i64(<vscale x 16 x i32> [[VD:%.*]], <vscale x 64 x i8> [[VS1:%.*]], <vscale x 64 x i8> [[VS2:%.*]], <vscale x 16 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
+// CHECK-RV64-NEXT: ret <vscale x 16 x i32> [[TMP0]]
+//
+vint32m8_t test_nds_vd4dotsu_vv_i32m8_mu(vbool4_t mask, vint32m8_t vd, vint8m8_t vs1, vuint8m8_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotsu_mu(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotsu_vv_i64m1_mu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.nds.vd4dotsu.mask.nxv1i64.nxv4i16.nxv4i16.i64(<vscale x 1 x i64> [[VD:%.*]], <vscale x 4 x i16> [[VS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
+// CHECK-RV64-NEXT: ret <vscale x 1 x i64> [[TMP0]]
+//
+vint64m1_t test_nds_vd4dotsu_vv_i64m1_mu(vbool64_t mask, vint64m1_t vd, vint16m1_t vs1, vuint16m1_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotsu_mu(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotsu_vv_i64m2_mu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.nds.vd4dotsu.mask.nxv2i64.nxv8i16.nxv8i16.i64(<vscale x 2 x i64> [[VD:%.*]], <vscale x 8 x i16> [[VS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
+// CHECK-RV64-NEXT: ret <vscale x 2 x i64> [[TMP0]]
+//
+vint64m2_t test_nds_vd4dotsu_vv_i64m2_mu(vbool32_t mask, vint64m2_t vd, vint16m2_t vs1, vuint16m2_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotsu_mu(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotsu_vv_i64m4_mu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.nds.vd4dotsu.mask.nxv4i64.nxv16i16.nxv16i16.i64(<vscale x 4 x i64> [[VD:%.*]], <vscale x 16 x i16> [[VS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
+// CHECK-RV64-NEXT: ret <vscale x 4 x i64> [[TMP0]]
+//
+vint64m4_t test_nds_vd4dotsu_vv_i64m4_mu(vbool16_t mask, vint64m4_t vd, vint16m4_t vs1, vuint16m4_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotsu_mu(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotsu_vv_i64m8_mu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.nds.vd4dotsu.mask.nxv8i64.nxv32i16.nxv32i16.i64(<vscale x 8 x i64> [[VD:%.*]], <vscale x 32 x i16> [[VS1:%.*]], <vscale x 32 x i16> [[VS2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
+// CHECK-RV64-NEXT: ret <vscale x 8 x i64> [[TMP0]]
+//
+vint64m8_t test_nds_vd4dotsu_vv_i64m8_mu(vbool8_t mask, vint64m8_t vd, vint16m8_t vs1, vuint16m8_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotsu_mu(mask, vd, vs1, vs2, vl);
+}
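
The i64 forms above pair a 64-bit accumulator with 16-bit sources: each 64-bit destination lane accumulates a dot product of four signed/unsigned 16-bit element pairs, just as the i32 forms consume 8-bit pairs. A minimal usage sketch, assuming a compiler built with this patch, using only the types and overloaded names exercised by the tests (the wrapper itself is illustrative, not part of the commit):

  // Sketch: masked vd4dotsu with tail-undisturbed/mask-undisturbed policy.
  // Inactive and tail lanes of the result keep the old accumulator values.
  #include <andes_vector.h>

  vint64m1_t acc_dot16(vbool64_t mask, vint64m1_t acc,
                       vint16m1_t s, vuint16m1_t u, size_t vl) {
    return __riscv_nds_vd4dotsu_tumu(mask, acc, s, u, vl);
  }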
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/policy/overloaded/nds_vd4dotu.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/policy/overloaded/nds_vd4dotu.c
new file mode 100644
index 0000000000000..491524a4e5767
--- /dev/null
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/policy/overloaded/nds_vd4dotu.c
@@ -0,0 +1,332 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: riscv-registered-target
+// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \
+// RUN: -target-feature +xandesvdot -disable-O0-optnone \
+// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \
+// RUN: FileCheck --check-prefix=CHECK-RV64 %s
+
+#include <andes_vector.h>
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotu_vv_u32mf2_tu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.nds.vd4dotu.nxv1i32.nxv4i8.nxv4i8.i64(<vscale x 1 x i32> [[VD:%.*]], <vscale x 4 x i8> [[VS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 1 x i32> [[TMP0]]
+//
+vuint32mf2_t test_nds_vd4dotu_vv_u32mf2_tu(vuint32mf2_t vd, vuint8mf2_t vs1, vuint8mf2_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotu_tu(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotu_vv_u32m1_tu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.nds.vd4dotu.nxv2i32.nxv8i8.nxv8i8.i64(<vscale x 2 x i32> [[VD:%.*]], <vscale x 8 x i8> [[VS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 2 x i32> [[TMP0]]
+//
+vuint32m1_t test_nds_vd4dotu_vv_u32m1_tu(vuint32m1_t vd, vuint8m1_t vs1, vuint8m1_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotu_tu(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotu_vv_u32m2_tu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.nds.vd4dotu.nxv4i32.nxv16i8.nxv16i8.i64(<vscale x 4 x i32> [[VD:%.*]], <vscale x 16 x i8> [[VS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 4 x i32> [[TMP0]]
+//
+vuint32m2_t test_nds_vd4dotu_vv_u32m2_tu(vuint32m2_t vd, vuint8m2_t vs1, vuint8m2_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotu_tu(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotu_vv_u32m4_tu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.nds.vd4dotu.nxv8i32.nxv32i8.nxv32i8.i64(<vscale x 8 x i32> [[VD:%.*]], <vscale x 32 x i8> [[VS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 8 x i32> [[TMP0]]
+//
+vuint32m4_t test_nds_vd4dotu_vv_u32m4_tu(vuint32m4_t vd, vuint8m4_t vs1, vuint8m4_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotu_tu(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotu_vv_u32m8_tu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.nds.vd4dotu.nxv16i32.nxv64i8.nxv64i8.i64(<vscale x 16 x i32> [[VD:%.*]], <vscale x 64 x i8> [[VS1:%.*]], <vscale x 64 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 16 x i32> [[TMP0]]
+//
+vuint32m8_t test_nds_vd4dotu_vv_u32m8_tu(vuint32m8_t vd, vuint8m8_t vs1, vuint8m8_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotu_tu(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotu_vv_u64m1_tu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.nds.vd4dotu.nxv1i64.nxv4i16.nxv4i16.i64(<vscale x 1 x i64> [[VD:%.*]], <vscale x 4 x i16> [[VS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 1 x i64> [[TMP0]]
+//
+vuint64m1_t test_nds_vd4dotu_vv_u64m1_tu(vuint64m1_t vd, vuint16m1_t vs1, vuint16m1_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotu_tu(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotu_vv_u64m2_tu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.nds.vd4dotu.nxv2i64.nxv8i16.nxv8i16.i64(<vscale x 2 x i64> [[VD:%.*]], <vscale x 8 x i16> [[VS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 2 x i64> [[TMP0]]
+//
+vuint64m2_t test_nds_vd4dotu_vv_u64m2_tu(vuint64m2_t vd, vuint16m2_t vs1, vuint16m2_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotu_tu(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotu_vv_u64m4_tu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.nds.vd4dotu.nxv4i64.nxv16i16.nxv16i16.i64(<vscale x 4 x i64> [[VD:%.*]], <vscale x 16 x i16> [[VS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 4 x i64> [[TMP0]]
+//
+vuint64m4_t test_nds_vd4dotu_vv_u64m4_tu(vuint64m4_t vd, vuint16m4_t vs1, vuint16m4_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotu_tu(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotu_vv_u64m8_tu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.nds.vd4dotu.nxv8i64.nxv32i16.nxv32i16.i64(<vscale x 8 x i64> [[VD:%.*]], <vscale x 32 x i16> [[VS1:%.*]], <vscale x 32 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 8 x i64> [[TMP0]]
+//
+vuint64m8_t test_nds_vd4dotu_vv_u64m8_tu(vuint64m8_t vd, vuint16m8_t vs1, vuint16m8_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotu_tu(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotu_vv_u32mf2_tum(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.nds.vd4dotu.mask.nxv1i32.nxv4i8.nxv4i8.i64(<vscale x 1 x i32> [[VD:%.*]], <vscale x 4 x i8> [[VS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 1 x i32> [[TMP0]]
+//
+vuint32mf2_t test_nds_vd4dotu_vv_u32mf2_tum(vbool64_t mask, vuint32mf2_t vd, vuint8mf2_t vs1, vuint8mf2_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotu_tum(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotu_vv_u32m1_tum(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.nds.vd4dotu.mask.nxv2i32.nxv8i8.nxv8i8.i64(<vscale x 2 x i32> [[VD:%.*]], <vscale x 8 x i8> [[VS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 2 x i32> [[TMP0]]
+//
+vuint32m1_t test_nds_vd4dotu_vv_u32m1_tum(vbool32_t mask, vuint32m1_t vd, vuint8m1_t vs1, vuint8m1_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotu_tum(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotu_vv_u32m2_tum(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.nds.vd4dotu.mask.nxv4i32.nxv16i8.nxv16i8.i64(<vscale x 4 x i32> [[VD:%.*]], <vscale x 16 x i8> [[VS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 4 x i32> [[TMP0]]
+//
+vuint32m2_t test_nds_vd4dotu_vv_u32m2_tum(vbool16_t mask, vuint32m2_t vd, vuint8m2_t vs1, vuint8m2_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotu_tum(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotu_vv_u32m4_tum(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.nds.vd4dotu.mask.nxv8i32.nxv32i8.nxv32i8.i64(<vscale x 8 x i32> [[VD:%.*]], <vscale x 32 x i8> [[VS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 8 x i32> [[TMP0]]
+//
+vuint32m4_t test_nds_vd4dotu_vv_u32m4_tum(vbool8_t mask, vuint32m4_t vd, vuint8m4_t vs1, vuint8m4_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotu_tum(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotu_vv_u32m8_tum(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.nds.vd4dotu.mask.nxv16i32.nxv64i8.nxv64i8.i64(<vscale x 16 x i32> [[VD:%.*]], <vscale x 64 x i8> [[VS1:%.*]], <vscale x 64 x i8> [[VS2:%.*]], <vscale x 16 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 16 x i32> [[TMP0]]
+//
+vuint32m8_t test_nds_vd4dotu_vv_u32m8_tum(vbool4_t mask, vuint32m8_t vd, vuint8m8_t vs1, vuint8m8_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotu_tum(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotu_vv_u64m1_tum(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.nds.vd4dotu.mask.nxv1i64.nxv4i16.nxv4i16.i64(<vscale x 1 x i64> [[VD:%.*]], <vscale x 4 x i16> [[VS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 1 x i64> [[TMP0]]
+//
+vuint64m1_t test_nds_vd4dotu_vv_u64m1_tum(vbool64_t mask, vuint64m1_t vd, vuint16m1_t vs1, vuint16m1_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotu_tum(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotu_vv_u64m2_tum(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.nds.vd4dotu.mask.nxv2i64.nxv8i16.nxv8i16.i64(<vscale x 2 x i64> [[VD:%.*]], <vscale x 8 x i16> [[VS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 2 x i64> [[TMP0]]
+//
+vuint64m2_t test_nds_vd4dotu_vv_u64m2_tum(vbool32_t mask, vuint64m2_t vd, vuint16m2_t vs1, vuint16m2_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotu_tum(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotu_vv_u64m4_tum(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.nds.vd4dotu.mask.nxv4i64.nxv16i16.nxv16i16.i64(<vscale x 4 x i64> [[VD:%.*]], <vscale x 16 x i16> [[VS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 4 x i64> [[TMP0]]
+//
+vuint64m4_t test_nds_vd4dotu_vv_u64m4_tum(vbool16_t mask, vuint64m4_t vd, vuint16m4_t vs1, vuint16m4_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotu_tum(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotu_vv_u64m8_tum(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.nds.vd4dotu.mask.nxv8i64.nxv32i16.nxv32i16.i64(<vscale x 8 x i64> [[VD:%.*]], <vscale x 32 x i16> [[VS1:%.*]], <vscale x 32 x i16> [[VS2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 8 x i64> [[TMP0]]
+//
+vuint64m8_t test_nds_vd4dotu_vv_u64m8_tum(vbool8_t mask, vuint64m8_t vd, vuint16m8_t vs1, vuint16m8_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotu_tum(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotu_vv_u32mf2_tumu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.nds.vd4dotu.mask.nxv1i32.nxv4i8.nxv4i8.i64(<vscale x 1 x i32> [[VD:%.*]], <vscale x 4 x i8> [[VS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT: ret <vscale x 1 x i32> [[TMP0]]
+//
+vuint32mf2_t test_nds_vd4dotu_vv_u32mf2_tumu(vbool64_t mask, vuint32mf2_t vd, vuint8mf2_t vs1, vuint8mf2_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotu_tumu(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotu_vv_u32m1_tumu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.nds.vd4dotu.mask.nxv2i32.nxv8i8.nxv8i8.i64(<vscale x 2 x i32> [[VD:%.*]], <vscale x 8 x i8> [[VS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT: ret <vscale x 2 x i32> [[TMP0]]
+//
+vuint32m1_t test_nds_vd4dotu_vv_u32m1_tumu(vbool32_t mask, vuint32m1_t vd, vuint8m1_t vs1, vuint8m1_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotu_tumu(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotu_vv_u32m2_tumu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.nds.vd4dotu.mask.nxv4i32.nxv16i8.nxv16i8.i64(<vscale x 4 x i32> [[VD:%.*]], <vscale x 16 x i8> [[VS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT: ret <vscale x 4 x i32> [[TMP0]]
+//
+vuint32m2_t test_nds_vd4dotu_vv_u32m2_tumu(vbool16_t mask, vuint32m2_t vd, vuint8m2_t vs1, vuint8m2_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotu_tumu(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotu_vv_u32m4_tumu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.nds.vd4dotu.mask.nxv8i32.nxv32i8.nxv32i8.i64(<vscale x 8 x i32> [[VD:%.*]], <vscale x 32 x i8> [[VS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT: ret <vscale x 8 x i32> [[TMP0]]
+//
+vuint32m4_t test_nds_vd4dotu_vv_u32m4_tumu(vbool8_t mask, vuint32m4_t vd, vuint8m4_t vs1, vuint8m4_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotu_tumu(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotu_vv_u32m8_tumu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.nds.vd4dotu.mask.nxv16i32.nxv64i8.nxv64i8.i64(<vscale x 16 x i32> [[VD:%.*]], <vscale x 64 x i8> [[VS1:%.*]], <vscale x 64 x i8> [[VS2:%.*]], <vscale x 16 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT: ret <vscale x 16 x i32> [[TMP0]]
+//
+vuint32m8_t test_nds_vd4dotu_vv_u32m8_tumu(vbool4_t mask, vuint32m8_t vd, vuint8m8_t vs1, vuint8m8_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotu_tumu(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotu_vv_u64m1_tumu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.nds.vd4dotu.mask.nxv1i64.nxv4i16.nxv4i16.i64(<vscale x 1 x i64> [[VD:%.*]], <vscale x 4 x i16> [[VS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT: ret <vscale x 1 x i64> [[TMP0]]
+//
+vuint64m1_t test_nds_vd4dotu_vv_u64m1_tumu(vbool64_t mask, vuint64m1_t vd, vuint16m1_t vs1, vuint16m1_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotu_tumu(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotu_vv_u64m2_tumu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.nds.vd4dotu.mask.nxv2i64.nxv8i16.nxv8i16.i64(<vscale x 2 x i64> [[VD:%.*]], <vscale x 8 x i16> [[VS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT: ret <vscale x 2 x i64> [[TMP0]]
+//
+vuint64m2_t test_nds_vd4dotu_vv_u64m2_tumu(vbool32_t mask, vuint64m2_t vd, vuint16m2_t vs1, vuint16m2_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotu_tumu(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotu_vv_u64m4_tumu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.nds.vd4dotu.mask.nxv4i64.nxv16i16.nxv16i16.i64(<vscale x 4 x i64> [[VD:%.*]], <vscale x 16 x i16> [[VS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT: ret <vscale x 4 x i64> [[TMP0]]
+//
+vuint64m4_t test_nds_vd4dotu_vv_u64m4_tumu(vbool16_t mask, vuint64m4_t vd, vuint16m4_t vs1, vuint16m4_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotu_tumu(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotu_vv_u64m8_tumu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.nds.vd4dotu.mask.nxv8i64.nxv32i16.nxv32i16.i64(<vscale x 8 x i64> [[VD:%.*]], <vscale x 32 x i16> [[VS1:%.*]], <vscale x 32 x i16> [[VS2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT: ret <vscale x 8 x i64> [[TMP0]]
+//
+vuint64m8_t test_nds_vd4dotu_vv_u64m8_tumu(vbool8_t mask, vuint64m8_t vd, vuint16m8_t vs1, vuint16m8_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotu_tumu(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotu_vv_u32mf2_mu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.nds.vd4dotu.mask.nxv1i32.nxv4i8.nxv4i8.i64(<vscale x 1 x i32> [[VD:%.*]], <vscale x 4 x i8> [[VS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
+// CHECK-RV64-NEXT: ret <vscale x 1 x i32> [[TMP0]]
+//
+vuint32mf2_t test_nds_vd4dotu_vv_u32mf2_mu(vbool64_t mask, vuint32mf2_t vd, vuint8mf2_t vs1, vuint8mf2_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotu_mu(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotu_vv_u32m1_mu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.nds.vd4dotu.mask.nxv2i32.nxv8i8.nxv8i8.i64(<vscale x 2 x i32> [[VD:%.*]], <vscale x 8 x i8> [[VS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
+// CHECK-RV64-NEXT: ret <vscale x 2 x i32> [[TMP0]]
+//
+vuint32m1_t test_nds_vd4dotu_vv_u32m1_mu(vbool32_t mask, vuint32m1_t vd, vuint8m1_t vs1, vuint8m1_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotu_mu(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotu_vv_u32m2_mu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.nds.vd4dotu.mask.nxv4i32.nxv16i8.nxv16i8.i64(<vscale x 4 x i32> [[VD:%.*]], <vscale x 16 x i8> [[VS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
+// CHECK-RV64-NEXT: ret <vscale x 4 x i32> [[TMP0]]
+//
+vuint32m2_t test_nds_vd4dotu_vv_u32m2_mu(vbool16_t mask, vuint32m2_t vd, vuint8m2_t vs1, vuint8m2_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotu_mu(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotu_vv_u32m4_mu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.nds.vd4dotu.mask.nxv8i32.nxv32i8.nxv32i8.i64(<vscale x 8 x i32> [[VD:%.*]], <vscale x 32 x i8> [[VS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
+// CHECK-RV64-NEXT: ret <vscale x 8 x i32> [[TMP0]]
+//
+vuint32m4_t test_nds_vd4dotu_vv_u32m4_mu(vbool8_t mask, vuint32m4_t vd, vuint8m4_t vs1, vuint8m4_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotu_mu(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotu_vv_u32m8_mu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.nds.vd4dotu.mask.nxv16i32.nxv64i8.nxv64i8.i64(<vscale x 16 x i32> [[VD:%.*]], <vscale x 64 x i8> [[VS1:%.*]], <vscale x 64 x i8> [[VS2:%.*]], <vscale x 16 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
+// CHECK-RV64-NEXT: ret <vscale x 16 x i32> [[TMP0]]
+//
+vuint32m8_t test_nds_vd4dotu_vv_u32m8_mu(vbool4_t mask, vuint32m8_t vd, vuint8m8_t vs1, vuint8m8_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotu_mu(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotu_vv_u64m1_mu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.nds.vd4dotu.mask.nxv1i64.nxv4i16.nxv4i16.i64(<vscale x 1 x i64> [[VD:%.*]], <vscale x 4 x i16> [[VS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
+// CHECK-RV64-NEXT: ret <vscale x 1 x i64> [[TMP0]]
+//
+vuint64m1_t test_nds_vd4dotu_vv_u64m1_mu(vbool64_t mask, vuint64m1_t vd, vuint16m1_t vs1, vuint16m1_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotu_mu(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotu_vv_u64m2_mu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.nds.vd4dotu.mask.nxv2i64.nxv8i16.nxv8i16.i64(<vscale x 2 x i64> [[VD:%.*]], <vscale x 8 x i16> [[VS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
+// CHECK-RV64-NEXT: ret <vscale x 2 x i64> [[TMP0]]
+//
+vuint64m2_t test_nds_vd4dotu_vv_u64m2_mu(vbool32_t mask, vuint64m2_t vd, vuint16m2_t vs1, vuint16m2_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotu_mu(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotu_vv_u64m4_mu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.nds.vd4dotu.mask.nxv4i64.nxv16i16.nxv16i16.i64(<vscale x 4 x i64> [[VD:%.*]], <vscale x 16 x i16> [[VS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
+// CHECK-RV64-NEXT: ret <vscale x 4 x i64> [[TMP0]]
+//
+vuint64m4_t test_nds_vd4dotu_vv_u64m4_mu(vbool16_t mask, vuint64m4_t vd, vuint16m4_t vs1, vuint16m4_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotu_mu(mask, vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_nds_vd4dotu_vv_u64m8_mu(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.nds.vd4dotu.mask.nxv8i64.nxv32i16.nxv32i16.i64(<vscale x 8 x i64> [[VD:%.*]], <vscale x 32 x i16> [[VS1:%.*]], <vscale x 32 x i16> [[VS2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1)
+// CHECK-RV64-NEXT: ret <vscale x 8 x i64> [[TMP0]]
+//
+vuint64m8_t test_nds_vd4dotu_vv_u64m8_mu(vbool8_t mask, vuint64m8_t vd, vuint16m8_t vs1, vuint16m8_t vs2, size_t vl) {
+ return __riscv_nds_vd4dotu_mu(mask, vd, vs1, vs2, vl);
+}
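
Taken together, the four policy suffixes map onto the trailing i64 policy immediate visible in the CHECK lines: bit 0 set means tail-agnostic, bit 1 set means mask-agnostic, so _tumu lowers with policy 0, _mu with policy 1, and _tu/_tum with policy 2. A sketch exercising all four variants of the unsigned intrinsic, using only signatures shown in the tests above (an illustrative harness, not part of the commit):

  // Sketch: the four policy variants of the overloaded unsigned dot product.
  #include <andes_vector.h>

  vuint32m1_t dot_policies(vbool32_t mask, vuint32m1_t vd,
                           vuint8m1_t vs1, vuint8m1_t vs2, size_t vl) {
    vd = __riscv_nds_vd4dotu_tu(vd, vs1, vs2, vl);         // unmasked, policy 2
    vd = __riscv_nds_vd4dotu_tum(mask, vd, vs1, vs2, vl);  // policy 2: TU, MA
    vd = __riscv_nds_vd4dotu_tumu(mask, vd, vs1, vs2, vl); // policy 0: TU, MU
    vd = __riscv_nds_vd4dotu_mu(mask, vd, vs1, vs2, vl);   // policy 1: TA, MU
    return vd;
  }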
diff --git a/clang/utils/TableGen/RISCVVEmitter.cpp b/clang/utils/TableGen/RISCVVEmitter.cpp
index 2eae4fd07614b..c58f66568a315 100644
--- a/clang/utils/TableGen/RISCVVEmitter.cpp
+++ b/clang/utils/TableGen/RISCVVEmitter.cpp
@@ -775,6 +775,7 @@ void RVVEmitter::createRVVIntrinsics(
.Case("RV64", RVV_REQ_RV64)
.Case("Zvfhmin", RVV_REQ_Zvfhmin)
.Case("Xandesvpackfph", RVV_REQ_Xandesvpackfph)
+ .Case("Xandesvdot", RVV_REQ_Xandesvdot)
.Case("Xsfvcp", RVV_REQ_Xsfvcp)
.Case("Xsfvfnrclipxfqf", RVV_REQ_Xsfvfnrclipxfqf)
.Case("Xsfvfwmaccqqq", RVV_REQ_Xsfvfwmaccqqq)