[clang] b106cb4 - [4/7][Clang][RISCV] Remove default tail-undisturbed for multiply-add intrinsics

via cfe-commits cfe-commits at lists.llvm.org
Mon Jan 23 23:46:59 PST 2023


Author: eopXD
Date: 2023-01-23T23:46:50-08:00
New Revision: b106cb4a00602fc6c9d5f8c1112394856ba061b4

URL: https://github.com/llvm/llvm-project/commit/b106cb4a00602fc6c9d5f8c1112394856ba061b4
DIFF: https://github.com/llvm/llvm-project/commit/b106cb4a00602fc6c9d5f8c1112394856ba061b4.diff

LOG: [4/7][Clang][RISCV] Remove default tail-undisturbed for multiply-add intrinsics

The destination parameter is still needed as the computation requires
so.

The default policy for non-policy (implicit) vslideup intrinsics
will be tail agnostic and mask undisturbed.

This is the 4th commit of a patch-set that aims to remove the
IsPrototypeDefaultTU special case for the rvv-intrinsics.

Please refer to the cover letter in the 1st commit (D140895) for an overview.

Reviewed By: craig.topper

Differential Revision: https://reviews.llvm.org/D140941

Added: 
    

Modified: 
    clang/include/clang/Basic/riscv_vector.td
    clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vfmacc.c
    clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vfmadd.c
    clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vfmsac.c
    clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vfmsub.c
    clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vfnmacc.c
    clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vfnmadd.c
    clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vfnmsac.c
    clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vfnmsub.c
    clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vfwmacc.c
    clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vfwmsac.c
    clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vfwnmacc.c
    clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vfwnmsac.c
    clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vmacc.c
    clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vmadd.c
    clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vnmsac.c
    clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vnmsub.c
    clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vwmacc.c
    clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vwmaccsu.c
    clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vwmaccu.c
    clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vwmaccus.c
    clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vfmacc.c
    clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vfmadd.c
    clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vfmsac.c
    clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vfmsub.c
    clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vfnmacc.c
    clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vfnmadd.c
    clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vfnmsac.c
    clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vfnmsub.c
    clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vfwmacc.c
    clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vfwmsac.c
    clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vfwnmacc.c
    clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vfwnmsac.c
    clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vmacc.c
    clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vmadd.c
    clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vnmsac.c
    clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vnmsub.c
    clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vwmacc.c
    clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vwmaccsu.c
    clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vwmaccu.c
    clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vwmaccus.c

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Basic/riscv_vector.td b/clang/include/clang/Basic/riscv_vector.td
index e5ec4e048a917..4e36f213677a3 100644
--- a/clang/include/clang/Basic/riscv_vector.td
+++ b/clang/include/clang/Basic/riscv_vector.td
@@ -1878,7 +1878,7 @@ defm vwmulsu : RVVOutOp0Op1BuiltinSet<"vwmulsu", "csi",
 
 // 12.13. Vector Single-Width Integer Multiply-Add Instructions
 let UnMaskedPolicyScheme = HasPolicyOperand,
-    IsPrototypeDefaultTU = true in {
+    IsPrototypeDefaultTU = false in {
 defm vmacc  : RVVIntTerBuiltinSet;
 defm vnmsac : RVVIntTerBuiltinSet;
 defm vmadd  : RVVIntTerBuiltinSet;
@@ -1990,7 +1990,7 @@ let Log2LMUL = [-2, -1, 0, 1, 2] in {
 
 // 14.6. Vector Single-Width Floating-Point Fused Multiply-Add Instructions
 let UnMaskedPolicyScheme = HasPolicyOperand,
-    IsPrototypeDefaultTU = true in {
+    IsPrototypeDefaultTU = false in {
 defm vfmacc  : RVVFloatingTerBuiltinSet;
 defm vfnmacc : RVVFloatingTerBuiltinSet;
 defm vfmsac  : RVVFloatingTerBuiltinSet;

diff  --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vfmacc.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vfmacc.c
index f89f283335ce0..c7877a3bb0abe 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vfmacc.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vfmacc.c
@@ -9,7 +9,7 @@
 
 // CHECK-RV64-LABEL: @test_vfmacc_vv_f16mf4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x half> @llvm.riscv.vfmacc.nxv1f16.nxv1f16.i64(<vscale x 1 x half> [[VD:%.*]], <vscale x 1 x half> [[VS1:%.*]], <vscale x 1 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x half> @llvm.riscv.vfmacc.nxv1f16.nxv1f16.i64(<vscale x 1 x half> [[VD:%.*]], <vscale x 1 x half> [[VS1:%.*]], <vscale x 1 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x half> [[TMP0]]
 //
 vfloat16mf4_t test_vfmacc_vv_f16mf4(vfloat16mf4_t vd, vfloat16mf4_t vs1, vfloat16mf4_t vs2, size_t vl) {
@@ -18,7 +18,7 @@ vfloat16mf4_t test_vfmacc_vv_f16mf4(vfloat16mf4_t vd, vfloat16mf4_t vs1, vfloat1
 
 // CHECK-RV64-LABEL: @test_vfmacc_vf_f16mf4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x half> @llvm.riscv.vfmacc.nxv1f16.f16.i64(<vscale x 1 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 1 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x half> @llvm.riscv.vfmacc.nxv1f16.f16.i64(<vscale x 1 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 1 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x half> [[TMP0]]
 //
 vfloat16mf4_t test_vfmacc_vf_f16mf4(vfloat16mf4_t vd, _Float16 rs1, vfloat16mf4_t vs2, size_t vl) {
@@ -27,7 +27,7 @@ vfloat16mf4_t test_vfmacc_vf_f16mf4(vfloat16mf4_t vd, _Float16 rs1, vfloat16mf4_
 
 // CHECK-RV64-LABEL: @test_vfmacc_vv_f16mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x half> @llvm.riscv.vfmacc.nxv2f16.nxv2f16.i64(<vscale x 2 x half> [[VD:%.*]], <vscale x 2 x half> [[VS1:%.*]], <vscale x 2 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x half> @llvm.riscv.vfmacc.nxv2f16.nxv2f16.i64(<vscale x 2 x half> [[VD:%.*]], <vscale x 2 x half> [[VS1:%.*]], <vscale x 2 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x half> [[TMP0]]
 //
 vfloat16mf2_t test_vfmacc_vv_f16mf2(vfloat16mf2_t vd, vfloat16mf2_t vs1, vfloat16mf2_t vs2, size_t vl) {
@@ -36,7 +36,7 @@ vfloat16mf2_t test_vfmacc_vv_f16mf2(vfloat16mf2_t vd, vfloat16mf2_t vs1, vfloat1
 
 // CHECK-RV64-LABEL: @test_vfmacc_vf_f16mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x half> @llvm.riscv.vfmacc.nxv2f16.f16.i64(<vscale x 2 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 2 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x half> @llvm.riscv.vfmacc.nxv2f16.f16.i64(<vscale x 2 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 2 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x half> [[TMP0]]
 //
 vfloat16mf2_t test_vfmacc_vf_f16mf2(vfloat16mf2_t vd, _Float16 rs1, vfloat16mf2_t vs2, size_t vl) {
@@ -45,7 +45,7 @@ vfloat16mf2_t test_vfmacc_vf_f16mf2(vfloat16mf2_t vd, _Float16 rs1, vfloat16mf2_
 
 // CHECK-RV64-LABEL: @test_vfmacc_vv_f16m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x half> @llvm.riscv.vfmacc.nxv4f16.nxv4f16.i64(<vscale x 4 x half> [[VD:%.*]], <vscale x 4 x half> [[VS1:%.*]], <vscale x 4 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x half> @llvm.riscv.vfmacc.nxv4f16.nxv4f16.i64(<vscale x 4 x half> [[VD:%.*]], <vscale x 4 x half> [[VS1:%.*]], <vscale x 4 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x half> [[TMP0]]
 //
 vfloat16m1_t test_vfmacc_vv_f16m1(vfloat16m1_t vd, vfloat16m1_t vs1, vfloat16m1_t vs2, size_t vl) {
@@ -54,7 +54,7 @@ vfloat16m1_t test_vfmacc_vv_f16m1(vfloat16m1_t vd, vfloat16m1_t vs1, vfloat16m1_
 
 // CHECK-RV64-LABEL: @test_vfmacc_vf_f16m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x half> @llvm.riscv.vfmacc.nxv4f16.f16.i64(<vscale x 4 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 4 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x half> @llvm.riscv.vfmacc.nxv4f16.f16.i64(<vscale x 4 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 4 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x half> [[TMP0]]
 //
 vfloat16m1_t test_vfmacc_vf_f16m1(vfloat16m1_t vd, _Float16 rs1, vfloat16m1_t vs2, size_t vl) {
@@ -63,7 +63,7 @@ vfloat16m1_t test_vfmacc_vf_f16m1(vfloat16m1_t vd, _Float16 rs1, vfloat16m1_t vs
 
 // CHECK-RV64-LABEL: @test_vfmacc_vv_f16m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x half> @llvm.riscv.vfmacc.nxv8f16.nxv8f16.i64(<vscale x 8 x half> [[VD:%.*]], <vscale x 8 x half> [[VS1:%.*]], <vscale x 8 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x half> @llvm.riscv.vfmacc.nxv8f16.nxv8f16.i64(<vscale x 8 x half> [[VD:%.*]], <vscale x 8 x half> [[VS1:%.*]], <vscale x 8 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x half> [[TMP0]]
 //
 vfloat16m2_t test_vfmacc_vv_f16m2(vfloat16m2_t vd, vfloat16m2_t vs1, vfloat16m2_t vs2, size_t vl) {
@@ -72,7 +72,7 @@ vfloat16m2_t test_vfmacc_vv_f16m2(vfloat16m2_t vd, vfloat16m2_t vs1, vfloat16m2_
 
 // CHECK-RV64-LABEL: @test_vfmacc_vf_f16m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x half> @llvm.riscv.vfmacc.nxv8f16.f16.i64(<vscale x 8 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 8 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x half> @llvm.riscv.vfmacc.nxv8f16.f16.i64(<vscale x 8 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 8 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x half> [[TMP0]]
 //
 vfloat16m2_t test_vfmacc_vf_f16m2(vfloat16m2_t vd, _Float16 rs1, vfloat16m2_t vs2, size_t vl) {
@@ -81,7 +81,7 @@ vfloat16m2_t test_vfmacc_vf_f16m2(vfloat16m2_t vd, _Float16 rs1, vfloat16m2_t vs
 
 // CHECK-RV64-LABEL: @test_vfmacc_vv_f16m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x half> @llvm.riscv.vfmacc.nxv16f16.nxv16f16.i64(<vscale x 16 x half> [[VD:%.*]], <vscale x 16 x half> [[VS1:%.*]], <vscale x 16 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x half> @llvm.riscv.vfmacc.nxv16f16.nxv16f16.i64(<vscale x 16 x half> [[VD:%.*]], <vscale x 16 x half> [[VS1:%.*]], <vscale x 16 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x half> [[TMP0]]
 //
 vfloat16m4_t test_vfmacc_vv_f16m4(vfloat16m4_t vd, vfloat16m4_t vs1, vfloat16m4_t vs2, size_t vl) {
@@ -90,7 +90,7 @@ vfloat16m4_t test_vfmacc_vv_f16m4(vfloat16m4_t vd, vfloat16m4_t vs1, vfloat16m4_
 
 // CHECK-RV64-LABEL: @test_vfmacc_vf_f16m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x half> @llvm.riscv.vfmacc.nxv16f16.f16.i64(<vscale x 16 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 16 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x half> @llvm.riscv.vfmacc.nxv16f16.f16.i64(<vscale x 16 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 16 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x half> [[TMP0]]
 //
 vfloat16m4_t test_vfmacc_vf_f16m4(vfloat16m4_t vd, _Float16 rs1, vfloat16m4_t vs2, size_t vl) {
@@ -99,7 +99,7 @@ vfloat16m4_t test_vfmacc_vf_f16m4(vfloat16m4_t vd, _Float16 rs1, vfloat16m4_t vs
 
 // CHECK-RV64-LABEL: @test_vfmacc_vv_f16m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x half> @llvm.riscv.vfmacc.nxv32f16.nxv32f16.i64(<vscale x 32 x half> [[VD:%.*]], <vscale x 32 x half> [[VS1:%.*]], <vscale x 32 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x half> @llvm.riscv.vfmacc.nxv32f16.nxv32f16.i64(<vscale x 32 x half> [[VD:%.*]], <vscale x 32 x half> [[VS1:%.*]], <vscale x 32 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 32 x half> [[TMP0]]
 //
 vfloat16m8_t test_vfmacc_vv_f16m8(vfloat16m8_t vd, vfloat16m8_t vs1, vfloat16m8_t vs2, size_t vl) {
@@ -108,7 +108,7 @@ vfloat16m8_t test_vfmacc_vv_f16m8(vfloat16m8_t vd, vfloat16m8_t vs1, vfloat16m8_
 
 // CHECK-RV64-LABEL: @test_vfmacc_vf_f16m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x half> @llvm.riscv.vfmacc.nxv32f16.f16.i64(<vscale x 32 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 32 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x half> @llvm.riscv.vfmacc.nxv32f16.f16.i64(<vscale x 32 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 32 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 32 x half> [[TMP0]]
 //
 vfloat16m8_t test_vfmacc_vf_f16m8(vfloat16m8_t vd, _Float16 rs1, vfloat16m8_t vs2, size_t vl) {
@@ -117,7 +117,7 @@ vfloat16m8_t test_vfmacc_vf_f16m8(vfloat16m8_t vd, _Float16 rs1, vfloat16m8_t vs
 
 // CHECK-RV64-LABEL: @test_vfmacc_vv_f32mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x float> @llvm.riscv.vfmacc.nxv1f32.nxv1f32.i64(<vscale x 1 x float> [[VD:%.*]], <vscale x 1 x float> [[VS1:%.*]], <vscale x 1 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x float> @llvm.riscv.vfmacc.nxv1f32.nxv1f32.i64(<vscale x 1 x float> [[VD:%.*]], <vscale x 1 x float> [[VS1:%.*]], <vscale x 1 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x float> [[TMP0]]
 //
 vfloat32mf2_t test_vfmacc_vv_f32mf2(vfloat32mf2_t vd, vfloat32mf2_t vs1, vfloat32mf2_t vs2, size_t vl) {
@@ -126,7 +126,7 @@ vfloat32mf2_t test_vfmacc_vv_f32mf2(vfloat32mf2_t vd, vfloat32mf2_t vs1, vfloat3
 
 // CHECK-RV64-LABEL: @test_vfmacc_vf_f32mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x float> @llvm.riscv.vfmacc.nxv1f32.f32.i64(<vscale x 1 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 1 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x float> @llvm.riscv.vfmacc.nxv1f32.f32.i64(<vscale x 1 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 1 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x float> [[TMP0]]
 //
 vfloat32mf2_t test_vfmacc_vf_f32mf2(vfloat32mf2_t vd, float rs1, vfloat32mf2_t vs2, size_t vl) {
@@ -135,7 +135,7 @@ vfloat32mf2_t test_vfmacc_vf_f32mf2(vfloat32mf2_t vd, float rs1, vfloat32mf2_t v
 
 // CHECK-RV64-LABEL: @test_vfmacc_vv_f32m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x float> @llvm.riscv.vfmacc.nxv2f32.nxv2f32.i64(<vscale x 2 x float> [[VD:%.*]], <vscale x 2 x float> [[VS1:%.*]], <vscale x 2 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x float> @llvm.riscv.vfmacc.nxv2f32.nxv2f32.i64(<vscale x 2 x float> [[VD:%.*]], <vscale x 2 x float> [[VS1:%.*]], <vscale x 2 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x float> [[TMP0]]
 //
 vfloat32m1_t test_vfmacc_vv_f32m1(vfloat32m1_t vd, vfloat32m1_t vs1, vfloat32m1_t vs2, size_t vl) {
@@ -144,7 +144,7 @@ vfloat32m1_t test_vfmacc_vv_f32m1(vfloat32m1_t vd, vfloat32m1_t vs1, vfloat32m1_
 
 // CHECK-RV64-LABEL: @test_vfmacc_vf_f32m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x float> @llvm.riscv.vfmacc.nxv2f32.f32.i64(<vscale x 2 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 2 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x float> @llvm.riscv.vfmacc.nxv2f32.f32.i64(<vscale x 2 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 2 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x float> [[TMP0]]
 //
 vfloat32m1_t test_vfmacc_vf_f32m1(vfloat32m1_t vd, float rs1, vfloat32m1_t vs2, size_t vl) {
@@ -153,7 +153,7 @@ vfloat32m1_t test_vfmacc_vf_f32m1(vfloat32m1_t vd, float rs1, vfloat32m1_t vs2,
 
 // CHECK-RV64-LABEL: @test_vfmacc_vv_f32m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.riscv.vfmacc.nxv4f32.nxv4f32.i64(<vscale x 4 x float> [[VD:%.*]], <vscale x 4 x float> [[VS1:%.*]], <vscale x 4 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.riscv.vfmacc.nxv4f32.nxv4f32.i64(<vscale x 4 x float> [[VD:%.*]], <vscale x 4 x float> [[VS1:%.*]], <vscale x 4 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
 vfloat32m2_t test_vfmacc_vv_f32m2(vfloat32m2_t vd, vfloat32m2_t vs1, vfloat32m2_t vs2, size_t vl) {
@@ -162,7 +162,7 @@ vfloat32m2_t test_vfmacc_vv_f32m2(vfloat32m2_t vd, vfloat32m2_t vs1, vfloat32m2_
 
 // CHECK-RV64-LABEL: @test_vfmacc_vf_f32m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.riscv.vfmacc.nxv4f32.f32.i64(<vscale x 4 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 4 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.riscv.vfmacc.nxv4f32.f32.i64(<vscale x 4 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 4 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
 vfloat32m2_t test_vfmacc_vf_f32m2(vfloat32m2_t vd, float rs1, vfloat32m2_t vs2, size_t vl) {
@@ -171,7 +171,7 @@ vfloat32m2_t test_vfmacc_vf_f32m2(vfloat32m2_t vd, float rs1, vfloat32m2_t vs2,
 
 // CHECK-RV64-LABEL: @test_vfmacc_vv_f32m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x float> @llvm.riscv.vfmacc.nxv8f32.nxv8f32.i64(<vscale x 8 x float> [[VD:%.*]], <vscale x 8 x float> [[VS1:%.*]], <vscale x 8 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x float> @llvm.riscv.vfmacc.nxv8f32.nxv8f32.i64(<vscale x 8 x float> [[VD:%.*]], <vscale x 8 x float> [[VS1:%.*]], <vscale x 8 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x float> [[TMP0]]
 //
 vfloat32m4_t test_vfmacc_vv_f32m4(vfloat32m4_t vd, vfloat32m4_t vs1, vfloat32m4_t vs2, size_t vl) {
@@ -180,7 +180,7 @@ vfloat32m4_t test_vfmacc_vv_f32m4(vfloat32m4_t vd, vfloat32m4_t vs1, vfloat32m4_
 
 // CHECK-RV64-LABEL: @test_vfmacc_vf_f32m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x float> @llvm.riscv.vfmacc.nxv8f32.f32.i64(<vscale x 8 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 8 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x float> @llvm.riscv.vfmacc.nxv8f32.f32.i64(<vscale x 8 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 8 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x float> [[TMP0]]
 //
 vfloat32m4_t test_vfmacc_vf_f32m4(vfloat32m4_t vd, float rs1, vfloat32m4_t vs2, size_t vl) {
@@ -189,7 +189,7 @@ vfloat32m4_t test_vfmacc_vf_f32m4(vfloat32m4_t vd, float rs1, vfloat32m4_t vs2,
 
 // CHECK-RV64-LABEL: @test_vfmacc_vv_f32m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x float> @llvm.riscv.vfmacc.nxv16f32.nxv16f32.i64(<vscale x 16 x float> [[VD:%.*]], <vscale x 16 x float> [[VS1:%.*]], <vscale x 16 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x float> @llvm.riscv.vfmacc.nxv16f32.nxv16f32.i64(<vscale x 16 x float> [[VD:%.*]], <vscale x 16 x float> [[VS1:%.*]], <vscale x 16 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x float> [[TMP0]]
 //
 vfloat32m8_t test_vfmacc_vv_f32m8(vfloat32m8_t vd, vfloat32m8_t vs1, vfloat32m8_t vs2, size_t vl) {
@@ -198,7 +198,7 @@ vfloat32m8_t test_vfmacc_vv_f32m8(vfloat32m8_t vd, vfloat32m8_t vs1, vfloat32m8_
 
 // CHECK-RV64-LABEL: @test_vfmacc_vf_f32m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x float> @llvm.riscv.vfmacc.nxv16f32.f32.i64(<vscale x 16 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 16 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x float> @llvm.riscv.vfmacc.nxv16f32.f32.i64(<vscale x 16 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 16 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x float> [[TMP0]]
 //
 vfloat32m8_t test_vfmacc_vf_f32m8(vfloat32m8_t vd, float rs1, vfloat32m8_t vs2, size_t vl) {
@@ -207,7 +207,7 @@ vfloat32m8_t test_vfmacc_vf_f32m8(vfloat32m8_t vd, float rs1, vfloat32m8_t vs2,
 
 // CHECK-RV64-LABEL: @test_vfmacc_vv_f64m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x double> @llvm.riscv.vfmacc.nxv1f64.nxv1f64.i64(<vscale x 1 x double> [[VD:%.*]], <vscale x 1 x double> [[VS1:%.*]], <vscale x 1 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x double> @llvm.riscv.vfmacc.nxv1f64.nxv1f64.i64(<vscale x 1 x double> [[VD:%.*]], <vscale x 1 x double> [[VS1:%.*]], <vscale x 1 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x double> [[TMP0]]
 //
 vfloat64m1_t test_vfmacc_vv_f64m1(vfloat64m1_t vd, vfloat64m1_t vs1, vfloat64m1_t vs2, size_t vl) {
@@ -216,7 +216,7 @@ vfloat64m1_t test_vfmacc_vv_f64m1(vfloat64m1_t vd, vfloat64m1_t vs1, vfloat64m1_
 
 // CHECK-RV64-LABEL: @test_vfmacc_vf_f64m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x double> @llvm.riscv.vfmacc.nxv1f64.f64.i64(<vscale x 1 x double> [[VD:%.*]], double [[RS1:%.*]], <vscale x 1 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x double> @llvm.riscv.vfmacc.nxv1f64.f64.i64(<vscale x 1 x double> [[VD:%.*]], double [[RS1:%.*]], <vscale x 1 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x double> [[TMP0]]
 //
 vfloat64m1_t test_vfmacc_vf_f64m1(vfloat64m1_t vd, double rs1, vfloat64m1_t vs2, size_t vl) {
@@ -225,7 +225,7 @@ vfloat64m1_t test_vfmacc_vf_f64m1(vfloat64m1_t vd, double rs1, vfloat64m1_t vs2,
 
 // CHECK-RV64-LABEL: @test_vfmacc_vv_f64m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x double> @llvm.riscv.vfmacc.nxv2f64.nxv2f64.i64(<vscale x 2 x double> [[VD:%.*]], <vscale x 2 x double> [[VS1:%.*]], <vscale x 2 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x double> @llvm.riscv.vfmacc.nxv2f64.nxv2f64.i64(<vscale x 2 x double> [[VD:%.*]], <vscale x 2 x double> [[VS1:%.*]], <vscale x 2 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x double> [[TMP0]]
 //
 vfloat64m2_t test_vfmacc_vv_f64m2(vfloat64m2_t vd, vfloat64m2_t vs1, vfloat64m2_t vs2, size_t vl) {
@@ -234,7 +234,7 @@ vfloat64m2_t test_vfmacc_vv_f64m2(vfloat64m2_t vd, vfloat64m2_t vs1, vfloat64m2_
 
 // CHECK-RV64-LABEL: @test_vfmacc_vf_f64m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x double> @llvm.riscv.vfmacc.nxv2f64.f64.i64(<vscale x 2 x double> [[VD:%.*]], double [[RS1:%.*]], <vscale x 2 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x double> @llvm.riscv.vfmacc.nxv2f64.f64.i64(<vscale x 2 x double> [[VD:%.*]], double [[RS1:%.*]], <vscale x 2 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x double> [[TMP0]]
 //
 vfloat64m2_t test_vfmacc_vf_f64m2(vfloat64m2_t vd, double rs1, vfloat64m2_t vs2, size_t vl) {
@@ -243,7 +243,7 @@ vfloat64m2_t test_vfmacc_vf_f64m2(vfloat64m2_t vd, double rs1, vfloat64m2_t vs2,
 
 // CHECK-RV64-LABEL: @test_vfmacc_vv_f64m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x double> @llvm.riscv.vfmacc.nxv4f64.nxv4f64.i64(<vscale x 4 x double> [[VD:%.*]], <vscale x 4 x double> [[VS1:%.*]], <vscale x 4 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x double> @llvm.riscv.vfmacc.nxv4f64.nxv4f64.i64(<vscale x 4 x double> [[VD:%.*]], <vscale x 4 x double> [[VS1:%.*]], <vscale x 4 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x double> [[TMP0]]
 //
 vfloat64m4_t test_vfmacc_vv_f64m4(vfloat64m4_t vd, vfloat64m4_t vs1, vfloat64m4_t vs2, size_t vl) {
@@ -252,7 +252,7 @@ vfloat64m4_t test_vfmacc_vv_f64m4(vfloat64m4_t vd, vfloat64m4_t vs1, vfloat64m4_
 
 // CHECK-RV64-LABEL: @test_vfmacc_vf_f64m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x double> @llvm.riscv.vfmacc.nxv4f64.f64.i64(<vscale x 4 x double> [[VD:%.*]], double [[RS1:%.*]], <vscale x 4 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x double> @llvm.riscv.vfmacc.nxv4f64.f64.i64(<vscale x 4 x double> [[VD:%.*]], double [[RS1:%.*]], <vscale x 4 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x double> [[TMP0]]
 //
 vfloat64m4_t test_vfmacc_vf_f64m4(vfloat64m4_t vd, double rs1, vfloat64m4_t vs2, size_t vl) {
@@ -261,7 +261,7 @@ vfloat64m4_t test_vfmacc_vf_f64m4(vfloat64m4_t vd, double rs1, vfloat64m4_t vs2,
 
 // CHECK-RV64-LABEL: @test_vfmacc_vv_f64m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x double> @llvm.riscv.vfmacc.nxv8f64.nxv8f64.i64(<vscale x 8 x double> [[VD:%.*]], <vscale x 8 x double> [[VS1:%.*]], <vscale x 8 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x double> @llvm.riscv.vfmacc.nxv8f64.nxv8f64.i64(<vscale x 8 x double> [[VD:%.*]], <vscale x 8 x double> [[VS1:%.*]], <vscale x 8 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x double> [[TMP0]]
 //
 vfloat64m8_t test_vfmacc_vv_f64m8(vfloat64m8_t vd, vfloat64m8_t vs1, vfloat64m8_t vs2, size_t vl) {
@@ -270,7 +270,7 @@ vfloat64m8_t test_vfmacc_vv_f64m8(vfloat64m8_t vd, vfloat64m8_t vs1, vfloat64m8_
 
 // CHECK-RV64-LABEL: @test_vfmacc_vf_f64m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x double> @llvm.riscv.vfmacc.nxv8f64.f64.i64(<vscale x 8 x double> [[VD:%.*]], double [[RS1:%.*]], <vscale x 8 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x double> @llvm.riscv.vfmacc.nxv8f64.f64.i64(<vscale x 8 x double> [[VD:%.*]], double [[RS1:%.*]], <vscale x 8 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x double> [[TMP0]]
 //
 vfloat64m8_t test_vfmacc_vf_f64m8(vfloat64m8_t vd, double rs1, vfloat64m8_t vs2, size_t vl) {

diff  --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vfmadd.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vfmadd.c
index 7a3d32faa6047..0776fab260607 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vfmadd.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vfmadd.c
@@ -9,7 +9,7 @@
 
 // CHECK-RV64-LABEL: @test_vfmadd_vv_f16mf4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x half> @llvm.riscv.vfmadd.nxv1f16.nxv1f16.i64(<vscale x 1 x half> [[VD:%.*]], <vscale x 1 x half> [[VS1:%.*]], <vscale x 1 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x half> @llvm.riscv.vfmadd.nxv1f16.nxv1f16.i64(<vscale x 1 x half> [[VD:%.*]], <vscale x 1 x half> [[VS1:%.*]], <vscale x 1 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x half> [[TMP0]]
 //
 vfloat16mf4_t test_vfmadd_vv_f16mf4(vfloat16mf4_t vd, vfloat16mf4_t vs1, vfloat16mf4_t vs2, size_t vl) {
@@ -18,7 +18,7 @@ vfloat16mf4_t test_vfmadd_vv_f16mf4(vfloat16mf4_t vd, vfloat16mf4_t vs1, vfloat1
 
 // CHECK-RV64-LABEL: @test_vfmadd_vf_f16mf4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x half> @llvm.riscv.vfmadd.nxv1f16.f16.i64(<vscale x 1 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 1 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x half> @llvm.riscv.vfmadd.nxv1f16.f16.i64(<vscale x 1 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 1 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x half> [[TMP0]]
 //
 vfloat16mf4_t test_vfmadd_vf_f16mf4(vfloat16mf4_t vd, _Float16 rs1, vfloat16mf4_t vs2, size_t vl) {
@@ -27,7 +27,7 @@ vfloat16mf4_t test_vfmadd_vf_f16mf4(vfloat16mf4_t vd, _Float16 rs1, vfloat16mf4_
 
 // CHECK-RV64-LABEL: @test_vfmadd_vv_f16mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x half> @llvm.riscv.vfmadd.nxv2f16.nxv2f16.i64(<vscale x 2 x half> [[VD:%.*]], <vscale x 2 x half> [[VS1:%.*]], <vscale x 2 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x half> @llvm.riscv.vfmadd.nxv2f16.nxv2f16.i64(<vscale x 2 x half> [[VD:%.*]], <vscale x 2 x half> [[VS1:%.*]], <vscale x 2 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x half> [[TMP0]]
 //
 vfloat16mf2_t test_vfmadd_vv_f16mf2(vfloat16mf2_t vd, vfloat16mf2_t vs1, vfloat16mf2_t vs2, size_t vl) {
@@ -36,7 +36,7 @@ vfloat16mf2_t test_vfmadd_vv_f16mf2(vfloat16mf2_t vd, vfloat16mf2_t vs1, vfloat1
 
 // CHECK-RV64-LABEL: @test_vfmadd_vf_f16mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x half> @llvm.riscv.vfmadd.nxv2f16.f16.i64(<vscale x 2 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 2 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x half> @llvm.riscv.vfmadd.nxv2f16.f16.i64(<vscale x 2 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 2 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x half> [[TMP0]]
 //
 vfloat16mf2_t test_vfmadd_vf_f16mf2(vfloat16mf2_t vd, _Float16 rs1, vfloat16mf2_t vs2, size_t vl) {
@@ -45,7 +45,7 @@ vfloat16mf2_t test_vfmadd_vf_f16mf2(vfloat16mf2_t vd, _Float16 rs1, vfloat16mf2_
 
 // CHECK-RV64-LABEL: @test_vfmadd_vv_f16m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x half> @llvm.riscv.vfmadd.nxv4f16.nxv4f16.i64(<vscale x 4 x half> [[VD:%.*]], <vscale x 4 x half> [[VS1:%.*]], <vscale x 4 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x half> @llvm.riscv.vfmadd.nxv4f16.nxv4f16.i64(<vscale x 4 x half> [[VD:%.*]], <vscale x 4 x half> [[VS1:%.*]], <vscale x 4 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x half> [[TMP0]]
 //
 vfloat16m1_t test_vfmadd_vv_f16m1(vfloat16m1_t vd, vfloat16m1_t vs1, vfloat16m1_t vs2, size_t vl) {
@@ -54,7 +54,7 @@ vfloat16m1_t test_vfmadd_vv_f16m1(vfloat16m1_t vd, vfloat16m1_t vs1, vfloat16m1_
 
 // CHECK-RV64-LABEL: @test_vfmadd_vf_f16m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x half> @llvm.riscv.vfmadd.nxv4f16.f16.i64(<vscale x 4 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 4 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x half> @llvm.riscv.vfmadd.nxv4f16.f16.i64(<vscale x 4 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 4 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x half> [[TMP0]]
 //
 vfloat16m1_t test_vfmadd_vf_f16m1(vfloat16m1_t vd, _Float16 rs1, vfloat16m1_t vs2, size_t vl) {
@@ -63,7 +63,7 @@ vfloat16m1_t test_vfmadd_vf_f16m1(vfloat16m1_t vd, _Float16 rs1, vfloat16m1_t vs
 
 // CHECK-RV64-LABEL: @test_vfmadd_vv_f16m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x half> @llvm.riscv.vfmadd.nxv8f16.nxv8f16.i64(<vscale x 8 x half> [[VD:%.*]], <vscale x 8 x half> [[VS1:%.*]], <vscale x 8 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x half> @llvm.riscv.vfmadd.nxv8f16.nxv8f16.i64(<vscale x 8 x half> [[VD:%.*]], <vscale x 8 x half> [[VS1:%.*]], <vscale x 8 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x half> [[TMP0]]
 //
 vfloat16m2_t test_vfmadd_vv_f16m2(vfloat16m2_t vd, vfloat16m2_t vs1, vfloat16m2_t vs2, size_t vl) {
@@ -72,7 +72,7 @@ vfloat16m2_t test_vfmadd_vv_f16m2(vfloat16m2_t vd, vfloat16m2_t vs1, vfloat16m2_
 
 // CHECK-RV64-LABEL: @test_vfmadd_vf_f16m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x half> @llvm.riscv.vfmadd.nxv8f16.f16.i64(<vscale x 8 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 8 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x half> @llvm.riscv.vfmadd.nxv8f16.f16.i64(<vscale x 8 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 8 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x half> [[TMP0]]
 //
 vfloat16m2_t test_vfmadd_vf_f16m2(vfloat16m2_t vd, _Float16 rs1, vfloat16m2_t vs2, size_t vl) {
@@ -81,7 +81,7 @@ vfloat16m2_t test_vfmadd_vf_f16m2(vfloat16m2_t vd, _Float16 rs1, vfloat16m2_t vs
 
 // CHECK-RV64-LABEL: @test_vfmadd_vv_f16m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x half> @llvm.riscv.vfmadd.nxv16f16.nxv16f16.i64(<vscale x 16 x half> [[VD:%.*]], <vscale x 16 x half> [[VS1:%.*]], <vscale x 16 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x half> @llvm.riscv.vfmadd.nxv16f16.nxv16f16.i64(<vscale x 16 x half> [[VD:%.*]], <vscale x 16 x half> [[VS1:%.*]], <vscale x 16 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x half> [[TMP0]]
 //
 vfloat16m4_t test_vfmadd_vv_f16m4(vfloat16m4_t vd, vfloat16m4_t vs1, vfloat16m4_t vs2, size_t vl) {
@@ -90,7 +90,7 @@ vfloat16m4_t test_vfmadd_vv_f16m4(vfloat16m4_t vd, vfloat16m4_t vs1, vfloat16m4_
 
 // CHECK-RV64-LABEL: @test_vfmadd_vf_f16m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x half> @llvm.riscv.vfmadd.nxv16f16.f16.i64(<vscale x 16 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 16 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x half> @llvm.riscv.vfmadd.nxv16f16.f16.i64(<vscale x 16 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 16 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x half> [[TMP0]]
 //
 vfloat16m4_t test_vfmadd_vf_f16m4(vfloat16m4_t vd, _Float16 rs1, vfloat16m4_t vs2, size_t vl) {
@@ -99,7 +99,7 @@ vfloat16m4_t test_vfmadd_vf_f16m4(vfloat16m4_t vd, _Float16 rs1, vfloat16m4_t vs
 
 // CHECK-RV64-LABEL: @test_vfmadd_vv_f16m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x half> @llvm.riscv.vfmadd.nxv32f16.nxv32f16.i64(<vscale x 32 x half> [[VD:%.*]], <vscale x 32 x half> [[VS1:%.*]], <vscale x 32 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x half> @llvm.riscv.vfmadd.nxv32f16.nxv32f16.i64(<vscale x 32 x half> [[VD:%.*]], <vscale x 32 x half> [[VS1:%.*]], <vscale x 32 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 32 x half> [[TMP0]]
 //
 vfloat16m8_t test_vfmadd_vv_f16m8(vfloat16m8_t vd, vfloat16m8_t vs1, vfloat16m8_t vs2, size_t vl) {
@@ -108,7 +108,7 @@ vfloat16m8_t test_vfmadd_vv_f16m8(vfloat16m8_t vd, vfloat16m8_t vs1, vfloat16m8_
 
 // CHECK-RV64-LABEL: @test_vfmadd_vf_f16m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x half> @llvm.riscv.vfmadd.nxv32f16.f16.i64(<vscale x 32 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 32 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x half> @llvm.riscv.vfmadd.nxv32f16.f16.i64(<vscale x 32 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 32 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 32 x half> [[TMP0]]
 //
 vfloat16m8_t test_vfmadd_vf_f16m8(vfloat16m8_t vd, _Float16 rs1, vfloat16m8_t vs2, size_t vl) {
@@ -117,7 +117,7 @@ vfloat16m8_t test_vfmadd_vf_f16m8(vfloat16m8_t vd, _Float16 rs1, vfloat16m8_t vs
 
 // CHECK-RV64-LABEL: @test_vfmadd_vv_f32mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x float> @llvm.riscv.vfmadd.nxv1f32.nxv1f32.i64(<vscale x 1 x float> [[VD:%.*]], <vscale x 1 x float> [[VS1:%.*]], <vscale x 1 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x float> @llvm.riscv.vfmadd.nxv1f32.nxv1f32.i64(<vscale x 1 x float> [[VD:%.*]], <vscale x 1 x float> [[VS1:%.*]], <vscale x 1 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x float> [[TMP0]]
 //
 vfloat32mf2_t test_vfmadd_vv_f32mf2(vfloat32mf2_t vd, vfloat32mf2_t vs1, vfloat32mf2_t vs2, size_t vl) {
@@ -126,7 +126,7 @@ vfloat32mf2_t test_vfmadd_vv_f32mf2(vfloat32mf2_t vd, vfloat32mf2_t vs1, vfloat3
 
 // CHECK-RV64-LABEL: @test_vfmadd_vf_f32mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x float> @llvm.riscv.vfmadd.nxv1f32.f32.i64(<vscale x 1 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 1 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x float> @llvm.riscv.vfmadd.nxv1f32.f32.i64(<vscale x 1 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 1 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x float> [[TMP0]]
 //
 vfloat32mf2_t test_vfmadd_vf_f32mf2(vfloat32mf2_t vd, float rs1, vfloat32mf2_t vs2, size_t vl) {
@@ -135,7 +135,7 @@ vfloat32mf2_t test_vfmadd_vf_f32mf2(vfloat32mf2_t vd, float rs1, vfloat32mf2_t v
 
 // CHECK-RV64-LABEL: @test_vfmadd_vv_f32m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x float> @llvm.riscv.vfmadd.nxv2f32.nxv2f32.i64(<vscale x 2 x float> [[VD:%.*]], <vscale x 2 x float> [[VS1:%.*]], <vscale x 2 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x float> @llvm.riscv.vfmadd.nxv2f32.nxv2f32.i64(<vscale x 2 x float> [[VD:%.*]], <vscale x 2 x float> [[VS1:%.*]], <vscale x 2 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x float> [[TMP0]]
 //
 vfloat32m1_t test_vfmadd_vv_f32m1(vfloat32m1_t vd, vfloat32m1_t vs1, vfloat32m1_t vs2, size_t vl) {
@@ -144,7 +144,7 @@ vfloat32m1_t test_vfmadd_vv_f32m1(vfloat32m1_t vd, vfloat32m1_t vs1, vfloat32m1_
 
 // CHECK-RV64-LABEL: @test_vfmadd_vf_f32m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x float> @llvm.riscv.vfmadd.nxv2f32.f32.i64(<vscale x 2 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 2 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x float> @llvm.riscv.vfmadd.nxv2f32.f32.i64(<vscale x 2 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 2 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x float> [[TMP0]]
 //
 vfloat32m1_t test_vfmadd_vf_f32m1(vfloat32m1_t vd, float rs1, vfloat32m1_t vs2, size_t vl) {
@@ -153,7 +153,7 @@ vfloat32m1_t test_vfmadd_vf_f32m1(vfloat32m1_t vd, float rs1, vfloat32m1_t vs2,
 
 // CHECK-RV64-LABEL: @test_vfmadd_vv_f32m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.riscv.vfmadd.nxv4f32.nxv4f32.i64(<vscale x 4 x float> [[VD:%.*]], <vscale x 4 x float> [[VS1:%.*]], <vscale x 4 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.riscv.vfmadd.nxv4f32.nxv4f32.i64(<vscale x 4 x float> [[VD:%.*]], <vscale x 4 x float> [[VS1:%.*]], <vscale x 4 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
 vfloat32m2_t test_vfmadd_vv_f32m2(vfloat32m2_t vd, vfloat32m2_t vs1, vfloat32m2_t vs2, size_t vl) {
@@ -162,7 +162,7 @@ vfloat32m2_t test_vfmadd_vv_f32m2(vfloat32m2_t vd, vfloat32m2_t vs1, vfloat32m2_
 
 // CHECK-RV64-LABEL: @test_vfmadd_vf_f32m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.riscv.vfmadd.nxv4f32.f32.i64(<vscale x 4 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 4 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.riscv.vfmadd.nxv4f32.f32.i64(<vscale x 4 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 4 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
 vfloat32m2_t test_vfmadd_vf_f32m2(vfloat32m2_t vd, float rs1, vfloat32m2_t vs2, size_t vl) {
@@ -171,7 +171,7 @@ vfloat32m2_t test_vfmadd_vf_f32m2(vfloat32m2_t vd, float rs1, vfloat32m2_t vs2,
 
 // CHECK-RV64-LABEL: @test_vfmadd_vv_f32m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x float> @llvm.riscv.vfmadd.nxv8f32.nxv8f32.i64(<vscale x 8 x float> [[VD:%.*]], <vscale x 8 x float> [[VS1:%.*]], <vscale x 8 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x float> @llvm.riscv.vfmadd.nxv8f32.nxv8f32.i64(<vscale x 8 x float> [[VD:%.*]], <vscale x 8 x float> [[VS1:%.*]], <vscale x 8 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x float> [[TMP0]]
 //
 vfloat32m4_t test_vfmadd_vv_f32m4(vfloat32m4_t vd, vfloat32m4_t vs1, vfloat32m4_t vs2, size_t vl) {
@@ -180,7 +180,7 @@ vfloat32m4_t test_vfmadd_vv_f32m4(vfloat32m4_t vd, vfloat32m4_t vs1, vfloat32m4_
 
 // CHECK-RV64-LABEL: @test_vfmadd_vf_f32m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x float> @llvm.riscv.vfmadd.nxv8f32.f32.i64(<vscale x 8 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 8 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x float> @llvm.riscv.vfmadd.nxv8f32.f32.i64(<vscale x 8 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 8 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x float> [[TMP0]]
 //
 vfloat32m4_t test_vfmadd_vf_f32m4(vfloat32m4_t vd, float rs1, vfloat32m4_t vs2, size_t vl) {
@@ -189,7 +189,7 @@ vfloat32m4_t test_vfmadd_vf_f32m4(vfloat32m4_t vd, float rs1, vfloat32m4_t vs2,
 
 // CHECK-RV64-LABEL: @test_vfmadd_vv_f32m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x float> @llvm.riscv.vfmadd.nxv16f32.nxv16f32.i64(<vscale x 16 x float> [[VD:%.*]], <vscale x 16 x float> [[VS1:%.*]], <vscale x 16 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x float> @llvm.riscv.vfmadd.nxv16f32.nxv16f32.i64(<vscale x 16 x float> [[VD:%.*]], <vscale x 16 x float> [[VS1:%.*]], <vscale x 16 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x float> [[TMP0]]
 //
 vfloat32m8_t test_vfmadd_vv_f32m8(vfloat32m8_t vd, vfloat32m8_t vs1, vfloat32m8_t vs2, size_t vl) {
@@ -198,7 +198,7 @@ vfloat32m8_t test_vfmadd_vv_f32m8(vfloat32m8_t vd, vfloat32m8_t vs1, vfloat32m8_
 
 // CHECK-RV64-LABEL: @test_vfmadd_vf_f32m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x float> @llvm.riscv.vfmadd.nxv16f32.f32.i64(<vscale x 16 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 16 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x float> @llvm.riscv.vfmadd.nxv16f32.f32.i64(<vscale x 16 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 16 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x float> [[TMP0]]
 //
 vfloat32m8_t test_vfmadd_vf_f32m8(vfloat32m8_t vd, float rs1, vfloat32m8_t vs2, size_t vl) {
@@ -207,7 +207,7 @@ vfloat32m8_t test_vfmadd_vf_f32m8(vfloat32m8_t vd, float rs1, vfloat32m8_t vs2,
 
 // CHECK-RV64-LABEL: @test_vfmadd_vv_f64m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x double> @llvm.riscv.vfmadd.nxv1f64.nxv1f64.i64(<vscale x 1 x double> [[VD:%.*]], <vscale x 1 x double> [[VS1:%.*]], <vscale x 1 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x double> @llvm.riscv.vfmadd.nxv1f64.nxv1f64.i64(<vscale x 1 x double> [[VD:%.*]], <vscale x 1 x double> [[VS1:%.*]], <vscale x 1 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x double> [[TMP0]]
 //
 vfloat64m1_t test_vfmadd_vv_f64m1(vfloat64m1_t vd, vfloat64m1_t vs1, vfloat64m1_t vs2, size_t vl) {
@@ -216,7 +216,7 @@ vfloat64m1_t test_vfmadd_vv_f64m1(vfloat64m1_t vd, vfloat64m1_t vs1, vfloat64m1_
 
 // CHECK-RV64-LABEL: @test_vfmadd_vf_f64m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x double> @llvm.riscv.vfmadd.nxv1f64.f64.i64(<vscale x 1 x double> [[VD:%.*]], double [[RS1:%.*]], <vscale x 1 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x double> @llvm.riscv.vfmadd.nxv1f64.f64.i64(<vscale x 1 x double> [[VD:%.*]], double [[RS1:%.*]], <vscale x 1 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x double> [[TMP0]]
 //
 vfloat64m1_t test_vfmadd_vf_f64m1(vfloat64m1_t vd, double rs1, vfloat64m1_t vs2, size_t vl) {
@@ -225,7 +225,7 @@ vfloat64m1_t test_vfmadd_vf_f64m1(vfloat64m1_t vd, double rs1, vfloat64m1_t vs2,
 
 // CHECK-RV64-LABEL: @test_vfmadd_vv_f64m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x double> @llvm.riscv.vfmadd.nxv2f64.nxv2f64.i64(<vscale x 2 x double> [[VD:%.*]], <vscale x 2 x double> [[VS1:%.*]], <vscale x 2 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x double> @llvm.riscv.vfmadd.nxv2f64.nxv2f64.i64(<vscale x 2 x double> [[VD:%.*]], <vscale x 2 x double> [[VS1:%.*]], <vscale x 2 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x double> [[TMP0]]
 //
 vfloat64m2_t test_vfmadd_vv_f64m2(vfloat64m2_t vd, vfloat64m2_t vs1, vfloat64m2_t vs2, size_t vl) {
@@ -234,7 +234,7 @@ vfloat64m2_t test_vfmadd_vv_f64m2(vfloat64m2_t vd, vfloat64m2_t vs1, vfloat64m2_
 
 // CHECK-RV64-LABEL: @test_vfmadd_vf_f64m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x double> @llvm.riscv.vfmadd.nxv2f64.f64.i64(<vscale x 2 x double> [[VD:%.*]], double [[RS1:%.*]], <vscale x 2 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x double> @llvm.riscv.vfmadd.nxv2f64.f64.i64(<vscale x 2 x double> [[VD:%.*]], double [[RS1:%.*]], <vscale x 2 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x double> [[TMP0]]
 //
 vfloat64m2_t test_vfmadd_vf_f64m2(vfloat64m2_t vd, double rs1, vfloat64m2_t vs2, size_t vl) {
@@ -243,7 +243,7 @@ vfloat64m2_t test_vfmadd_vf_f64m2(vfloat64m2_t vd, double rs1, vfloat64m2_t vs2,
 
 // CHECK-RV64-LABEL: @test_vfmadd_vv_f64m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x double> @llvm.riscv.vfmadd.nxv4f64.nxv4f64.i64(<vscale x 4 x double> [[VD:%.*]], <vscale x 4 x double> [[VS1:%.*]], <vscale x 4 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x double> @llvm.riscv.vfmadd.nxv4f64.nxv4f64.i64(<vscale x 4 x double> [[VD:%.*]], <vscale x 4 x double> [[VS1:%.*]], <vscale x 4 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x double> [[TMP0]]
 //
 vfloat64m4_t test_vfmadd_vv_f64m4(vfloat64m4_t vd, vfloat64m4_t vs1, vfloat64m4_t vs2, size_t vl) {
@@ -252,7 +252,7 @@ vfloat64m4_t test_vfmadd_vv_f64m4(vfloat64m4_t vd, vfloat64m4_t vs1, vfloat64m4_
 
 // CHECK-RV64-LABEL: @test_vfmadd_vf_f64m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x double> @llvm.riscv.vfmadd.nxv4f64.f64.i64(<vscale x 4 x double> [[VD:%.*]], double [[RS1:%.*]], <vscale x 4 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x double> @llvm.riscv.vfmadd.nxv4f64.f64.i64(<vscale x 4 x double> [[VD:%.*]], double [[RS1:%.*]], <vscale x 4 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x double> [[TMP0]]
 //
 vfloat64m4_t test_vfmadd_vf_f64m4(vfloat64m4_t vd, double rs1, vfloat64m4_t vs2, size_t vl) {
@@ -261,7 +261,7 @@ vfloat64m4_t test_vfmadd_vf_f64m4(vfloat64m4_t vd, double rs1, vfloat64m4_t vs2,
 
 // CHECK-RV64-LABEL: @test_vfmadd_vv_f64m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x double> @llvm.riscv.vfmadd.nxv8f64.nxv8f64.i64(<vscale x 8 x double> [[VD:%.*]], <vscale x 8 x double> [[VS1:%.*]], <vscale x 8 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x double> @llvm.riscv.vfmadd.nxv8f64.nxv8f64.i64(<vscale x 8 x double> [[VD:%.*]], <vscale x 8 x double> [[VS1:%.*]], <vscale x 8 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x double> [[TMP0]]
 //
 vfloat64m8_t test_vfmadd_vv_f64m8(vfloat64m8_t vd, vfloat64m8_t vs1, vfloat64m8_t vs2, size_t vl) {
@@ -270,7 +270,7 @@ vfloat64m8_t test_vfmadd_vv_f64m8(vfloat64m8_t vd, vfloat64m8_t vs1, vfloat64m8_
 
 // CHECK-RV64-LABEL: @test_vfmadd_vf_f64m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x double> @llvm.riscv.vfmadd.nxv8f64.f64.i64(<vscale x 8 x double> [[VD:%.*]], double [[RS1:%.*]], <vscale x 8 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x double> @llvm.riscv.vfmadd.nxv8f64.f64.i64(<vscale x 8 x double> [[VD:%.*]], double [[RS1:%.*]], <vscale x 8 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x double> [[TMP0]]
 //
 vfloat64m8_t test_vfmadd_vf_f64m8(vfloat64m8_t vd, double rs1, vfloat64m8_t vs2, size_t vl) {

diff  --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vfmsac.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vfmsac.c
index ceb9ff7cb1f55..fde4e4f3d14f8 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vfmsac.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vfmsac.c
@@ -9,7 +9,7 @@
 
 // CHECK-RV64-LABEL: @test_vfmsac_vv_f16mf4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x half> @llvm.riscv.vfmsac.nxv1f16.nxv1f16.i64(<vscale x 1 x half> [[VD:%.*]], <vscale x 1 x half> [[VS1:%.*]], <vscale x 1 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x half> @llvm.riscv.vfmsac.nxv1f16.nxv1f16.i64(<vscale x 1 x half> [[VD:%.*]], <vscale x 1 x half> [[VS1:%.*]], <vscale x 1 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x half> [[TMP0]]
 //
 vfloat16mf4_t test_vfmsac_vv_f16mf4(vfloat16mf4_t vd, vfloat16mf4_t vs1, vfloat16mf4_t vs2, size_t vl) {
@@ -18,7 +18,7 @@ vfloat16mf4_t test_vfmsac_vv_f16mf4(vfloat16mf4_t vd, vfloat16mf4_t vs1, vfloat1
 
 // CHECK-RV64-LABEL: @test_vfmsac_vf_f16mf4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x half> @llvm.riscv.vfmsac.nxv1f16.f16.i64(<vscale x 1 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 1 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x half> @llvm.riscv.vfmsac.nxv1f16.f16.i64(<vscale x 1 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 1 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x half> [[TMP0]]
 //
 vfloat16mf4_t test_vfmsac_vf_f16mf4(vfloat16mf4_t vd, _Float16 rs1, vfloat16mf4_t vs2, size_t vl) {
@@ -27,7 +27,7 @@ vfloat16mf4_t test_vfmsac_vf_f16mf4(vfloat16mf4_t vd, _Float16 rs1, vfloat16mf4_
 
 // CHECK-RV64-LABEL: @test_vfmsac_vv_f16mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x half> @llvm.riscv.vfmsac.nxv2f16.nxv2f16.i64(<vscale x 2 x half> [[VD:%.*]], <vscale x 2 x half> [[VS1:%.*]], <vscale x 2 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x half> @llvm.riscv.vfmsac.nxv2f16.nxv2f16.i64(<vscale x 2 x half> [[VD:%.*]], <vscale x 2 x half> [[VS1:%.*]], <vscale x 2 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x half> [[TMP0]]
 //
 vfloat16mf2_t test_vfmsac_vv_f16mf2(vfloat16mf2_t vd, vfloat16mf2_t vs1, vfloat16mf2_t vs2, size_t vl) {
@@ -36,7 +36,7 @@ vfloat16mf2_t test_vfmsac_vv_f16mf2(vfloat16mf2_t vd, vfloat16mf2_t vs1, vfloat1
 
 // CHECK-RV64-LABEL: @test_vfmsac_vf_f16mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x half> @llvm.riscv.vfmsac.nxv2f16.f16.i64(<vscale x 2 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 2 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x half> @llvm.riscv.vfmsac.nxv2f16.f16.i64(<vscale x 2 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 2 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x half> [[TMP0]]
 //
 vfloat16mf2_t test_vfmsac_vf_f16mf2(vfloat16mf2_t vd, _Float16 rs1, vfloat16mf2_t vs2, size_t vl) {
@@ -45,7 +45,7 @@ vfloat16mf2_t test_vfmsac_vf_f16mf2(vfloat16mf2_t vd, _Float16 rs1, vfloat16mf2_
 
 // CHECK-RV64-LABEL: @test_vfmsac_vv_f16m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x half> @llvm.riscv.vfmsac.nxv4f16.nxv4f16.i64(<vscale x 4 x half> [[VD:%.*]], <vscale x 4 x half> [[VS1:%.*]], <vscale x 4 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x half> @llvm.riscv.vfmsac.nxv4f16.nxv4f16.i64(<vscale x 4 x half> [[VD:%.*]], <vscale x 4 x half> [[VS1:%.*]], <vscale x 4 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x half> [[TMP0]]
 //
 vfloat16m1_t test_vfmsac_vv_f16m1(vfloat16m1_t vd, vfloat16m1_t vs1, vfloat16m1_t vs2, size_t vl) {
@@ -54,7 +54,7 @@ vfloat16m1_t test_vfmsac_vv_f16m1(vfloat16m1_t vd, vfloat16m1_t vs1, vfloat16m1_
 
 // CHECK-RV64-LABEL: @test_vfmsac_vf_f16m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x half> @llvm.riscv.vfmsac.nxv4f16.f16.i64(<vscale x 4 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 4 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x half> @llvm.riscv.vfmsac.nxv4f16.f16.i64(<vscale x 4 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 4 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x half> [[TMP0]]
 //
 vfloat16m1_t test_vfmsac_vf_f16m1(vfloat16m1_t vd, _Float16 rs1, vfloat16m1_t vs2, size_t vl) {
@@ -63,7 +63,7 @@ vfloat16m1_t test_vfmsac_vf_f16m1(vfloat16m1_t vd, _Float16 rs1, vfloat16m1_t vs
 
 // CHECK-RV64-LABEL: @test_vfmsac_vv_f16m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x half> @llvm.riscv.vfmsac.nxv8f16.nxv8f16.i64(<vscale x 8 x half> [[VD:%.*]], <vscale x 8 x half> [[VS1:%.*]], <vscale x 8 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x half> @llvm.riscv.vfmsac.nxv8f16.nxv8f16.i64(<vscale x 8 x half> [[VD:%.*]], <vscale x 8 x half> [[VS1:%.*]], <vscale x 8 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x half> [[TMP0]]
 //
 vfloat16m2_t test_vfmsac_vv_f16m2(vfloat16m2_t vd, vfloat16m2_t vs1, vfloat16m2_t vs2, size_t vl) {
@@ -72,7 +72,7 @@ vfloat16m2_t test_vfmsac_vv_f16m2(vfloat16m2_t vd, vfloat16m2_t vs1, vfloat16m2_
 
 // CHECK-RV64-LABEL: @test_vfmsac_vf_f16m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x half> @llvm.riscv.vfmsac.nxv8f16.f16.i64(<vscale x 8 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 8 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x half> @llvm.riscv.vfmsac.nxv8f16.f16.i64(<vscale x 8 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 8 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x half> [[TMP0]]
 //
 vfloat16m2_t test_vfmsac_vf_f16m2(vfloat16m2_t vd, _Float16 rs1, vfloat16m2_t vs2, size_t vl) {
@@ -81,7 +81,7 @@ vfloat16m2_t test_vfmsac_vf_f16m2(vfloat16m2_t vd, _Float16 rs1, vfloat16m2_t vs
 
 // CHECK-RV64-LABEL: @test_vfmsac_vv_f16m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x half> @llvm.riscv.vfmsac.nxv16f16.nxv16f16.i64(<vscale x 16 x half> [[VD:%.*]], <vscale x 16 x half> [[VS1:%.*]], <vscale x 16 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x half> @llvm.riscv.vfmsac.nxv16f16.nxv16f16.i64(<vscale x 16 x half> [[VD:%.*]], <vscale x 16 x half> [[VS1:%.*]], <vscale x 16 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x half> [[TMP0]]
 //
 vfloat16m4_t test_vfmsac_vv_f16m4(vfloat16m4_t vd, vfloat16m4_t vs1, vfloat16m4_t vs2, size_t vl) {
@@ -90,7 +90,7 @@ vfloat16m4_t test_vfmsac_vv_f16m4(vfloat16m4_t vd, vfloat16m4_t vs1, vfloat16m4_
 
 // CHECK-RV64-LABEL: @test_vfmsac_vf_f16m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x half> @llvm.riscv.vfmsac.nxv16f16.f16.i64(<vscale x 16 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 16 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x half> @llvm.riscv.vfmsac.nxv16f16.f16.i64(<vscale x 16 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 16 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x half> [[TMP0]]
 //
 vfloat16m4_t test_vfmsac_vf_f16m4(vfloat16m4_t vd, _Float16 rs1, vfloat16m4_t vs2, size_t vl) {
@@ -99,7 +99,7 @@ vfloat16m4_t test_vfmsac_vf_f16m4(vfloat16m4_t vd, _Float16 rs1, vfloat16m4_t vs
 
 // CHECK-RV64-LABEL: @test_vfmsac_vv_f16m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x half> @llvm.riscv.vfmsac.nxv32f16.nxv32f16.i64(<vscale x 32 x half> [[VD:%.*]], <vscale x 32 x half> [[VS1:%.*]], <vscale x 32 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x half> @llvm.riscv.vfmsac.nxv32f16.nxv32f16.i64(<vscale x 32 x half> [[VD:%.*]], <vscale x 32 x half> [[VS1:%.*]], <vscale x 32 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 32 x half> [[TMP0]]
 //
 vfloat16m8_t test_vfmsac_vv_f16m8(vfloat16m8_t vd, vfloat16m8_t vs1, vfloat16m8_t vs2, size_t vl) {
@@ -108,7 +108,7 @@ vfloat16m8_t test_vfmsac_vv_f16m8(vfloat16m8_t vd, vfloat16m8_t vs1, vfloat16m8_
 
 // CHECK-RV64-LABEL: @test_vfmsac_vf_f16m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x half> @llvm.riscv.vfmsac.nxv32f16.f16.i64(<vscale x 32 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 32 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x half> @llvm.riscv.vfmsac.nxv32f16.f16.i64(<vscale x 32 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 32 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 32 x half> [[TMP0]]
 //
 vfloat16m8_t test_vfmsac_vf_f16m8(vfloat16m8_t vd, _Float16 rs1, vfloat16m8_t vs2, size_t vl) {
@@ -117,7 +117,7 @@ vfloat16m8_t test_vfmsac_vf_f16m8(vfloat16m8_t vd, _Float16 rs1, vfloat16m8_t vs
 
 // CHECK-RV64-LABEL: @test_vfmsac_vv_f32mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x float> @llvm.riscv.vfmsac.nxv1f32.nxv1f32.i64(<vscale x 1 x float> [[VD:%.*]], <vscale x 1 x float> [[VS1:%.*]], <vscale x 1 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x float> @llvm.riscv.vfmsac.nxv1f32.nxv1f32.i64(<vscale x 1 x float> [[VD:%.*]], <vscale x 1 x float> [[VS1:%.*]], <vscale x 1 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x float> [[TMP0]]
 //
 vfloat32mf2_t test_vfmsac_vv_f32mf2(vfloat32mf2_t vd, vfloat32mf2_t vs1, vfloat32mf2_t vs2, size_t vl) {
@@ -126,7 +126,7 @@ vfloat32mf2_t test_vfmsac_vv_f32mf2(vfloat32mf2_t vd, vfloat32mf2_t vs1, vfloat3
 
 // CHECK-RV64-LABEL: @test_vfmsac_vf_f32mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x float> @llvm.riscv.vfmsac.nxv1f32.f32.i64(<vscale x 1 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 1 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x float> @llvm.riscv.vfmsac.nxv1f32.f32.i64(<vscale x 1 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 1 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x float> [[TMP0]]
 //
 vfloat32mf2_t test_vfmsac_vf_f32mf2(vfloat32mf2_t vd, float rs1, vfloat32mf2_t vs2, size_t vl) {
@@ -135,7 +135,7 @@ vfloat32mf2_t test_vfmsac_vf_f32mf2(vfloat32mf2_t vd, float rs1, vfloat32mf2_t v
 
 // CHECK-RV64-LABEL: @test_vfmsac_vv_f32m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x float> @llvm.riscv.vfmsac.nxv2f32.nxv2f32.i64(<vscale x 2 x float> [[VD:%.*]], <vscale x 2 x float> [[VS1:%.*]], <vscale x 2 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x float> @llvm.riscv.vfmsac.nxv2f32.nxv2f32.i64(<vscale x 2 x float> [[VD:%.*]], <vscale x 2 x float> [[VS1:%.*]], <vscale x 2 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x float> [[TMP0]]
 //
 vfloat32m1_t test_vfmsac_vv_f32m1(vfloat32m1_t vd, vfloat32m1_t vs1, vfloat32m1_t vs2, size_t vl) {
@@ -144,7 +144,7 @@ vfloat32m1_t test_vfmsac_vv_f32m1(vfloat32m1_t vd, vfloat32m1_t vs1, vfloat32m1_
 
 // CHECK-RV64-LABEL: @test_vfmsac_vf_f32m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x float> @llvm.riscv.vfmsac.nxv2f32.f32.i64(<vscale x 2 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 2 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x float> @llvm.riscv.vfmsac.nxv2f32.f32.i64(<vscale x 2 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 2 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x float> [[TMP0]]
 //
 vfloat32m1_t test_vfmsac_vf_f32m1(vfloat32m1_t vd, float rs1, vfloat32m1_t vs2, size_t vl) {
@@ -153,7 +153,7 @@ vfloat32m1_t test_vfmsac_vf_f32m1(vfloat32m1_t vd, float rs1, vfloat32m1_t vs2,
 
 // CHECK-RV64-LABEL: @test_vfmsac_vv_f32m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.riscv.vfmsac.nxv4f32.nxv4f32.i64(<vscale x 4 x float> [[VD:%.*]], <vscale x 4 x float> [[VS1:%.*]], <vscale x 4 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.riscv.vfmsac.nxv4f32.nxv4f32.i64(<vscale x 4 x float> [[VD:%.*]], <vscale x 4 x float> [[VS1:%.*]], <vscale x 4 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
 vfloat32m2_t test_vfmsac_vv_f32m2(vfloat32m2_t vd, vfloat32m2_t vs1, vfloat32m2_t vs2, size_t vl) {
@@ -162,7 +162,7 @@ vfloat32m2_t test_vfmsac_vv_f32m2(vfloat32m2_t vd, vfloat32m2_t vs1, vfloat32m2_
 
 // CHECK-RV64-LABEL: @test_vfmsac_vf_f32m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.riscv.vfmsac.nxv4f32.f32.i64(<vscale x 4 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 4 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.riscv.vfmsac.nxv4f32.f32.i64(<vscale x 4 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 4 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
 vfloat32m2_t test_vfmsac_vf_f32m2(vfloat32m2_t vd, float rs1, vfloat32m2_t vs2, size_t vl) {
@@ -171,7 +171,7 @@ vfloat32m2_t test_vfmsac_vf_f32m2(vfloat32m2_t vd, float rs1, vfloat32m2_t vs2,
 
 // CHECK-RV64-LABEL: @test_vfmsac_vv_f32m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x float> @llvm.riscv.vfmsac.nxv8f32.nxv8f32.i64(<vscale x 8 x float> [[VD:%.*]], <vscale x 8 x float> [[VS1:%.*]], <vscale x 8 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x float> @llvm.riscv.vfmsac.nxv8f32.nxv8f32.i64(<vscale x 8 x float> [[VD:%.*]], <vscale x 8 x float> [[VS1:%.*]], <vscale x 8 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x float> [[TMP0]]
 //
 vfloat32m4_t test_vfmsac_vv_f32m4(vfloat32m4_t vd, vfloat32m4_t vs1, vfloat32m4_t vs2, size_t vl) {
@@ -180,7 +180,7 @@ vfloat32m4_t test_vfmsac_vv_f32m4(vfloat32m4_t vd, vfloat32m4_t vs1, vfloat32m4_
 
 // CHECK-RV64-LABEL: @test_vfmsac_vf_f32m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x float> @llvm.riscv.vfmsac.nxv8f32.f32.i64(<vscale x 8 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 8 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x float> @llvm.riscv.vfmsac.nxv8f32.f32.i64(<vscale x 8 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 8 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x float> [[TMP0]]
 //
 vfloat32m4_t test_vfmsac_vf_f32m4(vfloat32m4_t vd, float rs1, vfloat32m4_t vs2, size_t vl) {
@@ -189,7 +189,7 @@ vfloat32m4_t test_vfmsac_vf_f32m4(vfloat32m4_t vd, float rs1, vfloat32m4_t vs2,
 
 // CHECK-RV64-LABEL: @test_vfmsac_vv_f32m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x float> @llvm.riscv.vfmsac.nxv16f32.nxv16f32.i64(<vscale x 16 x float> [[VD:%.*]], <vscale x 16 x float> [[VS1:%.*]], <vscale x 16 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x float> @llvm.riscv.vfmsac.nxv16f32.nxv16f32.i64(<vscale x 16 x float> [[VD:%.*]], <vscale x 16 x float> [[VS1:%.*]], <vscale x 16 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x float> [[TMP0]]
 //
 vfloat32m8_t test_vfmsac_vv_f32m8(vfloat32m8_t vd, vfloat32m8_t vs1, vfloat32m8_t vs2, size_t vl) {
@@ -198,7 +198,7 @@ vfloat32m8_t test_vfmsac_vv_f32m8(vfloat32m8_t vd, vfloat32m8_t vs1, vfloat32m8_
 
 // CHECK-RV64-LABEL: @test_vfmsac_vf_f32m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x float> @llvm.riscv.vfmsac.nxv16f32.f32.i64(<vscale x 16 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 16 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x float> @llvm.riscv.vfmsac.nxv16f32.f32.i64(<vscale x 16 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 16 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x float> [[TMP0]]
 //
 vfloat32m8_t test_vfmsac_vf_f32m8(vfloat32m8_t vd, float rs1, vfloat32m8_t vs2, size_t vl) {
@@ -207,7 +207,7 @@ vfloat32m8_t test_vfmsac_vf_f32m8(vfloat32m8_t vd, float rs1, vfloat32m8_t vs2,
 
 // CHECK-RV64-LABEL: @test_vfmsac_vv_f64m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x double> @llvm.riscv.vfmsac.nxv1f64.nxv1f64.i64(<vscale x 1 x double> [[VD:%.*]], <vscale x 1 x double> [[VS1:%.*]], <vscale x 1 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x double> @llvm.riscv.vfmsac.nxv1f64.nxv1f64.i64(<vscale x 1 x double> [[VD:%.*]], <vscale x 1 x double> [[VS1:%.*]], <vscale x 1 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x double> [[TMP0]]
 //
 vfloat64m1_t test_vfmsac_vv_f64m1(vfloat64m1_t vd, vfloat64m1_t vs1, vfloat64m1_t vs2, size_t vl) {
@@ -216,7 +216,7 @@ vfloat64m1_t test_vfmsac_vv_f64m1(vfloat64m1_t vd, vfloat64m1_t vs1, vfloat64m1_
 
 // CHECK-RV64-LABEL: @test_vfmsac_vf_f64m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x double> @llvm.riscv.vfmsac.nxv1f64.f64.i64(<vscale x 1 x double> [[VD:%.*]], double [[RS1:%.*]], <vscale x 1 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x double> @llvm.riscv.vfmsac.nxv1f64.f64.i64(<vscale x 1 x double> [[VD:%.*]], double [[RS1:%.*]], <vscale x 1 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x double> [[TMP0]]
 //
 vfloat64m1_t test_vfmsac_vf_f64m1(vfloat64m1_t vd, double rs1, vfloat64m1_t vs2, size_t vl) {
@@ -225,7 +225,7 @@ vfloat64m1_t test_vfmsac_vf_f64m1(vfloat64m1_t vd, double rs1, vfloat64m1_t vs2,
 
 // CHECK-RV64-LABEL: @test_vfmsac_vv_f64m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x double> @llvm.riscv.vfmsac.nxv2f64.nxv2f64.i64(<vscale x 2 x double> [[VD:%.*]], <vscale x 2 x double> [[VS1:%.*]], <vscale x 2 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x double> @llvm.riscv.vfmsac.nxv2f64.nxv2f64.i64(<vscale x 2 x double> [[VD:%.*]], <vscale x 2 x double> [[VS1:%.*]], <vscale x 2 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x double> [[TMP0]]
 //
 vfloat64m2_t test_vfmsac_vv_f64m2(vfloat64m2_t vd, vfloat64m2_t vs1, vfloat64m2_t vs2, size_t vl) {
@@ -234,7 +234,7 @@ vfloat64m2_t test_vfmsac_vv_f64m2(vfloat64m2_t vd, vfloat64m2_t vs1, vfloat64m2_
 
 // CHECK-RV64-LABEL: @test_vfmsac_vf_f64m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x double> @llvm.riscv.vfmsac.nxv2f64.f64.i64(<vscale x 2 x double> [[VD:%.*]], double [[RS1:%.*]], <vscale x 2 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x double> @llvm.riscv.vfmsac.nxv2f64.f64.i64(<vscale x 2 x double> [[VD:%.*]], double [[RS1:%.*]], <vscale x 2 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x double> [[TMP0]]
 //
 vfloat64m2_t test_vfmsac_vf_f64m2(vfloat64m2_t vd, double rs1, vfloat64m2_t vs2, size_t vl) {
@@ -243,7 +243,7 @@ vfloat64m2_t test_vfmsac_vf_f64m2(vfloat64m2_t vd, double rs1, vfloat64m2_t vs2,
 
 // CHECK-RV64-LABEL: @test_vfmsac_vv_f64m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x double> @llvm.riscv.vfmsac.nxv4f64.nxv4f64.i64(<vscale x 4 x double> [[VD:%.*]], <vscale x 4 x double> [[VS1:%.*]], <vscale x 4 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x double> @llvm.riscv.vfmsac.nxv4f64.nxv4f64.i64(<vscale x 4 x double> [[VD:%.*]], <vscale x 4 x double> [[VS1:%.*]], <vscale x 4 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x double> [[TMP0]]
 //
 vfloat64m4_t test_vfmsac_vv_f64m4(vfloat64m4_t vd, vfloat64m4_t vs1, vfloat64m4_t vs2, size_t vl) {
@@ -252,7 +252,7 @@ vfloat64m4_t test_vfmsac_vv_f64m4(vfloat64m4_t vd, vfloat64m4_t vs1, vfloat64m4_
 
 // CHECK-RV64-LABEL: @test_vfmsac_vf_f64m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x double> @llvm.riscv.vfmsac.nxv4f64.f64.i64(<vscale x 4 x double> [[VD:%.*]], double [[RS1:%.*]], <vscale x 4 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x double> @llvm.riscv.vfmsac.nxv4f64.f64.i64(<vscale x 4 x double> [[VD:%.*]], double [[RS1:%.*]], <vscale x 4 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x double> [[TMP0]]
 //
 vfloat64m4_t test_vfmsac_vf_f64m4(vfloat64m4_t vd, double rs1, vfloat64m4_t vs2, size_t vl) {
@@ -261,7 +261,7 @@ vfloat64m4_t test_vfmsac_vf_f64m4(vfloat64m4_t vd, double rs1, vfloat64m4_t vs2,
 
 // CHECK-RV64-LABEL: @test_vfmsac_vv_f64m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x double> @llvm.riscv.vfmsac.nxv8f64.nxv8f64.i64(<vscale x 8 x double> [[VD:%.*]], <vscale x 8 x double> [[VS1:%.*]], <vscale x 8 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x double> @llvm.riscv.vfmsac.nxv8f64.nxv8f64.i64(<vscale x 8 x double> [[VD:%.*]], <vscale x 8 x double> [[VS1:%.*]], <vscale x 8 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x double> [[TMP0]]
 //
 vfloat64m8_t test_vfmsac_vv_f64m8(vfloat64m8_t vd, vfloat64m8_t vs1, vfloat64m8_t vs2, size_t vl) {
@@ -270,7 +270,7 @@ vfloat64m8_t test_vfmsac_vv_f64m8(vfloat64m8_t vd, vfloat64m8_t vs1, vfloat64m8_
 
 // CHECK-RV64-LABEL: @test_vfmsac_vf_f64m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x double> @llvm.riscv.vfmsac.nxv8f64.f64.i64(<vscale x 8 x double> [[VD:%.*]], double [[RS1:%.*]], <vscale x 8 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x double> @llvm.riscv.vfmsac.nxv8f64.f64.i64(<vscale x 8 x double> [[VD:%.*]], double [[RS1:%.*]], <vscale x 8 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x double> [[TMP0]]
 //
 vfloat64m8_t test_vfmsac_vf_f64m8(vfloat64m8_t vd, double rs1, vfloat64m8_t vs2, size_t vl) {

diff  --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vfmsub.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vfmsub.c
index 29aaa90711d7c..15f95d8848e22 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vfmsub.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vfmsub.c
@@ -9,7 +9,7 @@
 
 // CHECK-RV64-LABEL: @test_vfmsub_vv_f16mf4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x half> @llvm.riscv.vfmsub.nxv1f16.nxv1f16.i64(<vscale x 1 x half> [[VD:%.*]], <vscale x 1 x half> [[VS1:%.*]], <vscale x 1 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x half> @llvm.riscv.vfmsub.nxv1f16.nxv1f16.i64(<vscale x 1 x half> [[VD:%.*]], <vscale x 1 x half> [[VS1:%.*]], <vscale x 1 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x half> [[TMP0]]
 //
 vfloat16mf4_t test_vfmsub_vv_f16mf4(vfloat16mf4_t vd, vfloat16mf4_t vs1, vfloat16mf4_t vs2, size_t vl) {
@@ -18,7 +18,7 @@ vfloat16mf4_t test_vfmsub_vv_f16mf4(vfloat16mf4_t vd, vfloat16mf4_t vs1, vfloat1
 
 // CHECK-RV64-LABEL: @test_vfmsub_vf_f16mf4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x half> @llvm.riscv.vfmsub.nxv1f16.f16.i64(<vscale x 1 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 1 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x half> @llvm.riscv.vfmsub.nxv1f16.f16.i64(<vscale x 1 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 1 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x half> [[TMP0]]
 //
 vfloat16mf4_t test_vfmsub_vf_f16mf4(vfloat16mf4_t vd, _Float16 rs1, vfloat16mf4_t vs2, size_t vl) {
@@ -27,7 +27,7 @@ vfloat16mf4_t test_vfmsub_vf_f16mf4(vfloat16mf4_t vd, _Float16 rs1, vfloat16mf4_
 
 // CHECK-RV64-LABEL: @test_vfmsub_vv_f16mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x half> @llvm.riscv.vfmsub.nxv2f16.nxv2f16.i64(<vscale x 2 x half> [[VD:%.*]], <vscale x 2 x half> [[VS1:%.*]], <vscale x 2 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x half> @llvm.riscv.vfmsub.nxv2f16.nxv2f16.i64(<vscale x 2 x half> [[VD:%.*]], <vscale x 2 x half> [[VS1:%.*]], <vscale x 2 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x half> [[TMP0]]
 //
 vfloat16mf2_t test_vfmsub_vv_f16mf2(vfloat16mf2_t vd, vfloat16mf2_t vs1, vfloat16mf2_t vs2, size_t vl) {
@@ -36,7 +36,7 @@ vfloat16mf2_t test_vfmsub_vv_f16mf2(vfloat16mf2_t vd, vfloat16mf2_t vs1, vfloat1
 
 // CHECK-RV64-LABEL: @test_vfmsub_vf_f16mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x half> @llvm.riscv.vfmsub.nxv2f16.f16.i64(<vscale x 2 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 2 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x half> @llvm.riscv.vfmsub.nxv2f16.f16.i64(<vscale x 2 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 2 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x half> [[TMP0]]
 //
 vfloat16mf2_t test_vfmsub_vf_f16mf2(vfloat16mf2_t vd, _Float16 rs1, vfloat16mf2_t vs2, size_t vl) {
@@ -45,7 +45,7 @@ vfloat16mf2_t test_vfmsub_vf_f16mf2(vfloat16mf2_t vd, _Float16 rs1, vfloat16mf2_
 
 // CHECK-RV64-LABEL: @test_vfmsub_vv_f16m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x half> @llvm.riscv.vfmsub.nxv4f16.nxv4f16.i64(<vscale x 4 x half> [[VD:%.*]], <vscale x 4 x half> [[VS1:%.*]], <vscale x 4 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x half> @llvm.riscv.vfmsub.nxv4f16.nxv4f16.i64(<vscale x 4 x half> [[VD:%.*]], <vscale x 4 x half> [[VS1:%.*]], <vscale x 4 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x half> [[TMP0]]
 //
 vfloat16m1_t test_vfmsub_vv_f16m1(vfloat16m1_t vd, vfloat16m1_t vs1, vfloat16m1_t vs2, size_t vl) {
@@ -54,7 +54,7 @@ vfloat16m1_t test_vfmsub_vv_f16m1(vfloat16m1_t vd, vfloat16m1_t vs1, vfloat16m1_
 
 // CHECK-RV64-LABEL: @test_vfmsub_vf_f16m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x half> @llvm.riscv.vfmsub.nxv4f16.f16.i64(<vscale x 4 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 4 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x half> @llvm.riscv.vfmsub.nxv4f16.f16.i64(<vscale x 4 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 4 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x half> [[TMP0]]
 //
 vfloat16m1_t test_vfmsub_vf_f16m1(vfloat16m1_t vd, _Float16 rs1, vfloat16m1_t vs2, size_t vl) {
@@ -63,7 +63,7 @@ vfloat16m1_t test_vfmsub_vf_f16m1(vfloat16m1_t vd, _Float16 rs1, vfloat16m1_t vs
 
 // CHECK-RV64-LABEL: @test_vfmsub_vv_f16m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x half> @llvm.riscv.vfmsub.nxv8f16.nxv8f16.i64(<vscale x 8 x half> [[VD:%.*]], <vscale x 8 x half> [[VS1:%.*]], <vscale x 8 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x half> @llvm.riscv.vfmsub.nxv8f16.nxv8f16.i64(<vscale x 8 x half> [[VD:%.*]], <vscale x 8 x half> [[VS1:%.*]], <vscale x 8 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x half> [[TMP0]]
 //
 vfloat16m2_t test_vfmsub_vv_f16m2(vfloat16m2_t vd, vfloat16m2_t vs1, vfloat16m2_t vs2, size_t vl) {
@@ -72,7 +72,7 @@ vfloat16m2_t test_vfmsub_vv_f16m2(vfloat16m2_t vd, vfloat16m2_t vs1, vfloat16m2_
 
 // CHECK-RV64-LABEL: @test_vfmsub_vf_f16m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x half> @llvm.riscv.vfmsub.nxv8f16.f16.i64(<vscale x 8 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 8 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x half> @llvm.riscv.vfmsub.nxv8f16.f16.i64(<vscale x 8 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 8 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x half> [[TMP0]]
 //
 vfloat16m2_t test_vfmsub_vf_f16m2(vfloat16m2_t vd, _Float16 rs1, vfloat16m2_t vs2, size_t vl) {
@@ -81,7 +81,7 @@ vfloat16m2_t test_vfmsub_vf_f16m2(vfloat16m2_t vd, _Float16 rs1, vfloat16m2_t vs
 
 // CHECK-RV64-LABEL: @test_vfmsub_vv_f16m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x half> @llvm.riscv.vfmsub.nxv16f16.nxv16f16.i64(<vscale x 16 x half> [[VD:%.*]], <vscale x 16 x half> [[VS1:%.*]], <vscale x 16 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x half> @llvm.riscv.vfmsub.nxv16f16.nxv16f16.i64(<vscale x 16 x half> [[VD:%.*]], <vscale x 16 x half> [[VS1:%.*]], <vscale x 16 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x half> [[TMP0]]
 //
 vfloat16m4_t test_vfmsub_vv_f16m4(vfloat16m4_t vd, vfloat16m4_t vs1, vfloat16m4_t vs2, size_t vl) {
@@ -90,7 +90,7 @@ vfloat16m4_t test_vfmsub_vv_f16m4(vfloat16m4_t vd, vfloat16m4_t vs1, vfloat16m4_
 
 // CHECK-RV64-LABEL: @test_vfmsub_vf_f16m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x half> @llvm.riscv.vfmsub.nxv16f16.f16.i64(<vscale x 16 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 16 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x half> @llvm.riscv.vfmsub.nxv16f16.f16.i64(<vscale x 16 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 16 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x half> [[TMP0]]
 //
 vfloat16m4_t test_vfmsub_vf_f16m4(vfloat16m4_t vd, _Float16 rs1, vfloat16m4_t vs2, size_t vl) {
@@ -99,7 +99,7 @@ vfloat16m4_t test_vfmsub_vf_f16m4(vfloat16m4_t vd, _Float16 rs1, vfloat16m4_t vs
 
 // CHECK-RV64-LABEL: @test_vfmsub_vv_f16m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x half> @llvm.riscv.vfmsub.nxv32f16.nxv32f16.i64(<vscale x 32 x half> [[VD:%.*]], <vscale x 32 x half> [[VS1:%.*]], <vscale x 32 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x half> @llvm.riscv.vfmsub.nxv32f16.nxv32f16.i64(<vscale x 32 x half> [[VD:%.*]], <vscale x 32 x half> [[VS1:%.*]], <vscale x 32 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 32 x half> [[TMP0]]
 //
 vfloat16m8_t test_vfmsub_vv_f16m8(vfloat16m8_t vd, vfloat16m8_t vs1, vfloat16m8_t vs2, size_t vl) {
@@ -108,7 +108,7 @@ vfloat16m8_t test_vfmsub_vv_f16m8(vfloat16m8_t vd, vfloat16m8_t vs1, vfloat16m8_
 
 // CHECK-RV64-LABEL: @test_vfmsub_vf_f16m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x half> @llvm.riscv.vfmsub.nxv32f16.f16.i64(<vscale x 32 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 32 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x half> @llvm.riscv.vfmsub.nxv32f16.f16.i64(<vscale x 32 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 32 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 32 x half> [[TMP0]]
 //
 vfloat16m8_t test_vfmsub_vf_f16m8(vfloat16m8_t vd, _Float16 rs1, vfloat16m8_t vs2, size_t vl) {
@@ -117,7 +117,7 @@ vfloat16m8_t test_vfmsub_vf_f16m8(vfloat16m8_t vd, _Float16 rs1, vfloat16m8_t vs
 
 // CHECK-RV64-LABEL: @test_vfmsub_vv_f32mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x float> @llvm.riscv.vfmsub.nxv1f32.nxv1f32.i64(<vscale x 1 x float> [[VD:%.*]], <vscale x 1 x float> [[VS1:%.*]], <vscale x 1 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x float> @llvm.riscv.vfmsub.nxv1f32.nxv1f32.i64(<vscale x 1 x float> [[VD:%.*]], <vscale x 1 x float> [[VS1:%.*]], <vscale x 1 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x float> [[TMP0]]
 //
 vfloat32mf2_t test_vfmsub_vv_f32mf2(vfloat32mf2_t vd, vfloat32mf2_t vs1, vfloat32mf2_t vs2, size_t vl) {
@@ -126,7 +126,7 @@ vfloat32mf2_t test_vfmsub_vv_f32mf2(vfloat32mf2_t vd, vfloat32mf2_t vs1, vfloat3
 
 // CHECK-RV64-LABEL: @test_vfmsub_vf_f32mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x float> @llvm.riscv.vfmsub.nxv1f32.f32.i64(<vscale x 1 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 1 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x float> @llvm.riscv.vfmsub.nxv1f32.f32.i64(<vscale x 1 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 1 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x float> [[TMP0]]
 //
 vfloat32mf2_t test_vfmsub_vf_f32mf2(vfloat32mf2_t vd, float rs1, vfloat32mf2_t vs2, size_t vl) {
@@ -135,7 +135,7 @@ vfloat32mf2_t test_vfmsub_vf_f32mf2(vfloat32mf2_t vd, float rs1, vfloat32mf2_t v
 
 // CHECK-RV64-LABEL: @test_vfmsub_vv_f32m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x float> @llvm.riscv.vfmsub.nxv2f32.nxv2f32.i64(<vscale x 2 x float> [[VD:%.*]], <vscale x 2 x float> [[VS1:%.*]], <vscale x 2 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x float> @llvm.riscv.vfmsub.nxv2f32.nxv2f32.i64(<vscale x 2 x float> [[VD:%.*]], <vscale x 2 x float> [[VS1:%.*]], <vscale x 2 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x float> [[TMP0]]
 //
 vfloat32m1_t test_vfmsub_vv_f32m1(vfloat32m1_t vd, vfloat32m1_t vs1, vfloat32m1_t vs2, size_t vl) {
@@ -144,7 +144,7 @@ vfloat32m1_t test_vfmsub_vv_f32m1(vfloat32m1_t vd, vfloat32m1_t vs1, vfloat32m1_
 
 // CHECK-RV64-LABEL: @test_vfmsub_vf_f32m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x float> @llvm.riscv.vfmsub.nxv2f32.f32.i64(<vscale x 2 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 2 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x float> @llvm.riscv.vfmsub.nxv2f32.f32.i64(<vscale x 2 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 2 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x float> [[TMP0]]
 //
 vfloat32m1_t test_vfmsub_vf_f32m1(vfloat32m1_t vd, float rs1, vfloat32m1_t vs2, size_t vl) {
@@ -153,7 +153,7 @@ vfloat32m1_t test_vfmsub_vf_f32m1(vfloat32m1_t vd, float rs1, vfloat32m1_t vs2,
 
 // CHECK-RV64-LABEL: @test_vfmsub_vv_f32m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.riscv.vfmsub.nxv4f32.nxv4f32.i64(<vscale x 4 x float> [[VD:%.*]], <vscale x 4 x float> [[VS1:%.*]], <vscale x 4 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.riscv.vfmsub.nxv4f32.nxv4f32.i64(<vscale x 4 x float> [[VD:%.*]], <vscale x 4 x float> [[VS1:%.*]], <vscale x 4 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
 vfloat32m2_t test_vfmsub_vv_f32m2(vfloat32m2_t vd, vfloat32m2_t vs1, vfloat32m2_t vs2, size_t vl) {
@@ -162,7 +162,7 @@ vfloat32m2_t test_vfmsub_vv_f32m2(vfloat32m2_t vd, vfloat32m2_t vs1, vfloat32m2_
 
 // CHECK-RV64-LABEL: @test_vfmsub_vf_f32m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.riscv.vfmsub.nxv4f32.f32.i64(<vscale x 4 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 4 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.riscv.vfmsub.nxv4f32.f32.i64(<vscale x 4 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 4 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
 vfloat32m2_t test_vfmsub_vf_f32m2(vfloat32m2_t vd, float rs1, vfloat32m2_t vs2, size_t vl) {
@@ -171,7 +171,7 @@ vfloat32m2_t test_vfmsub_vf_f32m2(vfloat32m2_t vd, float rs1, vfloat32m2_t vs2,
 
 // CHECK-RV64-LABEL: @test_vfmsub_vv_f32m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x float> @llvm.riscv.vfmsub.nxv8f32.nxv8f32.i64(<vscale x 8 x float> [[VD:%.*]], <vscale x 8 x float> [[VS1:%.*]], <vscale x 8 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x float> @llvm.riscv.vfmsub.nxv8f32.nxv8f32.i64(<vscale x 8 x float> [[VD:%.*]], <vscale x 8 x float> [[VS1:%.*]], <vscale x 8 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x float> [[TMP0]]
 //
 vfloat32m4_t test_vfmsub_vv_f32m4(vfloat32m4_t vd, vfloat32m4_t vs1, vfloat32m4_t vs2, size_t vl) {
@@ -180,7 +180,7 @@ vfloat32m4_t test_vfmsub_vv_f32m4(vfloat32m4_t vd, vfloat32m4_t vs1, vfloat32m4_
 
 // CHECK-RV64-LABEL: @test_vfmsub_vf_f32m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x float> @llvm.riscv.vfmsub.nxv8f32.f32.i64(<vscale x 8 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 8 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x float> @llvm.riscv.vfmsub.nxv8f32.f32.i64(<vscale x 8 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 8 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x float> [[TMP0]]
 //
 vfloat32m4_t test_vfmsub_vf_f32m4(vfloat32m4_t vd, float rs1, vfloat32m4_t vs2, size_t vl) {
@@ -189,7 +189,7 @@ vfloat32m4_t test_vfmsub_vf_f32m4(vfloat32m4_t vd, float rs1, vfloat32m4_t vs2,
 
 // CHECK-RV64-LABEL: @test_vfmsub_vv_f32m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x float> @llvm.riscv.vfmsub.nxv16f32.nxv16f32.i64(<vscale x 16 x float> [[VD:%.*]], <vscale x 16 x float> [[VS1:%.*]], <vscale x 16 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x float> @llvm.riscv.vfmsub.nxv16f32.nxv16f32.i64(<vscale x 16 x float> [[VD:%.*]], <vscale x 16 x float> [[VS1:%.*]], <vscale x 16 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x float> [[TMP0]]
 //
 vfloat32m8_t test_vfmsub_vv_f32m8(vfloat32m8_t vd, vfloat32m8_t vs1, vfloat32m8_t vs2, size_t vl) {
@@ -198,7 +198,7 @@ vfloat32m8_t test_vfmsub_vv_f32m8(vfloat32m8_t vd, vfloat32m8_t vs1, vfloat32m8_
 
 // CHECK-RV64-LABEL: @test_vfmsub_vf_f32m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x float> @llvm.riscv.vfmsub.nxv16f32.f32.i64(<vscale x 16 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 16 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x float> @llvm.riscv.vfmsub.nxv16f32.f32.i64(<vscale x 16 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 16 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x float> [[TMP0]]
 //
 vfloat32m8_t test_vfmsub_vf_f32m8(vfloat32m8_t vd, float rs1, vfloat32m8_t vs2, size_t vl) {
@@ -207,7 +207,7 @@ vfloat32m8_t test_vfmsub_vf_f32m8(vfloat32m8_t vd, float rs1, vfloat32m8_t vs2,
 
 // CHECK-RV64-LABEL: @test_vfmsub_vv_f64m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x double> @llvm.riscv.vfmsub.nxv1f64.nxv1f64.i64(<vscale x 1 x double> [[VD:%.*]], <vscale x 1 x double> [[VS1:%.*]], <vscale x 1 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x double> @llvm.riscv.vfmsub.nxv1f64.nxv1f64.i64(<vscale x 1 x double> [[VD:%.*]], <vscale x 1 x double> [[VS1:%.*]], <vscale x 1 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x double> [[TMP0]]
 //
 vfloat64m1_t test_vfmsub_vv_f64m1(vfloat64m1_t vd, vfloat64m1_t vs1, vfloat64m1_t vs2, size_t vl) {
@@ -216,7 +216,7 @@ vfloat64m1_t test_vfmsub_vv_f64m1(vfloat64m1_t vd, vfloat64m1_t vs1, vfloat64m1_
 
 // CHECK-RV64-LABEL: @test_vfmsub_vf_f64m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x double> @llvm.riscv.vfmsub.nxv1f64.f64.i64(<vscale x 1 x double> [[VD:%.*]], double [[RS1:%.*]], <vscale x 1 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x double> @llvm.riscv.vfmsub.nxv1f64.f64.i64(<vscale x 1 x double> [[VD:%.*]], double [[RS1:%.*]], <vscale x 1 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x double> [[TMP0]]
 //
 vfloat64m1_t test_vfmsub_vf_f64m1(vfloat64m1_t vd, double rs1, vfloat64m1_t vs2, size_t vl) {
@@ -225,7 +225,7 @@ vfloat64m1_t test_vfmsub_vf_f64m1(vfloat64m1_t vd, double rs1, vfloat64m1_t vs2,
 
 // CHECK-RV64-LABEL: @test_vfmsub_vv_f64m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x double> @llvm.riscv.vfmsub.nxv2f64.nxv2f64.i64(<vscale x 2 x double> [[VD:%.*]], <vscale x 2 x double> [[VS1:%.*]], <vscale x 2 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x double> @llvm.riscv.vfmsub.nxv2f64.nxv2f64.i64(<vscale x 2 x double> [[VD:%.*]], <vscale x 2 x double> [[VS1:%.*]], <vscale x 2 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x double> [[TMP0]]
 //
 vfloat64m2_t test_vfmsub_vv_f64m2(vfloat64m2_t vd, vfloat64m2_t vs1, vfloat64m2_t vs2, size_t vl) {
@@ -234,7 +234,7 @@ vfloat64m2_t test_vfmsub_vv_f64m2(vfloat64m2_t vd, vfloat64m2_t vs1, vfloat64m2_
 
 // CHECK-RV64-LABEL: @test_vfmsub_vf_f64m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x double> @llvm.riscv.vfmsub.nxv2f64.f64.i64(<vscale x 2 x double> [[VD:%.*]], double [[RS1:%.*]], <vscale x 2 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x double> @llvm.riscv.vfmsub.nxv2f64.f64.i64(<vscale x 2 x double> [[VD:%.*]], double [[RS1:%.*]], <vscale x 2 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x double> [[TMP0]]
 //
 vfloat64m2_t test_vfmsub_vf_f64m2(vfloat64m2_t vd, double rs1, vfloat64m2_t vs2, size_t vl) {
@@ -243,7 +243,7 @@ vfloat64m2_t test_vfmsub_vf_f64m2(vfloat64m2_t vd, double rs1, vfloat64m2_t vs2,
 
 // CHECK-RV64-LABEL: @test_vfmsub_vv_f64m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x double> @llvm.riscv.vfmsub.nxv4f64.nxv4f64.i64(<vscale x 4 x double> [[VD:%.*]], <vscale x 4 x double> [[VS1:%.*]], <vscale x 4 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x double> @llvm.riscv.vfmsub.nxv4f64.nxv4f64.i64(<vscale x 4 x double> [[VD:%.*]], <vscale x 4 x double> [[VS1:%.*]], <vscale x 4 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x double> [[TMP0]]
 //
 vfloat64m4_t test_vfmsub_vv_f64m4(vfloat64m4_t vd, vfloat64m4_t vs1, vfloat64m4_t vs2, size_t vl) {
@@ -252,7 +252,7 @@ vfloat64m4_t test_vfmsub_vv_f64m4(vfloat64m4_t vd, vfloat64m4_t vs1, vfloat64m4_
 
 // CHECK-RV64-LABEL: @test_vfmsub_vf_f64m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x double> @llvm.riscv.vfmsub.nxv4f64.f64.i64(<vscale x 4 x double> [[VD:%.*]], double [[RS1:%.*]], <vscale x 4 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x double> @llvm.riscv.vfmsub.nxv4f64.f64.i64(<vscale x 4 x double> [[VD:%.*]], double [[RS1:%.*]], <vscale x 4 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x double> [[TMP0]]
 //
 vfloat64m4_t test_vfmsub_vf_f64m4(vfloat64m4_t vd, double rs1, vfloat64m4_t vs2, size_t vl) {
@@ -261,7 +261,7 @@ vfloat64m4_t test_vfmsub_vf_f64m4(vfloat64m4_t vd, double rs1, vfloat64m4_t vs2,
 
 // CHECK-RV64-LABEL: @test_vfmsub_vv_f64m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x double> @llvm.riscv.vfmsub.nxv8f64.nxv8f64.i64(<vscale x 8 x double> [[VD:%.*]], <vscale x 8 x double> [[VS1:%.*]], <vscale x 8 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x double> @llvm.riscv.vfmsub.nxv8f64.nxv8f64.i64(<vscale x 8 x double> [[VD:%.*]], <vscale x 8 x double> [[VS1:%.*]], <vscale x 8 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x double> [[TMP0]]
 //
 vfloat64m8_t test_vfmsub_vv_f64m8(vfloat64m8_t vd, vfloat64m8_t vs1, vfloat64m8_t vs2, size_t vl) {
@@ -270,7 +270,7 @@ vfloat64m8_t test_vfmsub_vv_f64m8(vfloat64m8_t vd, vfloat64m8_t vs1, vfloat64m8_
 
 // CHECK-RV64-LABEL: @test_vfmsub_vf_f64m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x double> @llvm.riscv.vfmsub.nxv8f64.f64.i64(<vscale x 8 x double> [[VD:%.*]], double [[RS1:%.*]], <vscale x 8 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x double> @llvm.riscv.vfmsub.nxv8f64.f64.i64(<vscale x 8 x double> [[VD:%.*]], double [[RS1:%.*]], <vscale x 8 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x double> [[TMP0]]
 //
 vfloat64m8_t test_vfmsub_vf_f64m8(vfloat64m8_t vd, double rs1, vfloat64m8_t vs2, size_t vl) {

diff  --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vfnmacc.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vfnmacc.c
index d027d916b66ca..7deeea8744781 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vfnmacc.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vfnmacc.c
@@ -9,7 +9,7 @@
 
 // CHECK-RV64-LABEL: @test_vfnmacc_vv_f16mf4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x half> @llvm.riscv.vfnmacc.nxv1f16.nxv1f16.i64(<vscale x 1 x half> [[VD:%.*]], <vscale x 1 x half> [[VS1:%.*]], <vscale x 1 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x half> @llvm.riscv.vfnmacc.nxv1f16.nxv1f16.i64(<vscale x 1 x half> [[VD:%.*]], <vscale x 1 x half> [[VS1:%.*]], <vscale x 1 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x half> [[TMP0]]
 //
 vfloat16mf4_t test_vfnmacc_vv_f16mf4(vfloat16mf4_t vd, vfloat16mf4_t vs1, vfloat16mf4_t vs2, size_t vl) {
@@ -18,7 +18,7 @@ vfloat16mf4_t test_vfnmacc_vv_f16mf4(vfloat16mf4_t vd, vfloat16mf4_t vs1, vfloat
 
 // CHECK-RV64-LABEL: @test_vfnmacc_vf_f16mf4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x half> @llvm.riscv.vfnmacc.nxv1f16.f16.i64(<vscale x 1 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 1 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x half> @llvm.riscv.vfnmacc.nxv1f16.f16.i64(<vscale x 1 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 1 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x half> [[TMP0]]
 //
 vfloat16mf4_t test_vfnmacc_vf_f16mf4(vfloat16mf4_t vd, _Float16 rs1, vfloat16mf4_t vs2, size_t vl) {
@@ -27,7 +27,7 @@ vfloat16mf4_t test_vfnmacc_vf_f16mf4(vfloat16mf4_t vd, _Float16 rs1, vfloat16mf4
 
 // CHECK-RV64-LABEL: @test_vfnmacc_vv_f16mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x half> @llvm.riscv.vfnmacc.nxv2f16.nxv2f16.i64(<vscale x 2 x half> [[VD:%.*]], <vscale x 2 x half> [[VS1:%.*]], <vscale x 2 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x half> @llvm.riscv.vfnmacc.nxv2f16.nxv2f16.i64(<vscale x 2 x half> [[VD:%.*]], <vscale x 2 x half> [[VS1:%.*]], <vscale x 2 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x half> [[TMP0]]
 //
 vfloat16mf2_t test_vfnmacc_vv_f16mf2(vfloat16mf2_t vd, vfloat16mf2_t vs1, vfloat16mf2_t vs2, size_t vl) {
@@ -36,7 +36,7 @@ vfloat16mf2_t test_vfnmacc_vv_f16mf2(vfloat16mf2_t vd, vfloat16mf2_t vs1, vfloat
 
 // CHECK-RV64-LABEL: @test_vfnmacc_vf_f16mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x half> @llvm.riscv.vfnmacc.nxv2f16.f16.i64(<vscale x 2 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 2 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x half> @llvm.riscv.vfnmacc.nxv2f16.f16.i64(<vscale x 2 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 2 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x half> [[TMP0]]
 //
 vfloat16mf2_t test_vfnmacc_vf_f16mf2(vfloat16mf2_t vd, _Float16 rs1, vfloat16mf2_t vs2, size_t vl) {
@@ -45,7 +45,7 @@ vfloat16mf2_t test_vfnmacc_vf_f16mf2(vfloat16mf2_t vd, _Float16 rs1, vfloat16mf2
 
 // CHECK-RV64-LABEL: @test_vfnmacc_vv_f16m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x half> @llvm.riscv.vfnmacc.nxv4f16.nxv4f16.i64(<vscale x 4 x half> [[VD:%.*]], <vscale x 4 x half> [[VS1:%.*]], <vscale x 4 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x half> @llvm.riscv.vfnmacc.nxv4f16.nxv4f16.i64(<vscale x 4 x half> [[VD:%.*]], <vscale x 4 x half> [[VS1:%.*]], <vscale x 4 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x half> [[TMP0]]
 //
 vfloat16m1_t test_vfnmacc_vv_f16m1(vfloat16m1_t vd, vfloat16m1_t vs1, vfloat16m1_t vs2, size_t vl) {
@@ -54,7 +54,7 @@ vfloat16m1_t test_vfnmacc_vv_f16m1(vfloat16m1_t vd, vfloat16m1_t vs1, vfloat16m1
 
 // CHECK-RV64-LABEL: @test_vfnmacc_vf_f16m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x half> @llvm.riscv.vfnmacc.nxv4f16.f16.i64(<vscale x 4 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 4 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x half> @llvm.riscv.vfnmacc.nxv4f16.f16.i64(<vscale x 4 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 4 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x half> [[TMP0]]
 //
 vfloat16m1_t test_vfnmacc_vf_f16m1(vfloat16m1_t vd, _Float16 rs1, vfloat16m1_t vs2, size_t vl) {
@@ -63,7 +63,7 @@ vfloat16m1_t test_vfnmacc_vf_f16m1(vfloat16m1_t vd, _Float16 rs1, vfloat16m1_t v
 
 // CHECK-RV64-LABEL: @test_vfnmacc_vv_f16m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x half> @llvm.riscv.vfnmacc.nxv8f16.nxv8f16.i64(<vscale x 8 x half> [[VD:%.*]], <vscale x 8 x half> [[VS1:%.*]], <vscale x 8 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x half> @llvm.riscv.vfnmacc.nxv8f16.nxv8f16.i64(<vscale x 8 x half> [[VD:%.*]], <vscale x 8 x half> [[VS1:%.*]], <vscale x 8 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x half> [[TMP0]]
 //
 vfloat16m2_t test_vfnmacc_vv_f16m2(vfloat16m2_t vd, vfloat16m2_t vs1, vfloat16m2_t vs2, size_t vl) {
@@ -72,7 +72,7 @@ vfloat16m2_t test_vfnmacc_vv_f16m2(vfloat16m2_t vd, vfloat16m2_t vs1, vfloat16m2
 
 // CHECK-RV64-LABEL: @test_vfnmacc_vf_f16m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x half> @llvm.riscv.vfnmacc.nxv8f16.f16.i64(<vscale x 8 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 8 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x half> @llvm.riscv.vfnmacc.nxv8f16.f16.i64(<vscale x 8 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 8 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x half> [[TMP0]]
 //
 vfloat16m2_t test_vfnmacc_vf_f16m2(vfloat16m2_t vd, _Float16 rs1, vfloat16m2_t vs2, size_t vl) {
@@ -81,7 +81,7 @@ vfloat16m2_t test_vfnmacc_vf_f16m2(vfloat16m2_t vd, _Float16 rs1, vfloat16m2_t v
 
 // CHECK-RV64-LABEL: @test_vfnmacc_vv_f16m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x half> @llvm.riscv.vfnmacc.nxv16f16.nxv16f16.i64(<vscale x 16 x half> [[VD:%.*]], <vscale x 16 x half> [[VS1:%.*]], <vscale x 16 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x half> @llvm.riscv.vfnmacc.nxv16f16.nxv16f16.i64(<vscale x 16 x half> [[VD:%.*]], <vscale x 16 x half> [[VS1:%.*]], <vscale x 16 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x half> [[TMP0]]
 //
 vfloat16m4_t test_vfnmacc_vv_f16m4(vfloat16m4_t vd, vfloat16m4_t vs1, vfloat16m4_t vs2, size_t vl) {
@@ -90,7 +90,7 @@ vfloat16m4_t test_vfnmacc_vv_f16m4(vfloat16m4_t vd, vfloat16m4_t vs1, vfloat16m4
 
 // CHECK-RV64-LABEL: @test_vfnmacc_vf_f16m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x half> @llvm.riscv.vfnmacc.nxv16f16.f16.i64(<vscale x 16 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 16 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x half> @llvm.riscv.vfnmacc.nxv16f16.f16.i64(<vscale x 16 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 16 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x half> [[TMP0]]
 //
 vfloat16m4_t test_vfnmacc_vf_f16m4(vfloat16m4_t vd, _Float16 rs1, vfloat16m4_t vs2, size_t vl) {
@@ -99,7 +99,7 @@ vfloat16m4_t test_vfnmacc_vf_f16m4(vfloat16m4_t vd, _Float16 rs1, vfloat16m4_t v
 
 // CHECK-RV64-LABEL: @test_vfnmacc_vv_f16m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x half> @llvm.riscv.vfnmacc.nxv32f16.nxv32f16.i64(<vscale x 32 x half> [[VD:%.*]], <vscale x 32 x half> [[VS1:%.*]], <vscale x 32 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x half> @llvm.riscv.vfnmacc.nxv32f16.nxv32f16.i64(<vscale x 32 x half> [[VD:%.*]], <vscale x 32 x half> [[VS1:%.*]], <vscale x 32 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 32 x half> [[TMP0]]
 //
 vfloat16m8_t test_vfnmacc_vv_f16m8(vfloat16m8_t vd, vfloat16m8_t vs1, vfloat16m8_t vs2, size_t vl) {
@@ -108,7 +108,7 @@ vfloat16m8_t test_vfnmacc_vv_f16m8(vfloat16m8_t vd, vfloat16m8_t vs1, vfloat16m8
 
 // CHECK-RV64-LABEL: @test_vfnmacc_vf_f16m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x half> @llvm.riscv.vfnmacc.nxv32f16.f16.i64(<vscale x 32 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 32 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x half> @llvm.riscv.vfnmacc.nxv32f16.f16.i64(<vscale x 32 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 32 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 32 x half> [[TMP0]]
 //
 vfloat16m8_t test_vfnmacc_vf_f16m8(vfloat16m8_t vd, _Float16 rs1, vfloat16m8_t vs2, size_t vl) {
@@ -117,7 +117,7 @@ vfloat16m8_t test_vfnmacc_vf_f16m8(vfloat16m8_t vd, _Float16 rs1, vfloat16m8_t v
 
 // CHECK-RV64-LABEL: @test_vfnmacc_vv_f32mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x float> @llvm.riscv.vfnmacc.nxv1f32.nxv1f32.i64(<vscale x 1 x float> [[VD:%.*]], <vscale x 1 x float> [[VS1:%.*]], <vscale x 1 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x float> @llvm.riscv.vfnmacc.nxv1f32.nxv1f32.i64(<vscale x 1 x float> [[VD:%.*]], <vscale x 1 x float> [[VS1:%.*]], <vscale x 1 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x float> [[TMP0]]
 //
 vfloat32mf2_t test_vfnmacc_vv_f32mf2(vfloat32mf2_t vd, vfloat32mf2_t vs1, vfloat32mf2_t vs2, size_t vl) {
@@ -126,7 +126,7 @@ vfloat32mf2_t test_vfnmacc_vv_f32mf2(vfloat32mf2_t vd, vfloat32mf2_t vs1, vfloat
 
 // CHECK-RV64-LABEL: @test_vfnmacc_vf_f32mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x float> @llvm.riscv.vfnmacc.nxv1f32.f32.i64(<vscale x 1 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 1 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x float> @llvm.riscv.vfnmacc.nxv1f32.f32.i64(<vscale x 1 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 1 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x float> [[TMP0]]
 //
 vfloat32mf2_t test_vfnmacc_vf_f32mf2(vfloat32mf2_t vd, float rs1, vfloat32mf2_t vs2, size_t vl) {
@@ -135,7 +135,7 @@ vfloat32mf2_t test_vfnmacc_vf_f32mf2(vfloat32mf2_t vd, float rs1, vfloat32mf2_t
 
 // CHECK-RV64-LABEL: @test_vfnmacc_vv_f32m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x float> @llvm.riscv.vfnmacc.nxv2f32.nxv2f32.i64(<vscale x 2 x float> [[VD:%.*]], <vscale x 2 x float> [[VS1:%.*]], <vscale x 2 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x float> @llvm.riscv.vfnmacc.nxv2f32.nxv2f32.i64(<vscale x 2 x float> [[VD:%.*]], <vscale x 2 x float> [[VS1:%.*]], <vscale x 2 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x float> [[TMP0]]
 //
 vfloat32m1_t test_vfnmacc_vv_f32m1(vfloat32m1_t vd, vfloat32m1_t vs1, vfloat32m1_t vs2, size_t vl) {
@@ -144,7 +144,7 @@ vfloat32m1_t test_vfnmacc_vv_f32m1(vfloat32m1_t vd, vfloat32m1_t vs1, vfloat32m1
 
 // CHECK-RV64-LABEL: @test_vfnmacc_vf_f32m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x float> @llvm.riscv.vfnmacc.nxv2f32.f32.i64(<vscale x 2 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 2 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x float> @llvm.riscv.vfnmacc.nxv2f32.f32.i64(<vscale x 2 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 2 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x float> [[TMP0]]
 //
 vfloat32m1_t test_vfnmacc_vf_f32m1(vfloat32m1_t vd, float rs1, vfloat32m1_t vs2, size_t vl) {
@@ -153,7 +153,7 @@ vfloat32m1_t test_vfnmacc_vf_f32m1(vfloat32m1_t vd, float rs1, vfloat32m1_t vs2,
 
 // CHECK-RV64-LABEL: @test_vfnmacc_vv_f32m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.riscv.vfnmacc.nxv4f32.nxv4f32.i64(<vscale x 4 x float> [[VD:%.*]], <vscale x 4 x float> [[VS1:%.*]], <vscale x 4 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.riscv.vfnmacc.nxv4f32.nxv4f32.i64(<vscale x 4 x float> [[VD:%.*]], <vscale x 4 x float> [[VS1:%.*]], <vscale x 4 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
 vfloat32m2_t test_vfnmacc_vv_f32m2(vfloat32m2_t vd, vfloat32m2_t vs1, vfloat32m2_t vs2, size_t vl) {
@@ -162,7 +162,7 @@ vfloat32m2_t test_vfnmacc_vv_f32m2(vfloat32m2_t vd, vfloat32m2_t vs1, vfloat32m2
 
 // CHECK-RV64-LABEL: @test_vfnmacc_vf_f32m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.riscv.vfnmacc.nxv4f32.f32.i64(<vscale x 4 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 4 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.riscv.vfnmacc.nxv4f32.f32.i64(<vscale x 4 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 4 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
 vfloat32m2_t test_vfnmacc_vf_f32m2(vfloat32m2_t vd, float rs1, vfloat32m2_t vs2, size_t vl) {
@@ -171,7 +171,7 @@ vfloat32m2_t test_vfnmacc_vf_f32m2(vfloat32m2_t vd, float rs1, vfloat32m2_t vs2,
 
 // CHECK-RV64-LABEL: @test_vfnmacc_vv_f32m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x float> @llvm.riscv.vfnmacc.nxv8f32.nxv8f32.i64(<vscale x 8 x float> [[VD:%.*]], <vscale x 8 x float> [[VS1:%.*]], <vscale x 8 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x float> @llvm.riscv.vfnmacc.nxv8f32.nxv8f32.i64(<vscale x 8 x float> [[VD:%.*]], <vscale x 8 x float> [[VS1:%.*]], <vscale x 8 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x float> [[TMP0]]
 //
 vfloat32m4_t test_vfnmacc_vv_f32m4(vfloat32m4_t vd, vfloat32m4_t vs1, vfloat32m4_t vs2, size_t vl) {
@@ -180,7 +180,7 @@ vfloat32m4_t test_vfnmacc_vv_f32m4(vfloat32m4_t vd, vfloat32m4_t vs1, vfloat32m4
 
 // CHECK-RV64-LABEL: @test_vfnmacc_vf_f32m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x float> @llvm.riscv.vfnmacc.nxv8f32.f32.i64(<vscale x 8 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 8 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x float> @llvm.riscv.vfnmacc.nxv8f32.f32.i64(<vscale x 8 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 8 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x float> [[TMP0]]
 //
 vfloat32m4_t test_vfnmacc_vf_f32m4(vfloat32m4_t vd, float rs1, vfloat32m4_t vs2, size_t vl) {
@@ -189,7 +189,7 @@ vfloat32m4_t test_vfnmacc_vf_f32m4(vfloat32m4_t vd, float rs1, vfloat32m4_t vs2,
 
 // CHECK-RV64-LABEL: @test_vfnmacc_vv_f32m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x float> @llvm.riscv.vfnmacc.nxv16f32.nxv16f32.i64(<vscale x 16 x float> [[VD:%.*]], <vscale x 16 x float> [[VS1:%.*]], <vscale x 16 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x float> @llvm.riscv.vfnmacc.nxv16f32.nxv16f32.i64(<vscale x 16 x float> [[VD:%.*]], <vscale x 16 x float> [[VS1:%.*]], <vscale x 16 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x float> [[TMP0]]
 //
 vfloat32m8_t test_vfnmacc_vv_f32m8(vfloat32m8_t vd, vfloat32m8_t vs1, vfloat32m8_t vs2, size_t vl) {
@@ -198,7 +198,7 @@ vfloat32m8_t test_vfnmacc_vv_f32m8(vfloat32m8_t vd, vfloat32m8_t vs1, vfloat32m8
 
 // CHECK-RV64-LABEL: @test_vfnmacc_vf_f32m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x float> @llvm.riscv.vfnmacc.nxv16f32.f32.i64(<vscale x 16 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 16 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x float> @llvm.riscv.vfnmacc.nxv16f32.f32.i64(<vscale x 16 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 16 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x float> [[TMP0]]
 //
 vfloat32m8_t test_vfnmacc_vf_f32m8(vfloat32m8_t vd, float rs1, vfloat32m8_t vs2, size_t vl) {
@@ -207,7 +207,7 @@ vfloat32m8_t test_vfnmacc_vf_f32m8(vfloat32m8_t vd, float rs1, vfloat32m8_t vs2,
 
 // CHECK-RV64-LABEL: @test_vfnmacc_vv_f64m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x double> @llvm.riscv.vfnmacc.nxv1f64.nxv1f64.i64(<vscale x 1 x double> [[VD:%.*]], <vscale x 1 x double> [[VS1:%.*]], <vscale x 1 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x double> @llvm.riscv.vfnmacc.nxv1f64.nxv1f64.i64(<vscale x 1 x double> [[VD:%.*]], <vscale x 1 x double> [[VS1:%.*]], <vscale x 1 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x double> [[TMP0]]
 //
 vfloat64m1_t test_vfnmacc_vv_f64m1(vfloat64m1_t vd, vfloat64m1_t vs1, vfloat64m1_t vs2, size_t vl) {
@@ -216,7 +216,7 @@ vfloat64m1_t test_vfnmacc_vv_f64m1(vfloat64m1_t vd, vfloat64m1_t vs1, vfloat64m1
 
 // CHECK-RV64-LABEL: @test_vfnmacc_vf_f64m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x double> @llvm.riscv.vfnmacc.nxv1f64.f64.i64(<vscale x 1 x double> [[VD:%.*]], double [[RS1:%.*]], <vscale x 1 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x double> @llvm.riscv.vfnmacc.nxv1f64.f64.i64(<vscale x 1 x double> [[VD:%.*]], double [[RS1:%.*]], <vscale x 1 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x double> [[TMP0]]
 //
 vfloat64m1_t test_vfnmacc_vf_f64m1(vfloat64m1_t vd, double rs1, vfloat64m1_t vs2, size_t vl) {
@@ -225,7 +225,7 @@ vfloat64m1_t test_vfnmacc_vf_f64m1(vfloat64m1_t vd, double rs1, vfloat64m1_t vs2
 
 // CHECK-RV64-LABEL: @test_vfnmacc_vv_f64m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x double> @llvm.riscv.vfnmacc.nxv2f64.nxv2f64.i64(<vscale x 2 x double> [[VD:%.*]], <vscale x 2 x double> [[VS1:%.*]], <vscale x 2 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x double> @llvm.riscv.vfnmacc.nxv2f64.nxv2f64.i64(<vscale x 2 x double> [[VD:%.*]], <vscale x 2 x double> [[VS1:%.*]], <vscale x 2 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x double> [[TMP0]]
 //
 vfloat64m2_t test_vfnmacc_vv_f64m2(vfloat64m2_t vd, vfloat64m2_t vs1, vfloat64m2_t vs2, size_t vl) {
@@ -234,7 +234,7 @@ vfloat64m2_t test_vfnmacc_vv_f64m2(vfloat64m2_t vd, vfloat64m2_t vs1, vfloat64m2
 
 // CHECK-RV64-LABEL: @test_vfnmacc_vf_f64m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x double> @llvm.riscv.vfnmacc.nxv2f64.f64.i64(<vscale x 2 x double> [[VD:%.*]], double [[RS1:%.*]], <vscale x 2 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x double> @llvm.riscv.vfnmacc.nxv2f64.f64.i64(<vscale x 2 x double> [[VD:%.*]], double [[RS1:%.*]], <vscale x 2 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x double> [[TMP0]]
 //
 vfloat64m2_t test_vfnmacc_vf_f64m2(vfloat64m2_t vd, double rs1, vfloat64m2_t vs2, size_t vl) {
@@ -243,7 +243,7 @@ vfloat64m2_t test_vfnmacc_vf_f64m2(vfloat64m2_t vd, double rs1, vfloat64m2_t vs2
 
 // CHECK-RV64-LABEL: @test_vfnmacc_vv_f64m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x double> @llvm.riscv.vfnmacc.nxv4f64.nxv4f64.i64(<vscale x 4 x double> [[VD:%.*]], <vscale x 4 x double> [[VS1:%.*]], <vscale x 4 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x double> @llvm.riscv.vfnmacc.nxv4f64.nxv4f64.i64(<vscale x 4 x double> [[VD:%.*]], <vscale x 4 x double> [[VS1:%.*]], <vscale x 4 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x double> [[TMP0]]
 //
 vfloat64m4_t test_vfnmacc_vv_f64m4(vfloat64m4_t vd, vfloat64m4_t vs1, vfloat64m4_t vs2, size_t vl) {
@@ -252,7 +252,7 @@ vfloat64m4_t test_vfnmacc_vv_f64m4(vfloat64m4_t vd, vfloat64m4_t vs1, vfloat64m4
 
 // CHECK-RV64-LABEL: @test_vfnmacc_vf_f64m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x double> @llvm.riscv.vfnmacc.nxv4f64.f64.i64(<vscale x 4 x double> [[VD:%.*]], double [[RS1:%.*]], <vscale x 4 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x double> @llvm.riscv.vfnmacc.nxv4f64.f64.i64(<vscale x 4 x double> [[VD:%.*]], double [[RS1:%.*]], <vscale x 4 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x double> [[TMP0]]
 //
 vfloat64m4_t test_vfnmacc_vf_f64m4(vfloat64m4_t vd, double rs1, vfloat64m4_t vs2, size_t vl) {
@@ -261,7 +261,7 @@ vfloat64m4_t test_vfnmacc_vf_f64m4(vfloat64m4_t vd, double rs1, vfloat64m4_t vs2
 
 // CHECK-RV64-LABEL: @test_vfnmacc_vv_f64m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x double> @llvm.riscv.vfnmacc.nxv8f64.nxv8f64.i64(<vscale x 8 x double> [[VD:%.*]], <vscale x 8 x double> [[VS1:%.*]], <vscale x 8 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x double> @llvm.riscv.vfnmacc.nxv8f64.nxv8f64.i64(<vscale x 8 x double> [[VD:%.*]], <vscale x 8 x double> [[VS1:%.*]], <vscale x 8 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x double> [[TMP0]]
 //
 vfloat64m8_t test_vfnmacc_vv_f64m8(vfloat64m8_t vd, vfloat64m8_t vs1, vfloat64m8_t vs2, size_t vl) {
@@ -270,7 +270,7 @@ vfloat64m8_t test_vfnmacc_vv_f64m8(vfloat64m8_t vd, vfloat64m8_t vs1, vfloat64m8
 
 // CHECK-RV64-LABEL: @test_vfnmacc_vf_f64m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x double> @llvm.riscv.vfnmacc.nxv8f64.f64.i64(<vscale x 8 x double> [[VD:%.*]], double [[RS1:%.*]], <vscale x 8 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x double> @llvm.riscv.vfnmacc.nxv8f64.f64.i64(<vscale x 8 x double> [[VD:%.*]], double [[RS1:%.*]], <vscale x 8 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x double> [[TMP0]]
 //
 vfloat64m8_t test_vfnmacc_vf_f64m8(vfloat64m8_t vd, double rs1, vfloat64m8_t vs2, size_t vl) {

diff  --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vfnmadd.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vfnmadd.c
index 6d95bba610e99..aaa942698c956 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vfnmadd.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vfnmadd.c
@@ -9,7 +9,7 @@
 
 // CHECK-RV64-LABEL: @test_vfnmadd_vv_f16mf4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x half> @llvm.riscv.vfnmadd.nxv1f16.nxv1f16.i64(<vscale x 1 x half> [[VD:%.*]], <vscale x 1 x half> [[VS1:%.*]], <vscale x 1 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x half> @llvm.riscv.vfnmadd.nxv1f16.nxv1f16.i64(<vscale x 1 x half> [[VD:%.*]], <vscale x 1 x half> [[VS1:%.*]], <vscale x 1 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x half> [[TMP0]]
 //
 vfloat16mf4_t test_vfnmadd_vv_f16mf4(vfloat16mf4_t vd, vfloat16mf4_t vs1, vfloat16mf4_t vs2, size_t vl) {
@@ -18,7 +18,7 @@ vfloat16mf4_t test_vfnmadd_vv_f16mf4(vfloat16mf4_t vd, vfloat16mf4_t vs1, vfloat
 
 // CHECK-RV64-LABEL: @test_vfnmadd_vf_f16mf4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x half> @llvm.riscv.vfnmadd.nxv1f16.f16.i64(<vscale x 1 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 1 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x half> @llvm.riscv.vfnmadd.nxv1f16.f16.i64(<vscale x 1 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 1 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x half> [[TMP0]]
 //
 vfloat16mf4_t test_vfnmadd_vf_f16mf4(vfloat16mf4_t vd, _Float16 rs1, vfloat16mf4_t vs2, size_t vl) {
@@ -27,7 +27,7 @@ vfloat16mf4_t test_vfnmadd_vf_f16mf4(vfloat16mf4_t vd, _Float16 rs1, vfloat16mf4
 
 // CHECK-RV64-LABEL: @test_vfnmadd_vv_f16mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x half> @llvm.riscv.vfnmadd.nxv2f16.nxv2f16.i64(<vscale x 2 x half> [[VD:%.*]], <vscale x 2 x half> [[VS1:%.*]], <vscale x 2 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x half> @llvm.riscv.vfnmadd.nxv2f16.nxv2f16.i64(<vscale x 2 x half> [[VD:%.*]], <vscale x 2 x half> [[VS1:%.*]], <vscale x 2 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x half> [[TMP0]]
 //
 vfloat16mf2_t test_vfnmadd_vv_f16mf2(vfloat16mf2_t vd, vfloat16mf2_t vs1, vfloat16mf2_t vs2, size_t vl) {
@@ -36,7 +36,7 @@ vfloat16mf2_t test_vfnmadd_vv_f16mf2(vfloat16mf2_t vd, vfloat16mf2_t vs1, vfloat
 
 // CHECK-RV64-LABEL: @test_vfnmadd_vf_f16mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x half> @llvm.riscv.vfnmadd.nxv2f16.f16.i64(<vscale x 2 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 2 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x half> @llvm.riscv.vfnmadd.nxv2f16.f16.i64(<vscale x 2 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 2 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x half> [[TMP0]]
 //
 vfloat16mf2_t test_vfnmadd_vf_f16mf2(vfloat16mf2_t vd, _Float16 rs1, vfloat16mf2_t vs2, size_t vl) {
@@ -45,7 +45,7 @@ vfloat16mf2_t test_vfnmadd_vf_f16mf2(vfloat16mf2_t vd, _Float16 rs1, vfloat16mf2
 
 // CHECK-RV64-LABEL: @test_vfnmadd_vv_f16m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x half> @llvm.riscv.vfnmadd.nxv4f16.nxv4f16.i64(<vscale x 4 x half> [[VD:%.*]], <vscale x 4 x half> [[VS1:%.*]], <vscale x 4 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x half> @llvm.riscv.vfnmadd.nxv4f16.nxv4f16.i64(<vscale x 4 x half> [[VD:%.*]], <vscale x 4 x half> [[VS1:%.*]], <vscale x 4 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x half> [[TMP0]]
 //
 vfloat16m1_t test_vfnmadd_vv_f16m1(vfloat16m1_t vd, vfloat16m1_t vs1, vfloat16m1_t vs2, size_t vl) {
@@ -54,7 +54,7 @@ vfloat16m1_t test_vfnmadd_vv_f16m1(vfloat16m1_t vd, vfloat16m1_t vs1, vfloat16m1
 
 // CHECK-RV64-LABEL: @test_vfnmadd_vf_f16m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x half> @llvm.riscv.vfnmadd.nxv4f16.f16.i64(<vscale x 4 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 4 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x half> @llvm.riscv.vfnmadd.nxv4f16.f16.i64(<vscale x 4 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 4 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x half> [[TMP0]]
 //
 vfloat16m1_t test_vfnmadd_vf_f16m1(vfloat16m1_t vd, _Float16 rs1, vfloat16m1_t vs2, size_t vl) {
@@ -63,7 +63,7 @@ vfloat16m1_t test_vfnmadd_vf_f16m1(vfloat16m1_t vd, _Float16 rs1, vfloat16m1_t v
 
 // CHECK-RV64-LABEL: @test_vfnmadd_vv_f16m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x half> @llvm.riscv.vfnmadd.nxv8f16.nxv8f16.i64(<vscale x 8 x half> [[VD:%.*]], <vscale x 8 x half> [[VS1:%.*]], <vscale x 8 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x half> @llvm.riscv.vfnmadd.nxv8f16.nxv8f16.i64(<vscale x 8 x half> [[VD:%.*]], <vscale x 8 x half> [[VS1:%.*]], <vscale x 8 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x half> [[TMP0]]
 //
 vfloat16m2_t test_vfnmadd_vv_f16m2(vfloat16m2_t vd, vfloat16m2_t vs1, vfloat16m2_t vs2, size_t vl) {
@@ -72,7 +72,7 @@ vfloat16m2_t test_vfnmadd_vv_f16m2(vfloat16m2_t vd, vfloat16m2_t vs1, vfloat16m2
 
 // CHECK-RV64-LABEL: @test_vfnmadd_vf_f16m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x half> @llvm.riscv.vfnmadd.nxv8f16.f16.i64(<vscale x 8 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 8 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x half> @llvm.riscv.vfnmadd.nxv8f16.f16.i64(<vscale x 8 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 8 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x half> [[TMP0]]
 //
 vfloat16m2_t test_vfnmadd_vf_f16m2(vfloat16m2_t vd, _Float16 rs1, vfloat16m2_t vs2, size_t vl) {
@@ -81,7 +81,7 @@ vfloat16m2_t test_vfnmadd_vf_f16m2(vfloat16m2_t vd, _Float16 rs1, vfloat16m2_t v
 
 // CHECK-RV64-LABEL: @test_vfnmadd_vv_f16m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x half> @llvm.riscv.vfnmadd.nxv16f16.nxv16f16.i64(<vscale x 16 x half> [[VD:%.*]], <vscale x 16 x half> [[VS1:%.*]], <vscale x 16 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x half> @llvm.riscv.vfnmadd.nxv16f16.nxv16f16.i64(<vscale x 16 x half> [[VD:%.*]], <vscale x 16 x half> [[VS1:%.*]], <vscale x 16 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x half> [[TMP0]]
 //
 vfloat16m4_t test_vfnmadd_vv_f16m4(vfloat16m4_t vd, vfloat16m4_t vs1, vfloat16m4_t vs2, size_t vl) {
@@ -90,7 +90,7 @@ vfloat16m4_t test_vfnmadd_vv_f16m4(vfloat16m4_t vd, vfloat16m4_t vs1, vfloat16m4
 
 // CHECK-RV64-LABEL: @test_vfnmadd_vf_f16m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x half> @llvm.riscv.vfnmadd.nxv16f16.f16.i64(<vscale x 16 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 16 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x half> @llvm.riscv.vfnmadd.nxv16f16.f16.i64(<vscale x 16 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 16 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x half> [[TMP0]]
 //
 vfloat16m4_t test_vfnmadd_vf_f16m4(vfloat16m4_t vd, _Float16 rs1, vfloat16m4_t vs2, size_t vl) {
@@ -99,7 +99,7 @@ vfloat16m4_t test_vfnmadd_vf_f16m4(vfloat16m4_t vd, _Float16 rs1, vfloat16m4_t v
 
 // CHECK-RV64-LABEL: @test_vfnmadd_vv_f16m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x half> @llvm.riscv.vfnmadd.nxv32f16.nxv32f16.i64(<vscale x 32 x half> [[VD:%.*]], <vscale x 32 x half> [[VS1:%.*]], <vscale x 32 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x half> @llvm.riscv.vfnmadd.nxv32f16.nxv32f16.i64(<vscale x 32 x half> [[VD:%.*]], <vscale x 32 x half> [[VS1:%.*]], <vscale x 32 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 32 x half> [[TMP0]]
 //
 vfloat16m8_t test_vfnmadd_vv_f16m8(vfloat16m8_t vd, vfloat16m8_t vs1, vfloat16m8_t vs2, size_t vl) {
@@ -108,7 +108,7 @@ vfloat16m8_t test_vfnmadd_vv_f16m8(vfloat16m8_t vd, vfloat16m8_t vs1, vfloat16m8
 
 // CHECK-RV64-LABEL: @test_vfnmadd_vf_f16m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x half> @llvm.riscv.vfnmadd.nxv32f16.f16.i64(<vscale x 32 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 32 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x half> @llvm.riscv.vfnmadd.nxv32f16.f16.i64(<vscale x 32 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 32 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 32 x half> [[TMP0]]
 //
 vfloat16m8_t test_vfnmadd_vf_f16m8(vfloat16m8_t vd, _Float16 rs1, vfloat16m8_t vs2, size_t vl) {
@@ -117,7 +117,7 @@ vfloat16m8_t test_vfnmadd_vf_f16m8(vfloat16m8_t vd, _Float16 rs1, vfloat16m8_t v
 
 // CHECK-RV64-LABEL: @test_vfnmadd_vv_f32mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x float> @llvm.riscv.vfnmadd.nxv1f32.nxv1f32.i64(<vscale x 1 x float> [[VD:%.*]], <vscale x 1 x float> [[VS1:%.*]], <vscale x 1 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x float> @llvm.riscv.vfnmadd.nxv1f32.nxv1f32.i64(<vscale x 1 x float> [[VD:%.*]], <vscale x 1 x float> [[VS1:%.*]], <vscale x 1 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x float> [[TMP0]]
 //
 vfloat32mf2_t test_vfnmadd_vv_f32mf2(vfloat32mf2_t vd, vfloat32mf2_t vs1, vfloat32mf2_t vs2, size_t vl) {
@@ -126,7 +126,7 @@ vfloat32mf2_t test_vfnmadd_vv_f32mf2(vfloat32mf2_t vd, vfloat32mf2_t vs1, vfloat
 
 // CHECK-RV64-LABEL: @test_vfnmadd_vf_f32mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x float> @llvm.riscv.vfnmadd.nxv1f32.f32.i64(<vscale x 1 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 1 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x float> @llvm.riscv.vfnmadd.nxv1f32.f32.i64(<vscale x 1 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 1 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x float> [[TMP0]]
 //
 vfloat32mf2_t test_vfnmadd_vf_f32mf2(vfloat32mf2_t vd, float rs1, vfloat32mf2_t vs2, size_t vl) {
@@ -135,7 +135,7 @@ vfloat32mf2_t test_vfnmadd_vf_f32mf2(vfloat32mf2_t vd, float rs1, vfloat32mf2_t
 
 // CHECK-RV64-LABEL: @test_vfnmadd_vv_f32m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x float> @llvm.riscv.vfnmadd.nxv2f32.nxv2f32.i64(<vscale x 2 x float> [[VD:%.*]], <vscale x 2 x float> [[VS1:%.*]], <vscale x 2 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x float> @llvm.riscv.vfnmadd.nxv2f32.nxv2f32.i64(<vscale x 2 x float> [[VD:%.*]], <vscale x 2 x float> [[VS1:%.*]], <vscale x 2 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x float> [[TMP0]]
 //
 vfloat32m1_t test_vfnmadd_vv_f32m1(vfloat32m1_t vd, vfloat32m1_t vs1, vfloat32m1_t vs2, size_t vl) {
@@ -144,7 +144,7 @@ vfloat32m1_t test_vfnmadd_vv_f32m1(vfloat32m1_t vd, vfloat32m1_t vs1, vfloat32m1
 
 // CHECK-RV64-LABEL: @test_vfnmadd_vf_f32m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x float> @llvm.riscv.vfnmadd.nxv2f32.f32.i64(<vscale x 2 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 2 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x float> @llvm.riscv.vfnmadd.nxv2f32.f32.i64(<vscale x 2 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 2 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x float> [[TMP0]]
 //
 vfloat32m1_t test_vfnmadd_vf_f32m1(vfloat32m1_t vd, float rs1, vfloat32m1_t vs2, size_t vl) {
@@ -153,7 +153,7 @@ vfloat32m1_t test_vfnmadd_vf_f32m1(vfloat32m1_t vd, float rs1, vfloat32m1_t vs2,
 
 // CHECK-RV64-LABEL: @test_vfnmadd_vv_f32m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.riscv.vfnmadd.nxv4f32.nxv4f32.i64(<vscale x 4 x float> [[VD:%.*]], <vscale x 4 x float> [[VS1:%.*]], <vscale x 4 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.riscv.vfnmadd.nxv4f32.nxv4f32.i64(<vscale x 4 x float> [[VD:%.*]], <vscale x 4 x float> [[VS1:%.*]], <vscale x 4 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
 vfloat32m2_t test_vfnmadd_vv_f32m2(vfloat32m2_t vd, vfloat32m2_t vs1, vfloat32m2_t vs2, size_t vl) {
@@ -162,7 +162,7 @@ vfloat32m2_t test_vfnmadd_vv_f32m2(vfloat32m2_t vd, vfloat32m2_t vs1, vfloat32m2
 
 // CHECK-RV64-LABEL: @test_vfnmadd_vf_f32m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.riscv.vfnmadd.nxv4f32.f32.i64(<vscale x 4 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 4 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.riscv.vfnmadd.nxv4f32.f32.i64(<vscale x 4 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 4 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
 vfloat32m2_t test_vfnmadd_vf_f32m2(vfloat32m2_t vd, float rs1, vfloat32m2_t vs2, size_t vl) {
@@ -171,7 +171,7 @@ vfloat32m2_t test_vfnmadd_vf_f32m2(vfloat32m2_t vd, float rs1, vfloat32m2_t vs2,
 
 // CHECK-RV64-LABEL: @test_vfnmadd_vv_f32m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x float> @llvm.riscv.vfnmadd.nxv8f32.nxv8f32.i64(<vscale x 8 x float> [[VD:%.*]], <vscale x 8 x float> [[VS1:%.*]], <vscale x 8 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x float> @llvm.riscv.vfnmadd.nxv8f32.nxv8f32.i64(<vscale x 8 x float> [[VD:%.*]], <vscale x 8 x float> [[VS1:%.*]], <vscale x 8 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x float> [[TMP0]]
 //
 vfloat32m4_t test_vfnmadd_vv_f32m4(vfloat32m4_t vd, vfloat32m4_t vs1, vfloat32m4_t vs2, size_t vl) {
@@ -180,7 +180,7 @@ vfloat32m4_t test_vfnmadd_vv_f32m4(vfloat32m4_t vd, vfloat32m4_t vs1, vfloat32m4
 
 // CHECK-RV64-LABEL: @test_vfnmadd_vf_f32m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x float> @llvm.riscv.vfnmadd.nxv8f32.f32.i64(<vscale x 8 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 8 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x float> @llvm.riscv.vfnmadd.nxv8f32.f32.i64(<vscale x 8 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 8 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x float> [[TMP0]]
 //
 vfloat32m4_t test_vfnmadd_vf_f32m4(vfloat32m4_t vd, float rs1, vfloat32m4_t vs2, size_t vl) {
@@ -189,7 +189,7 @@ vfloat32m4_t test_vfnmadd_vf_f32m4(vfloat32m4_t vd, float rs1, vfloat32m4_t vs2,
 
 // CHECK-RV64-LABEL: @test_vfnmadd_vv_f32m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x float> @llvm.riscv.vfnmadd.nxv16f32.nxv16f32.i64(<vscale x 16 x float> [[VD:%.*]], <vscale x 16 x float> [[VS1:%.*]], <vscale x 16 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x float> @llvm.riscv.vfnmadd.nxv16f32.nxv16f32.i64(<vscale x 16 x float> [[VD:%.*]], <vscale x 16 x float> [[VS1:%.*]], <vscale x 16 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x float> [[TMP0]]
 //
 vfloat32m8_t test_vfnmadd_vv_f32m8(vfloat32m8_t vd, vfloat32m8_t vs1, vfloat32m8_t vs2, size_t vl) {
@@ -198,7 +198,7 @@ vfloat32m8_t test_vfnmadd_vv_f32m8(vfloat32m8_t vd, vfloat32m8_t vs1, vfloat32m8
 
 // CHECK-RV64-LABEL: @test_vfnmadd_vf_f32m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x float> @llvm.riscv.vfnmadd.nxv16f32.f32.i64(<vscale x 16 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 16 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x float> @llvm.riscv.vfnmadd.nxv16f32.f32.i64(<vscale x 16 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 16 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x float> [[TMP0]]
 //
 vfloat32m8_t test_vfnmadd_vf_f32m8(vfloat32m8_t vd, float rs1, vfloat32m8_t vs2, size_t vl) {
@@ -207,7 +207,7 @@ vfloat32m8_t test_vfnmadd_vf_f32m8(vfloat32m8_t vd, float rs1, vfloat32m8_t vs2,
 
 // CHECK-RV64-LABEL: @test_vfnmadd_vv_f64m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x double> @llvm.riscv.vfnmadd.nxv1f64.nxv1f64.i64(<vscale x 1 x double> [[VD:%.*]], <vscale x 1 x double> [[VS1:%.*]], <vscale x 1 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x double> @llvm.riscv.vfnmadd.nxv1f64.nxv1f64.i64(<vscale x 1 x double> [[VD:%.*]], <vscale x 1 x double> [[VS1:%.*]], <vscale x 1 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x double> [[TMP0]]
 //
 vfloat64m1_t test_vfnmadd_vv_f64m1(vfloat64m1_t vd, vfloat64m1_t vs1, vfloat64m1_t vs2, size_t vl) {
@@ -216,7 +216,7 @@ vfloat64m1_t test_vfnmadd_vv_f64m1(vfloat64m1_t vd, vfloat64m1_t vs1, vfloat64m1
 
 // CHECK-RV64-LABEL: @test_vfnmadd_vf_f64m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x double> @llvm.riscv.vfnmadd.nxv1f64.f64.i64(<vscale x 1 x double> [[VD:%.*]], double [[RS1:%.*]], <vscale x 1 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x double> @llvm.riscv.vfnmadd.nxv1f64.f64.i64(<vscale x 1 x double> [[VD:%.*]], double [[RS1:%.*]], <vscale x 1 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x double> [[TMP0]]
 //
 vfloat64m1_t test_vfnmadd_vf_f64m1(vfloat64m1_t vd, double rs1, vfloat64m1_t vs2, size_t vl) {
@@ -225,7 +225,7 @@ vfloat64m1_t test_vfnmadd_vf_f64m1(vfloat64m1_t vd, double rs1, vfloat64m1_t vs2
 
 // CHECK-RV64-LABEL: @test_vfnmadd_vv_f64m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x double> @llvm.riscv.vfnmadd.nxv2f64.nxv2f64.i64(<vscale x 2 x double> [[VD:%.*]], <vscale x 2 x double> [[VS1:%.*]], <vscale x 2 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x double> @llvm.riscv.vfnmadd.nxv2f64.nxv2f64.i64(<vscale x 2 x double> [[VD:%.*]], <vscale x 2 x double> [[VS1:%.*]], <vscale x 2 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x double> [[TMP0]]
 //
 vfloat64m2_t test_vfnmadd_vv_f64m2(vfloat64m2_t vd, vfloat64m2_t vs1, vfloat64m2_t vs2, size_t vl) {
@@ -234,7 +234,7 @@ vfloat64m2_t test_vfnmadd_vv_f64m2(vfloat64m2_t vd, vfloat64m2_t vs1, vfloat64m2
 
 // CHECK-RV64-LABEL: @test_vfnmadd_vf_f64m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x double> @llvm.riscv.vfnmadd.nxv2f64.f64.i64(<vscale x 2 x double> [[VD:%.*]], double [[RS1:%.*]], <vscale x 2 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x double> @llvm.riscv.vfnmadd.nxv2f64.f64.i64(<vscale x 2 x double> [[VD:%.*]], double [[RS1:%.*]], <vscale x 2 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x double> [[TMP0]]
 //
 vfloat64m2_t test_vfnmadd_vf_f64m2(vfloat64m2_t vd, double rs1, vfloat64m2_t vs2, size_t vl) {
@@ -243,7 +243,7 @@ vfloat64m2_t test_vfnmadd_vf_f64m2(vfloat64m2_t vd, double rs1, vfloat64m2_t vs2
 
 // CHECK-RV64-LABEL: @test_vfnmadd_vv_f64m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x double> @llvm.riscv.vfnmadd.nxv4f64.nxv4f64.i64(<vscale x 4 x double> [[VD:%.*]], <vscale x 4 x double> [[VS1:%.*]], <vscale x 4 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x double> @llvm.riscv.vfnmadd.nxv4f64.nxv4f64.i64(<vscale x 4 x double> [[VD:%.*]], <vscale x 4 x double> [[VS1:%.*]], <vscale x 4 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x double> [[TMP0]]
 //
 vfloat64m4_t test_vfnmadd_vv_f64m4(vfloat64m4_t vd, vfloat64m4_t vs1, vfloat64m4_t vs2, size_t vl) {
@@ -252,7 +252,7 @@ vfloat64m4_t test_vfnmadd_vv_f64m4(vfloat64m4_t vd, vfloat64m4_t vs1, vfloat64m4
 
 // CHECK-RV64-LABEL: @test_vfnmadd_vf_f64m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x double> @llvm.riscv.vfnmadd.nxv4f64.f64.i64(<vscale x 4 x double> [[VD:%.*]], double [[RS1:%.*]], <vscale x 4 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x double> @llvm.riscv.vfnmadd.nxv4f64.f64.i64(<vscale x 4 x double> [[VD:%.*]], double [[RS1:%.*]], <vscale x 4 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x double> [[TMP0]]
 //
 vfloat64m4_t test_vfnmadd_vf_f64m4(vfloat64m4_t vd, double rs1, vfloat64m4_t vs2, size_t vl) {
@@ -261,7 +261,7 @@ vfloat64m4_t test_vfnmadd_vf_f64m4(vfloat64m4_t vd, double rs1, vfloat64m4_t vs2
 
 // CHECK-RV64-LABEL: @test_vfnmadd_vv_f64m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x double> @llvm.riscv.vfnmadd.nxv8f64.nxv8f64.i64(<vscale x 8 x double> [[VD:%.*]], <vscale x 8 x double> [[VS1:%.*]], <vscale x 8 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x double> @llvm.riscv.vfnmadd.nxv8f64.nxv8f64.i64(<vscale x 8 x double> [[VD:%.*]], <vscale x 8 x double> [[VS1:%.*]], <vscale x 8 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x double> [[TMP0]]
 //
 vfloat64m8_t test_vfnmadd_vv_f64m8(vfloat64m8_t vd, vfloat64m8_t vs1, vfloat64m8_t vs2, size_t vl) {
@@ -270,7 +270,7 @@ vfloat64m8_t test_vfnmadd_vv_f64m8(vfloat64m8_t vd, vfloat64m8_t vs1, vfloat64m8
 
 // CHECK-RV64-LABEL: @test_vfnmadd_vf_f64m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x double> @llvm.riscv.vfnmadd.nxv8f64.f64.i64(<vscale x 8 x double> [[VD:%.*]], double [[RS1:%.*]], <vscale x 8 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x double> @llvm.riscv.vfnmadd.nxv8f64.f64.i64(<vscale x 8 x double> [[VD:%.*]], double [[RS1:%.*]], <vscale x 8 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x double> [[TMP0]]
 //
 vfloat64m8_t test_vfnmadd_vf_f64m8(vfloat64m8_t vd, double rs1, vfloat64m8_t vs2, size_t vl) {

diff  --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vfnmsac.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vfnmsac.c
index 0f467237525ba..878a491c1a533 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vfnmsac.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vfnmsac.c
@@ -9,7 +9,7 @@
 
 // CHECK-RV64-LABEL: @test_vfnmsac_vv_f16mf4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x half> @llvm.riscv.vfnmsac.nxv1f16.nxv1f16.i64(<vscale x 1 x half> [[VD:%.*]], <vscale x 1 x half> [[VS1:%.*]], <vscale x 1 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x half> @llvm.riscv.vfnmsac.nxv1f16.nxv1f16.i64(<vscale x 1 x half> [[VD:%.*]], <vscale x 1 x half> [[VS1:%.*]], <vscale x 1 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x half> [[TMP0]]
 //
 vfloat16mf4_t test_vfnmsac_vv_f16mf4(vfloat16mf4_t vd, vfloat16mf4_t vs1, vfloat16mf4_t vs2, size_t vl) {
@@ -18,7 +18,7 @@ vfloat16mf4_t test_vfnmsac_vv_f16mf4(vfloat16mf4_t vd, vfloat16mf4_t vs1, vfloat
 
 // CHECK-RV64-LABEL: @test_vfnmsac_vf_f16mf4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x half> @llvm.riscv.vfnmsac.nxv1f16.f16.i64(<vscale x 1 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 1 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x half> @llvm.riscv.vfnmsac.nxv1f16.f16.i64(<vscale x 1 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 1 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x half> [[TMP0]]
 //
 vfloat16mf4_t test_vfnmsac_vf_f16mf4(vfloat16mf4_t vd, _Float16 rs1, vfloat16mf4_t vs2, size_t vl) {
@@ -27,7 +27,7 @@ vfloat16mf4_t test_vfnmsac_vf_f16mf4(vfloat16mf4_t vd, _Float16 rs1, vfloat16mf4
 
 // CHECK-RV64-LABEL: @test_vfnmsac_vv_f16mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x half> @llvm.riscv.vfnmsac.nxv2f16.nxv2f16.i64(<vscale x 2 x half> [[VD:%.*]], <vscale x 2 x half> [[VS1:%.*]], <vscale x 2 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x half> @llvm.riscv.vfnmsac.nxv2f16.nxv2f16.i64(<vscale x 2 x half> [[VD:%.*]], <vscale x 2 x half> [[VS1:%.*]], <vscale x 2 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x half> [[TMP0]]
 //
 vfloat16mf2_t test_vfnmsac_vv_f16mf2(vfloat16mf2_t vd, vfloat16mf2_t vs1, vfloat16mf2_t vs2, size_t vl) {
@@ -36,7 +36,7 @@ vfloat16mf2_t test_vfnmsac_vv_f16mf2(vfloat16mf2_t vd, vfloat16mf2_t vs1, vfloat
 
 // CHECK-RV64-LABEL: @test_vfnmsac_vf_f16mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x half> @llvm.riscv.vfnmsac.nxv2f16.f16.i64(<vscale x 2 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 2 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x half> @llvm.riscv.vfnmsac.nxv2f16.f16.i64(<vscale x 2 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 2 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x half> [[TMP0]]
 //
 vfloat16mf2_t test_vfnmsac_vf_f16mf2(vfloat16mf2_t vd, _Float16 rs1, vfloat16mf2_t vs2, size_t vl) {
@@ -45,7 +45,7 @@ vfloat16mf2_t test_vfnmsac_vf_f16mf2(vfloat16mf2_t vd, _Float16 rs1, vfloat16mf2
 
 // CHECK-RV64-LABEL: @test_vfnmsac_vv_f16m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x half> @llvm.riscv.vfnmsac.nxv4f16.nxv4f16.i64(<vscale x 4 x half> [[VD:%.*]], <vscale x 4 x half> [[VS1:%.*]], <vscale x 4 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x half> @llvm.riscv.vfnmsac.nxv4f16.nxv4f16.i64(<vscale x 4 x half> [[VD:%.*]], <vscale x 4 x half> [[VS1:%.*]], <vscale x 4 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x half> [[TMP0]]
 //
 vfloat16m1_t test_vfnmsac_vv_f16m1(vfloat16m1_t vd, vfloat16m1_t vs1, vfloat16m1_t vs2, size_t vl) {
@@ -54,7 +54,7 @@ vfloat16m1_t test_vfnmsac_vv_f16m1(vfloat16m1_t vd, vfloat16m1_t vs1, vfloat16m1
 
 // CHECK-RV64-LABEL: @test_vfnmsac_vf_f16m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x half> @llvm.riscv.vfnmsac.nxv4f16.f16.i64(<vscale x 4 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 4 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x half> @llvm.riscv.vfnmsac.nxv4f16.f16.i64(<vscale x 4 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 4 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x half> [[TMP0]]
 //
 vfloat16m1_t test_vfnmsac_vf_f16m1(vfloat16m1_t vd, _Float16 rs1, vfloat16m1_t vs2, size_t vl) {
@@ -63,7 +63,7 @@ vfloat16m1_t test_vfnmsac_vf_f16m1(vfloat16m1_t vd, _Float16 rs1, vfloat16m1_t v
 
 // CHECK-RV64-LABEL: @test_vfnmsac_vv_f16m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x half> @llvm.riscv.vfnmsac.nxv8f16.nxv8f16.i64(<vscale x 8 x half> [[VD:%.*]], <vscale x 8 x half> [[VS1:%.*]], <vscale x 8 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x half> @llvm.riscv.vfnmsac.nxv8f16.nxv8f16.i64(<vscale x 8 x half> [[VD:%.*]], <vscale x 8 x half> [[VS1:%.*]], <vscale x 8 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x half> [[TMP0]]
 //
 vfloat16m2_t test_vfnmsac_vv_f16m2(vfloat16m2_t vd, vfloat16m2_t vs1, vfloat16m2_t vs2, size_t vl) {
@@ -72,7 +72,7 @@ vfloat16m2_t test_vfnmsac_vv_f16m2(vfloat16m2_t vd, vfloat16m2_t vs1, vfloat16m2
 
 // CHECK-RV64-LABEL: @test_vfnmsac_vf_f16m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x half> @llvm.riscv.vfnmsac.nxv8f16.f16.i64(<vscale x 8 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 8 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x half> @llvm.riscv.vfnmsac.nxv8f16.f16.i64(<vscale x 8 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 8 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x half> [[TMP0]]
 //
 vfloat16m2_t test_vfnmsac_vf_f16m2(vfloat16m2_t vd, _Float16 rs1, vfloat16m2_t vs2, size_t vl) {
@@ -81,7 +81,7 @@ vfloat16m2_t test_vfnmsac_vf_f16m2(vfloat16m2_t vd, _Float16 rs1, vfloat16m2_t v
 
 // CHECK-RV64-LABEL: @test_vfnmsac_vv_f16m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x half> @llvm.riscv.vfnmsac.nxv16f16.nxv16f16.i64(<vscale x 16 x half> [[VD:%.*]], <vscale x 16 x half> [[VS1:%.*]], <vscale x 16 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x half> @llvm.riscv.vfnmsac.nxv16f16.nxv16f16.i64(<vscale x 16 x half> [[VD:%.*]], <vscale x 16 x half> [[VS1:%.*]], <vscale x 16 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x half> [[TMP0]]
 //
 vfloat16m4_t test_vfnmsac_vv_f16m4(vfloat16m4_t vd, vfloat16m4_t vs1, vfloat16m4_t vs2, size_t vl) {
@@ -90,7 +90,7 @@ vfloat16m4_t test_vfnmsac_vv_f16m4(vfloat16m4_t vd, vfloat16m4_t vs1, vfloat16m4
 
 // CHECK-RV64-LABEL: @test_vfnmsac_vf_f16m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x half> @llvm.riscv.vfnmsac.nxv16f16.f16.i64(<vscale x 16 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 16 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x half> @llvm.riscv.vfnmsac.nxv16f16.f16.i64(<vscale x 16 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 16 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x half> [[TMP0]]
 //
 vfloat16m4_t test_vfnmsac_vf_f16m4(vfloat16m4_t vd, _Float16 rs1, vfloat16m4_t vs2, size_t vl) {
@@ -99,7 +99,7 @@ vfloat16m4_t test_vfnmsac_vf_f16m4(vfloat16m4_t vd, _Float16 rs1, vfloat16m4_t v
 
 // CHECK-RV64-LABEL: @test_vfnmsac_vv_f16m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x half> @llvm.riscv.vfnmsac.nxv32f16.nxv32f16.i64(<vscale x 32 x half> [[VD:%.*]], <vscale x 32 x half> [[VS1:%.*]], <vscale x 32 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x half> @llvm.riscv.vfnmsac.nxv32f16.nxv32f16.i64(<vscale x 32 x half> [[VD:%.*]], <vscale x 32 x half> [[VS1:%.*]], <vscale x 32 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 32 x half> [[TMP0]]
 //
 vfloat16m8_t test_vfnmsac_vv_f16m8(vfloat16m8_t vd, vfloat16m8_t vs1, vfloat16m8_t vs2, size_t vl) {
@@ -108,7 +108,7 @@ vfloat16m8_t test_vfnmsac_vv_f16m8(vfloat16m8_t vd, vfloat16m8_t vs1, vfloat16m8
 
 // CHECK-RV64-LABEL: @test_vfnmsac_vf_f16m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x half> @llvm.riscv.vfnmsac.nxv32f16.f16.i64(<vscale x 32 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 32 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x half> @llvm.riscv.vfnmsac.nxv32f16.f16.i64(<vscale x 32 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 32 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 32 x half> [[TMP0]]
 //
 vfloat16m8_t test_vfnmsac_vf_f16m8(vfloat16m8_t vd, _Float16 rs1, vfloat16m8_t vs2, size_t vl) {
@@ -117,7 +117,7 @@ vfloat16m8_t test_vfnmsac_vf_f16m8(vfloat16m8_t vd, _Float16 rs1, vfloat16m8_t v
 
 // CHECK-RV64-LABEL: @test_vfnmsac_vv_f32mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x float> @llvm.riscv.vfnmsac.nxv1f32.nxv1f32.i64(<vscale x 1 x float> [[VD:%.*]], <vscale x 1 x float> [[VS1:%.*]], <vscale x 1 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x float> @llvm.riscv.vfnmsac.nxv1f32.nxv1f32.i64(<vscale x 1 x float> [[VD:%.*]], <vscale x 1 x float> [[VS1:%.*]], <vscale x 1 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x float> [[TMP0]]
 //
 vfloat32mf2_t test_vfnmsac_vv_f32mf2(vfloat32mf2_t vd, vfloat32mf2_t vs1, vfloat32mf2_t vs2, size_t vl) {
@@ -126,7 +126,7 @@ vfloat32mf2_t test_vfnmsac_vv_f32mf2(vfloat32mf2_t vd, vfloat32mf2_t vs1, vfloat
 
 // CHECK-RV64-LABEL: @test_vfnmsac_vf_f32mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x float> @llvm.riscv.vfnmsac.nxv1f32.f32.i64(<vscale x 1 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 1 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x float> @llvm.riscv.vfnmsac.nxv1f32.f32.i64(<vscale x 1 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 1 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x float> [[TMP0]]
 //
 vfloat32mf2_t test_vfnmsac_vf_f32mf2(vfloat32mf2_t vd, float rs1, vfloat32mf2_t vs2, size_t vl) {
@@ -135,7 +135,7 @@ vfloat32mf2_t test_vfnmsac_vf_f32mf2(vfloat32mf2_t vd, float rs1, vfloat32mf2_t
 
 // CHECK-RV64-LABEL: @test_vfnmsac_vv_f32m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x float> @llvm.riscv.vfnmsac.nxv2f32.nxv2f32.i64(<vscale x 2 x float> [[VD:%.*]], <vscale x 2 x float> [[VS1:%.*]], <vscale x 2 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x float> @llvm.riscv.vfnmsac.nxv2f32.nxv2f32.i64(<vscale x 2 x float> [[VD:%.*]], <vscale x 2 x float> [[VS1:%.*]], <vscale x 2 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x float> [[TMP0]]
 //
 vfloat32m1_t test_vfnmsac_vv_f32m1(vfloat32m1_t vd, vfloat32m1_t vs1, vfloat32m1_t vs2, size_t vl) {
@@ -144,7 +144,7 @@ vfloat32m1_t test_vfnmsac_vv_f32m1(vfloat32m1_t vd, vfloat32m1_t vs1, vfloat32m1
 
 // CHECK-RV64-LABEL: @test_vfnmsac_vf_f32m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x float> @llvm.riscv.vfnmsac.nxv2f32.f32.i64(<vscale x 2 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 2 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x float> @llvm.riscv.vfnmsac.nxv2f32.f32.i64(<vscale x 2 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 2 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x float> [[TMP0]]
 //
 vfloat32m1_t test_vfnmsac_vf_f32m1(vfloat32m1_t vd, float rs1, vfloat32m1_t vs2, size_t vl) {
@@ -153,7 +153,7 @@ vfloat32m1_t test_vfnmsac_vf_f32m1(vfloat32m1_t vd, float rs1, vfloat32m1_t vs2,
 
 // CHECK-RV64-LABEL: @test_vfnmsac_vv_f32m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.riscv.vfnmsac.nxv4f32.nxv4f32.i64(<vscale x 4 x float> [[VD:%.*]], <vscale x 4 x float> [[VS1:%.*]], <vscale x 4 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.riscv.vfnmsac.nxv4f32.nxv4f32.i64(<vscale x 4 x float> [[VD:%.*]], <vscale x 4 x float> [[VS1:%.*]], <vscale x 4 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
 vfloat32m2_t test_vfnmsac_vv_f32m2(vfloat32m2_t vd, vfloat32m2_t vs1, vfloat32m2_t vs2, size_t vl) {
@@ -162,7 +162,7 @@ vfloat32m2_t test_vfnmsac_vv_f32m2(vfloat32m2_t vd, vfloat32m2_t vs1, vfloat32m2
 
 // CHECK-RV64-LABEL: @test_vfnmsac_vf_f32m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.riscv.vfnmsac.nxv4f32.f32.i64(<vscale x 4 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 4 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.riscv.vfnmsac.nxv4f32.f32.i64(<vscale x 4 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 4 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
 vfloat32m2_t test_vfnmsac_vf_f32m2(vfloat32m2_t vd, float rs1, vfloat32m2_t vs2, size_t vl) {
@@ -171,7 +171,7 @@ vfloat32m2_t test_vfnmsac_vf_f32m2(vfloat32m2_t vd, float rs1, vfloat32m2_t vs2,
 
 // CHECK-RV64-LABEL: @test_vfnmsac_vv_f32m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x float> @llvm.riscv.vfnmsac.nxv8f32.nxv8f32.i64(<vscale x 8 x float> [[VD:%.*]], <vscale x 8 x float> [[VS1:%.*]], <vscale x 8 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x float> @llvm.riscv.vfnmsac.nxv8f32.nxv8f32.i64(<vscale x 8 x float> [[VD:%.*]], <vscale x 8 x float> [[VS1:%.*]], <vscale x 8 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x float> [[TMP0]]
 //
 vfloat32m4_t test_vfnmsac_vv_f32m4(vfloat32m4_t vd, vfloat32m4_t vs1, vfloat32m4_t vs2, size_t vl) {
@@ -180,7 +180,7 @@ vfloat32m4_t test_vfnmsac_vv_f32m4(vfloat32m4_t vd, vfloat32m4_t vs1, vfloat32m4
 
 // CHECK-RV64-LABEL: @test_vfnmsac_vf_f32m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x float> @llvm.riscv.vfnmsac.nxv8f32.f32.i64(<vscale x 8 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 8 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x float> @llvm.riscv.vfnmsac.nxv8f32.f32.i64(<vscale x 8 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 8 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x float> [[TMP0]]
 //
 vfloat32m4_t test_vfnmsac_vf_f32m4(vfloat32m4_t vd, float rs1, vfloat32m4_t vs2, size_t vl) {
@@ -189,7 +189,7 @@ vfloat32m4_t test_vfnmsac_vf_f32m4(vfloat32m4_t vd, float rs1, vfloat32m4_t vs2,
 
 // CHECK-RV64-LABEL: @test_vfnmsac_vv_f32m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x float> @llvm.riscv.vfnmsac.nxv16f32.nxv16f32.i64(<vscale x 16 x float> [[VD:%.*]], <vscale x 16 x float> [[VS1:%.*]], <vscale x 16 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x float> @llvm.riscv.vfnmsac.nxv16f32.nxv16f32.i64(<vscale x 16 x float> [[VD:%.*]], <vscale x 16 x float> [[VS1:%.*]], <vscale x 16 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x float> [[TMP0]]
 //
 vfloat32m8_t test_vfnmsac_vv_f32m8(vfloat32m8_t vd, vfloat32m8_t vs1, vfloat32m8_t vs2, size_t vl) {
@@ -198,7 +198,7 @@ vfloat32m8_t test_vfnmsac_vv_f32m8(vfloat32m8_t vd, vfloat32m8_t vs1, vfloat32m8
 
 // CHECK-RV64-LABEL: @test_vfnmsac_vf_f32m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x float> @llvm.riscv.vfnmsac.nxv16f32.f32.i64(<vscale x 16 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 16 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x float> @llvm.riscv.vfnmsac.nxv16f32.f32.i64(<vscale x 16 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 16 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x float> [[TMP0]]
 //
 vfloat32m8_t test_vfnmsac_vf_f32m8(vfloat32m8_t vd, float rs1, vfloat32m8_t vs2, size_t vl) {
@@ -207,7 +207,7 @@ vfloat32m8_t test_vfnmsac_vf_f32m8(vfloat32m8_t vd, float rs1, vfloat32m8_t vs2,
 
 // CHECK-RV64-LABEL: @test_vfnmsac_vv_f64m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x double> @llvm.riscv.vfnmsac.nxv1f64.nxv1f64.i64(<vscale x 1 x double> [[VD:%.*]], <vscale x 1 x double> [[VS1:%.*]], <vscale x 1 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x double> @llvm.riscv.vfnmsac.nxv1f64.nxv1f64.i64(<vscale x 1 x double> [[VD:%.*]], <vscale x 1 x double> [[VS1:%.*]], <vscale x 1 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x double> [[TMP0]]
 //
 vfloat64m1_t test_vfnmsac_vv_f64m1(vfloat64m1_t vd, vfloat64m1_t vs1, vfloat64m1_t vs2, size_t vl) {
@@ -216,7 +216,7 @@ vfloat64m1_t test_vfnmsac_vv_f64m1(vfloat64m1_t vd, vfloat64m1_t vs1, vfloat64m1
 
 // CHECK-RV64-LABEL: @test_vfnmsac_vf_f64m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x double> @llvm.riscv.vfnmsac.nxv1f64.f64.i64(<vscale x 1 x double> [[VD:%.*]], double [[RS1:%.*]], <vscale x 1 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x double> @llvm.riscv.vfnmsac.nxv1f64.f64.i64(<vscale x 1 x double> [[VD:%.*]], double [[RS1:%.*]], <vscale x 1 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x double> [[TMP0]]
 //
 vfloat64m1_t test_vfnmsac_vf_f64m1(vfloat64m1_t vd, double rs1, vfloat64m1_t vs2, size_t vl) {
@@ -225,7 +225,7 @@ vfloat64m1_t test_vfnmsac_vf_f64m1(vfloat64m1_t vd, double rs1, vfloat64m1_t vs2
 
 // CHECK-RV64-LABEL: @test_vfnmsac_vv_f64m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x double> @llvm.riscv.vfnmsac.nxv2f64.nxv2f64.i64(<vscale x 2 x double> [[VD:%.*]], <vscale x 2 x double> [[VS1:%.*]], <vscale x 2 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x double> @llvm.riscv.vfnmsac.nxv2f64.nxv2f64.i64(<vscale x 2 x double> [[VD:%.*]], <vscale x 2 x double> [[VS1:%.*]], <vscale x 2 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x double> [[TMP0]]
 //
 vfloat64m2_t test_vfnmsac_vv_f64m2(vfloat64m2_t vd, vfloat64m2_t vs1, vfloat64m2_t vs2, size_t vl) {
@@ -234,7 +234,7 @@ vfloat64m2_t test_vfnmsac_vv_f64m2(vfloat64m2_t vd, vfloat64m2_t vs1, vfloat64m2
 
 // CHECK-RV64-LABEL: @test_vfnmsac_vf_f64m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x double> @llvm.riscv.vfnmsac.nxv2f64.f64.i64(<vscale x 2 x double> [[VD:%.*]], double [[RS1:%.*]], <vscale x 2 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x double> @llvm.riscv.vfnmsac.nxv2f64.f64.i64(<vscale x 2 x double> [[VD:%.*]], double [[RS1:%.*]], <vscale x 2 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x double> [[TMP0]]
 //
 vfloat64m2_t test_vfnmsac_vf_f64m2(vfloat64m2_t vd, double rs1, vfloat64m2_t vs2, size_t vl) {
@@ -243,7 +243,7 @@ vfloat64m2_t test_vfnmsac_vf_f64m2(vfloat64m2_t vd, double rs1, vfloat64m2_t vs2
 
 // CHECK-RV64-LABEL: @test_vfnmsac_vv_f64m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x double> @llvm.riscv.vfnmsac.nxv4f64.nxv4f64.i64(<vscale x 4 x double> [[VD:%.*]], <vscale x 4 x double> [[VS1:%.*]], <vscale x 4 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x double> @llvm.riscv.vfnmsac.nxv4f64.nxv4f64.i64(<vscale x 4 x double> [[VD:%.*]], <vscale x 4 x double> [[VS1:%.*]], <vscale x 4 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x double> [[TMP0]]
 //
 vfloat64m4_t test_vfnmsac_vv_f64m4(vfloat64m4_t vd, vfloat64m4_t vs1, vfloat64m4_t vs2, size_t vl) {
@@ -252,7 +252,7 @@ vfloat64m4_t test_vfnmsac_vv_f64m4(vfloat64m4_t vd, vfloat64m4_t vs1, vfloat64m4
 
 // CHECK-RV64-LABEL: @test_vfnmsac_vf_f64m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x double> @llvm.riscv.vfnmsac.nxv4f64.f64.i64(<vscale x 4 x double> [[VD:%.*]], double [[RS1:%.*]], <vscale x 4 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x double> @llvm.riscv.vfnmsac.nxv4f64.f64.i64(<vscale x 4 x double> [[VD:%.*]], double [[RS1:%.*]], <vscale x 4 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x double> [[TMP0]]
 //
 vfloat64m4_t test_vfnmsac_vf_f64m4(vfloat64m4_t vd, double rs1, vfloat64m4_t vs2, size_t vl) {
@@ -261,7 +261,7 @@ vfloat64m4_t test_vfnmsac_vf_f64m4(vfloat64m4_t vd, double rs1, vfloat64m4_t vs2
 
 // CHECK-RV64-LABEL: @test_vfnmsac_vv_f64m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x double> @llvm.riscv.vfnmsac.nxv8f64.nxv8f64.i64(<vscale x 8 x double> [[VD:%.*]], <vscale x 8 x double> [[VS1:%.*]], <vscale x 8 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x double> @llvm.riscv.vfnmsac.nxv8f64.nxv8f64.i64(<vscale x 8 x double> [[VD:%.*]], <vscale x 8 x double> [[VS1:%.*]], <vscale x 8 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x double> [[TMP0]]
 //
 vfloat64m8_t test_vfnmsac_vv_f64m8(vfloat64m8_t vd, vfloat64m8_t vs1, vfloat64m8_t vs2, size_t vl) {
@@ -270,7 +270,7 @@ vfloat64m8_t test_vfnmsac_vv_f64m8(vfloat64m8_t vd, vfloat64m8_t vs1, vfloat64m8
 
 // CHECK-RV64-LABEL: @test_vfnmsac_vf_f64m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x double> @llvm.riscv.vfnmsac.nxv8f64.f64.i64(<vscale x 8 x double> [[VD:%.*]], double [[RS1:%.*]], <vscale x 8 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x double> @llvm.riscv.vfnmsac.nxv8f64.f64.i64(<vscale x 8 x double> [[VD:%.*]], double [[RS1:%.*]], <vscale x 8 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x double> [[TMP0]]
 //
 vfloat64m8_t test_vfnmsac_vf_f64m8(vfloat64m8_t vd, double rs1, vfloat64m8_t vs2, size_t vl) {

diff  --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vfnmsub.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vfnmsub.c
index ea5d8a5530454..bccbe57c16b87 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vfnmsub.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vfnmsub.c
@@ -9,7 +9,7 @@
 
 // CHECK-RV64-LABEL: @test_vfnmsub_vv_f16mf4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x half> @llvm.riscv.vfnmsub.nxv1f16.nxv1f16.i64(<vscale x 1 x half> [[VD:%.*]], <vscale x 1 x half> [[VS1:%.*]], <vscale x 1 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x half> @llvm.riscv.vfnmsub.nxv1f16.nxv1f16.i64(<vscale x 1 x half> [[VD:%.*]], <vscale x 1 x half> [[VS1:%.*]], <vscale x 1 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x half> [[TMP0]]
 //
 vfloat16mf4_t test_vfnmsub_vv_f16mf4(vfloat16mf4_t vd, vfloat16mf4_t vs1, vfloat16mf4_t vs2, size_t vl) {
@@ -18,7 +18,7 @@ vfloat16mf4_t test_vfnmsub_vv_f16mf4(vfloat16mf4_t vd, vfloat16mf4_t vs1, vfloat
 
 // CHECK-RV64-LABEL: @test_vfnmsub_vf_f16mf4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x half> @llvm.riscv.vfnmsub.nxv1f16.f16.i64(<vscale x 1 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 1 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x half> @llvm.riscv.vfnmsub.nxv1f16.f16.i64(<vscale x 1 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 1 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x half> [[TMP0]]
 //
 vfloat16mf4_t test_vfnmsub_vf_f16mf4(vfloat16mf4_t vd, _Float16 rs1, vfloat16mf4_t vs2, size_t vl) {
@@ -27,7 +27,7 @@ vfloat16mf4_t test_vfnmsub_vf_f16mf4(vfloat16mf4_t vd, _Float16 rs1, vfloat16mf4
 
 // CHECK-RV64-LABEL: @test_vfnmsub_vv_f16mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x half> @llvm.riscv.vfnmsub.nxv2f16.nxv2f16.i64(<vscale x 2 x half> [[VD:%.*]], <vscale x 2 x half> [[VS1:%.*]], <vscale x 2 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x half> @llvm.riscv.vfnmsub.nxv2f16.nxv2f16.i64(<vscale x 2 x half> [[VD:%.*]], <vscale x 2 x half> [[VS1:%.*]], <vscale x 2 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x half> [[TMP0]]
 //
 vfloat16mf2_t test_vfnmsub_vv_f16mf2(vfloat16mf2_t vd, vfloat16mf2_t vs1, vfloat16mf2_t vs2, size_t vl) {
@@ -36,7 +36,7 @@ vfloat16mf2_t test_vfnmsub_vv_f16mf2(vfloat16mf2_t vd, vfloat16mf2_t vs1, vfloat
 
 // CHECK-RV64-LABEL: @test_vfnmsub_vf_f16mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x half> @llvm.riscv.vfnmsub.nxv2f16.f16.i64(<vscale x 2 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 2 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x half> @llvm.riscv.vfnmsub.nxv2f16.f16.i64(<vscale x 2 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 2 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x half> [[TMP0]]
 //
 vfloat16mf2_t test_vfnmsub_vf_f16mf2(vfloat16mf2_t vd, _Float16 rs1, vfloat16mf2_t vs2, size_t vl) {
@@ -45,7 +45,7 @@ vfloat16mf2_t test_vfnmsub_vf_f16mf2(vfloat16mf2_t vd, _Float16 rs1, vfloat16mf2
 
 // CHECK-RV64-LABEL: @test_vfnmsub_vv_f16m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x half> @llvm.riscv.vfnmsub.nxv4f16.nxv4f16.i64(<vscale x 4 x half> [[VD:%.*]], <vscale x 4 x half> [[VS1:%.*]], <vscale x 4 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x half> @llvm.riscv.vfnmsub.nxv4f16.nxv4f16.i64(<vscale x 4 x half> [[VD:%.*]], <vscale x 4 x half> [[VS1:%.*]], <vscale x 4 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x half> [[TMP0]]
 //
 vfloat16m1_t test_vfnmsub_vv_f16m1(vfloat16m1_t vd, vfloat16m1_t vs1, vfloat16m1_t vs2, size_t vl) {
@@ -54,7 +54,7 @@ vfloat16m1_t test_vfnmsub_vv_f16m1(vfloat16m1_t vd, vfloat16m1_t vs1, vfloat16m1
 
 // CHECK-RV64-LABEL: @test_vfnmsub_vf_f16m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x half> @llvm.riscv.vfnmsub.nxv4f16.f16.i64(<vscale x 4 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 4 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x half> @llvm.riscv.vfnmsub.nxv4f16.f16.i64(<vscale x 4 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 4 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x half> [[TMP0]]
 //
 vfloat16m1_t test_vfnmsub_vf_f16m1(vfloat16m1_t vd, _Float16 rs1, vfloat16m1_t vs2, size_t vl) {
@@ -63,7 +63,7 @@ vfloat16m1_t test_vfnmsub_vf_f16m1(vfloat16m1_t vd, _Float16 rs1, vfloat16m1_t v
 
 // CHECK-RV64-LABEL: @test_vfnmsub_vv_f16m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x half> @llvm.riscv.vfnmsub.nxv8f16.nxv8f16.i64(<vscale x 8 x half> [[VD:%.*]], <vscale x 8 x half> [[VS1:%.*]], <vscale x 8 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x half> @llvm.riscv.vfnmsub.nxv8f16.nxv8f16.i64(<vscale x 8 x half> [[VD:%.*]], <vscale x 8 x half> [[VS1:%.*]], <vscale x 8 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x half> [[TMP0]]
 //
 vfloat16m2_t test_vfnmsub_vv_f16m2(vfloat16m2_t vd, vfloat16m2_t vs1, vfloat16m2_t vs2, size_t vl) {
@@ -72,7 +72,7 @@ vfloat16m2_t test_vfnmsub_vv_f16m2(vfloat16m2_t vd, vfloat16m2_t vs1, vfloat16m2
 
 // CHECK-RV64-LABEL: @test_vfnmsub_vf_f16m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x half> @llvm.riscv.vfnmsub.nxv8f16.f16.i64(<vscale x 8 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 8 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x half> @llvm.riscv.vfnmsub.nxv8f16.f16.i64(<vscale x 8 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 8 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x half> [[TMP0]]
 //
 vfloat16m2_t test_vfnmsub_vf_f16m2(vfloat16m2_t vd, _Float16 rs1, vfloat16m2_t vs2, size_t vl) {
@@ -81,7 +81,7 @@ vfloat16m2_t test_vfnmsub_vf_f16m2(vfloat16m2_t vd, _Float16 rs1, vfloat16m2_t v
 
 // CHECK-RV64-LABEL: @test_vfnmsub_vv_f16m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x half> @llvm.riscv.vfnmsub.nxv16f16.nxv16f16.i64(<vscale x 16 x half> [[VD:%.*]], <vscale x 16 x half> [[VS1:%.*]], <vscale x 16 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x half> @llvm.riscv.vfnmsub.nxv16f16.nxv16f16.i64(<vscale x 16 x half> [[VD:%.*]], <vscale x 16 x half> [[VS1:%.*]], <vscale x 16 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x half> [[TMP0]]
 //
 vfloat16m4_t test_vfnmsub_vv_f16m4(vfloat16m4_t vd, vfloat16m4_t vs1, vfloat16m4_t vs2, size_t vl) {
@@ -90,7 +90,7 @@ vfloat16m4_t test_vfnmsub_vv_f16m4(vfloat16m4_t vd, vfloat16m4_t vs1, vfloat16m4
 
 // CHECK-RV64-LABEL: @test_vfnmsub_vf_f16m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x half> @llvm.riscv.vfnmsub.nxv16f16.f16.i64(<vscale x 16 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 16 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x half> @llvm.riscv.vfnmsub.nxv16f16.f16.i64(<vscale x 16 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 16 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x half> [[TMP0]]
 //
 vfloat16m4_t test_vfnmsub_vf_f16m4(vfloat16m4_t vd, _Float16 rs1, vfloat16m4_t vs2, size_t vl) {
@@ -99,7 +99,7 @@ vfloat16m4_t test_vfnmsub_vf_f16m4(vfloat16m4_t vd, _Float16 rs1, vfloat16m4_t v
 
 // CHECK-RV64-LABEL: @test_vfnmsub_vv_f16m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x half> @llvm.riscv.vfnmsub.nxv32f16.nxv32f16.i64(<vscale x 32 x half> [[VD:%.*]], <vscale x 32 x half> [[VS1:%.*]], <vscale x 32 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x half> @llvm.riscv.vfnmsub.nxv32f16.nxv32f16.i64(<vscale x 32 x half> [[VD:%.*]], <vscale x 32 x half> [[VS1:%.*]], <vscale x 32 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 32 x half> [[TMP0]]
 //
 vfloat16m8_t test_vfnmsub_vv_f16m8(vfloat16m8_t vd, vfloat16m8_t vs1, vfloat16m8_t vs2, size_t vl) {
@@ -108,7 +108,7 @@ vfloat16m8_t test_vfnmsub_vv_f16m8(vfloat16m8_t vd, vfloat16m8_t vs1, vfloat16m8
 
 // CHECK-RV64-LABEL: @test_vfnmsub_vf_f16m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x half> @llvm.riscv.vfnmsub.nxv32f16.f16.i64(<vscale x 32 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 32 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x half> @llvm.riscv.vfnmsub.nxv32f16.f16.i64(<vscale x 32 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 32 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 32 x half> [[TMP0]]
 //
 vfloat16m8_t test_vfnmsub_vf_f16m8(vfloat16m8_t vd, _Float16 rs1, vfloat16m8_t vs2, size_t vl) {
@@ -117,7 +117,7 @@ vfloat16m8_t test_vfnmsub_vf_f16m8(vfloat16m8_t vd, _Float16 rs1, vfloat16m8_t v
 
 // CHECK-RV64-LABEL: @test_vfnmsub_vv_f32mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x float> @llvm.riscv.vfnmsub.nxv1f32.nxv1f32.i64(<vscale x 1 x float> [[VD:%.*]], <vscale x 1 x float> [[VS1:%.*]], <vscale x 1 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x float> @llvm.riscv.vfnmsub.nxv1f32.nxv1f32.i64(<vscale x 1 x float> [[VD:%.*]], <vscale x 1 x float> [[VS1:%.*]], <vscale x 1 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x float> [[TMP0]]
 //
 vfloat32mf2_t test_vfnmsub_vv_f32mf2(vfloat32mf2_t vd, vfloat32mf2_t vs1, vfloat32mf2_t vs2, size_t vl) {
@@ -126,7 +126,7 @@ vfloat32mf2_t test_vfnmsub_vv_f32mf2(vfloat32mf2_t vd, vfloat32mf2_t vs1, vfloat
 
 // CHECK-RV64-LABEL: @test_vfnmsub_vf_f32mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x float> @llvm.riscv.vfnmsub.nxv1f32.f32.i64(<vscale x 1 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 1 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x float> @llvm.riscv.vfnmsub.nxv1f32.f32.i64(<vscale x 1 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 1 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x float> [[TMP0]]
 //
 vfloat32mf2_t test_vfnmsub_vf_f32mf2(vfloat32mf2_t vd, float rs1, vfloat32mf2_t vs2, size_t vl) {
@@ -135,7 +135,7 @@ vfloat32mf2_t test_vfnmsub_vf_f32mf2(vfloat32mf2_t vd, float rs1, vfloat32mf2_t
 
 // CHECK-RV64-LABEL: @test_vfnmsub_vv_f32m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x float> @llvm.riscv.vfnmsub.nxv2f32.nxv2f32.i64(<vscale x 2 x float> [[VD:%.*]], <vscale x 2 x float> [[VS1:%.*]], <vscale x 2 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x float> @llvm.riscv.vfnmsub.nxv2f32.nxv2f32.i64(<vscale x 2 x float> [[VD:%.*]], <vscale x 2 x float> [[VS1:%.*]], <vscale x 2 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x float> [[TMP0]]
 //
 vfloat32m1_t test_vfnmsub_vv_f32m1(vfloat32m1_t vd, vfloat32m1_t vs1, vfloat32m1_t vs2, size_t vl) {
@@ -144,7 +144,7 @@ vfloat32m1_t test_vfnmsub_vv_f32m1(vfloat32m1_t vd, vfloat32m1_t vs1, vfloat32m1
 
 // CHECK-RV64-LABEL: @test_vfnmsub_vf_f32m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x float> @llvm.riscv.vfnmsub.nxv2f32.f32.i64(<vscale x 2 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 2 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x float> @llvm.riscv.vfnmsub.nxv2f32.f32.i64(<vscale x 2 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 2 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x float> [[TMP0]]
 //
 vfloat32m1_t test_vfnmsub_vf_f32m1(vfloat32m1_t vd, float rs1, vfloat32m1_t vs2, size_t vl) {
@@ -153,7 +153,7 @@ vfloat32m1_t test_vfnmsub_vf_f32m1(vfloat32m1_t vd, float rs1, vfloat32m1_t vs2,
 
 // CHECK-RV64-LABEL: @test_vfnmsub_vv_f32m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.riscv.vfnmsub.nxv4f32.nxv4f32.i64(<vscale x 4 x float> [[VD:%.*]], <vscale x 4 x float> [[VS1:%.*]], <vscale x 4 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.riscv.vfnmsub.nxv4f32.nxv4f32.i64(<vscale x 4 x float> [[VD:%.*]], <vscale x 4 x float> [[VS1:%.*]], <vscale x 4 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
 vfloat32m2_t test_vfnmsub_vv_f32m2(vfloat32m2_t vd, vfloat32m2_t vs1, vfloat32m2_t vs2, size_t vl) {
@@ -162,7 +162,7 @@ vfloat32m2_t test_vfnmsub_vv_f32m2(vfloat32m2_t vd, vfloat32m2_t vs1, vfloat32m2
 
 // CHECK-RV64-LABEL: @test_vfnmsub_vf_f32m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.riscv.vfnmsub.nxv4f32.f32.i64(<vscale x 4 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 4 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.riscv.vfnmsub.nxv4f32.f32.i64(<vscale x 4 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 4 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
 vfloat32m2_t test_vfnmsub_vf_f32m2(vfloat32m2_t vd, float rs1, vfloat32m2_t vs2, size_t vl) {
@@ -171,7 +171,7 @@ vfloat32m2_t test_vfnmsub_vf_f32m2(vfloat32m2_t vd, float rs1, vfloat32m2_t vs2,
 
 // CHECK-RV64-LABEL: @test_vfnmsub_vv_f32m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x float> @llvm.riscv.vfnmsub.nxv8f32.nxv8f32.i64(<vscale x 8 x float> [[VD:%.*]], <vscale x 8 x float> [[VS1:%.*]], <vscale x 8 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x float> @llvm.riscv.vfnmsub.nxv8f32.nxv8f32.i64(<vscale x 8 x float> [[VD:%.*]], <vscale x 8 x float> [[VS1:%.*]], <vscale x 8 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x float> [[TMP0]]
 //
 vfloat32m4_t test_vfnmsub_vv_f32m4(vfloat32m4_t vd, vfloat32m4_t vs1, vfloat32m4_t vs2, size_t vl) {
@@ -180,7 +180,7 @@ vfloat32m4_t test_vfnmsub_vv_f32m4(vfloat32m4_t vd, vfloat32m4_t vs1, vfloat32m4
 
 // CHECK-RV64-LABEL: @test_vfnmsub_vf_f32m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x float> @llvm.riscv.vfnmsub.nxv8f32.f32.i64(<vscale x 8 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 8 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x float> @llvm.riscv.vfnmsub.nxv8f32.f32.i64(<vscale x 8 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 8 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x float> [[TMP0]]
 //
 vfloat32m4_t test_vfnmsub_vf_f32m4(vfloat32m4_t vd, float rs1, vfloat32m4_t vs2, size_t vl) {
@@ -189,7 +189,7 @@ vfloat32m4_t test_vfnmsub_vf_f32m4(vfloat32m4_t vd, float rs1, vfloat32m4_t vs2,
 
 // CHECK-RV64-LABEL: @test_vfnmsub_vv_f32m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x float> @llvm.riscv.vfnmsub.nxv16f32.nxv16f32.i64(<vscale x 16 x float> [[VD:%.*]], <vscale x 16 x float> [[VS1:%.*]], <vscale x 16 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x float> @llvm.riscv.vfnmsub.nxv16f32.nxv16f32.i64(<vscale x 16 x float> [[VD:%.*]], <vscale x 16 x float> [[VS1:%.*]], <vscale x 16 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x float> [[TMP0]]
 //
 vfloat32m8_t test_vfnmsub_vv_f32m8(vfloat32m8_t vd, vfloat32m8_t vs1, vfloat32m8_t vs2, size_t vl) {
@@ -198,7 +198,7 @@ vfloat32m8_t test_vfnmsub_vv_f32m8(vfloat32m8_t vd, vfloat32m8_t vs1, vfloat32m8
 
 // CHECK-RV64-LABEL: @test_vfnmsub_vf_f32m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x float> @llvm.riscv.vfnmsub.nxv16f32.f32.i64(<vscale x 16 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 16 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x float> @llvm.riscv.vfnmsub.nxv16f32.f32.i64(<vscale x 16 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 16 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x float> [[TMP0]]
 //
 vfloat32m8_t test_vfnmsub_vf_f32m8(vfloat32m8_t vd, float rs1, vfloat32m8_t vs2, size_t vl) {
@@ -207,7 +207,7 @@ vfloat32m8_t test_vfnmsub_vf_f32m8(vfloat32m8_t vd, float rs1, vfloat32m8_t vs2,
 
 // CHECK-RV64-LABEL: @test_vfnmsub_vv_f64m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x double> @llvm.riscv.vfnmsub.nxv1f64.nxv1f64.i64(<vscale x 1 x double> [[VD:%.*]], <vscale x 1 x double> [[VS1:%.*]], <vscale x 1 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x double> @llvm.riscv.vfnmsub.nxv1f64.nxv1f64.i64(<vscale x 1 x double> [[VD:%.*]], <vscale x 1 x double> [[VS1:%.*]], <vscale x 1 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x double> [[TMP0]]
 //
 vfloat64m1_t test_vfnmsub_vv_f64m1(vfloat64m1_t vd, vfloat64m1_t vs1, vfloat64m1_t vs2, size_t vl) {
@@ -216,7 +216,7 @@ vfloat64m1_t test_vfnmsub_vv_f64m1(vfloat64m1_t vd, vfloat64m1_t vs1, vfloat64m1
 
 // CHECK-RV64-LABEL: @test_vfnmsub_vf_f64m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x double> @llvm.riscv.vfnmsub.nxv1f64.f64.i64(<vscale x 1 x double> [[VD:%.*]], double [[RS1:%.*]], <vscale x 1 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x double> @llvm.riscv.vfnmsub.nxv1f64.f64.i64(<vscale x 1 x double> [[VD:%.*]], double [[RS1:%.*]], <vscale x 1 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x double> [[TMP0]]
 //
 vfloat64m1_t test_vfnmsub_vf_f64m1(vfloat64m1_t vd, double rs1, vfloat64m1_t vs2, size_t vl) {
@@ -225,7 +225,7 @@ vfloat64m1_t test_vfnmsub_vf_f64m1(vfloat64m1_t vd, double rs1, vfloat64m1_t vs2
 
 // CHECK-RV64-LABEL: @test_vfnmsub_vv_f64m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x double> @llvm.riscv.vfnmsub.nxv2f64.nxv2f64.i64(<vscale x 2 x double> [[VD:%.*]], <vscale x 2 x double> [[VS1:%.*]], <vscale x 2 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x double> @llvm.riscv.vfnmsub.nxv2f64.nxv2f64.i64(<vscale x 2 x double> [[VD:%.*]], <vscale x 2 x double> [[VS1:%.*]], <vscale x 2 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x double> [[TMP0]]
 //
 vfloat64m2_t test_vfnmsub_vv_f64m2(vfloat64m2_t vd, vfloat64m2_t vs1, vfloat64m2_t vs2, size_t vl) {
@@ -234,7 +234,7 @@ vfloat64m2_t test_vfnmsub_vv_f64m2(vfloat64m2_t vd, vfloat64m2_t vs1, vfloat64m2
 
 // CHECK-RV64-LABEL: @test_vfnmsub_vf_f64m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x double> @llvm.riscv.vfnmsub.nxv2f64.f64.i64(<vscale x 2 x double> [[VD:%.*]], double [[RS1:%.*]], <vscale x 2 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x double> @llvm.riscv.vfnmsub.nxv2f64.f64.i64(<vscale x 2 x double> [[VD:%.*]], double [[RS1:%.*]], <vscale x 2 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x double> [[TMP0]]
 //
 vfloat64m2_t test_vfnmsub_vf_f64m2(vfloat64m2_t vd, double rs1, vfloat64m2_t vs2, size_t vl) {
@@ -243,7 +243,7 @@ vfloat64m2_t test_vfnmsub_vf_f64m2(vfloat64m2_t vd, double rs1, vfloat64m2_t vs2
 
 // CHECK-RV64-LABEL: @test_vfnmsub_vv_f64m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x double> @llvm.riscv.vfnmsub.nxv4f64.nxv4f64.i64(<vscale x 4 x double> [[VD:%.*]], <vscale x 4 x double> [[VS1:%.*]], <vscale x 4 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x double> @llvm.riscv.vfnmsub.nxv4f64.nxv4f64.i64(<vscale x 4 x double> [[VD:%.*]], <vscale x 4 x double> [[VS1:%.*]], <vscale x 4 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x double> [[TMP0]]
 //
 vfloat64m4_t test_vfnmsub_vv_f64m4(vfloat64m4_t vd, vfloat64m4_t vs1, vfloat64m4_t vs2, size_t vl) {
@@ -252,7 +252,7 @@ vfloat64m4_t test_vfnmsub_vv_f64m4(vfloat64m4_t vd, vfloat64m4_t vs1, vfloat64m4
 
 // CHECK-RV64-LABEL: @test_vfnmsub_vf_f64m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x double> @llvm.riscv.vfnmsub.nxv4f64.f64.i64(<vscale x 4 x double> [[VD:%.*]], double [[RS1:%.*]], <vscale x 4 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x double> @llvm.riscv.vfnmsub.nxv4f64.f64.i64(<vscale x 4 x double> [[VD:%.*]], double [[RS1:%.*]], <vscale x 4 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x double> [[TMP0]]
 //
 vfloat64m4_t test_vfnmsub_vf_f64m4(vfloat64m4_t vd, double rs1, vfloat64m4_t vs2, size_t vl) {
@@ -261,7 +261,7 @@ vfloat64m4_t test_vfnmsub_vf_f64m4(vfloat64m4_t vd, double rs1, vfloat64m4_t vs2
 
 // CHECK-RV64-LABEL: @test_vfnmsub_vv_f64m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x double> @llvm.riscv.vfnmsub.nxv8f64.nxv8f64.i64(<vscale x 8 x double> [[VD:%.*]], <vscale x 8 x double> [[VS1:%.*]], <vscale x 8 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x double> @llvm.riscv.vfnmsub.nxv8f64.nxv8f64.i64(<vscale x 8 x double> [[VD:%.*]], <vscale x 8 x double> [[VS1:%.*]], <vscale x 8 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x double> [[TMP0]]
 //
 vfloat64m8_t test_vfnmsub_vv_f64m8(vfloat64m8_t vd, vfloat64m8_t vs1, vfloat64m8_t vs2, size_t vl) {
@@ -270,7 +270,7 @@ vfloat64m8_t test_vfnmsub_vv_f64m8(vfloat64m8_t vd, vfloat64m8_t vs1, vfloat64m8
 
 // CHECK-RV64-LABEL: @test_vfnmsub_vf_f64m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x double> @llvm.riscv.vfnmsub.nxv8f64.f64.i64(<vscale x 8 x double> [[VD:%.*]], double [[RS1:%.*]], <vscale x 8 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x double> @llvm.riscv.vfnmsub.nxv8f64.f64.i64(<vscale x 8 x double> [[VD:%.*]], double [[RS1:%.*]], <vscale x 8 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x double> [[TMP0]]
 //
 vfloat64m8_t test_vfnmsub_vf_f64m8(vfloat64m8_t vd, double rs1, vfloat64m8_t vs2, size_t vl) {

diff  --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vfwmacc.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vfwmacc.c
index d4ca1ecf99478..6808ff16faf4e 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vfwmacc.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vfwmacc.c
@@ -9,7 +9,7 @@
 
 // CHECK-RV64-LABEL: @test_vfwmacc_vv_f32mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x float> @llvm.riscv.vfwmacc.nxv1f32.nxv1f16.nxv1f16.i64(<vscale x 1 x float> [[VD:%.*]], <vscale x 1 x half> [[VS1:%.*]], <vscale x 1 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x float> @llvm.riscv.vfwmacc.nxv1f32.nxv1f16.nxv1f16.i64(<vscale x 1 x float> [[VD:%.*]], <vscale x 1 x half> [[VS1:%.*]], <vscale x 1 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x float> [[TMP0]]
 //
 vfloat32mf2_t test_vfwmacc_vv_f32mf2(vfloat32mf2_t vd, vfloat16mf4_t vs1, vfloat16mf4_t vs2, size_t vl) {
@@ -18,7 +18,7 @@ vfloat32mf2_t test_vfwmacc_vv_f32mf2(vfloat32mf2_t vd, vfloat16mf4_t vs1, vfloat
 
 // CHECK-RV64-LABEL: @test_vfwmacc_vf_f32mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x float> @llvm.riscv.vfwmacc.nxv1f32.f16.nxv1f16.i64(<vscale x 1 x float> [[VD:%.*]], half [[VS1:%.*]], <vscale x 1 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x float> @llvm.riscv.vfwmacc.nxv1f32.f16.nxv1f16.i64(<vscale x 1 x float> [[VD:%.*]], half [[VS1:%.*]], <vscale x 1 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x float> [[TMP0]]
 //
 vfloat32mf2_t test_vfwmacc_vf_f32mf2(vfloat32mf2_t vd, _Float16 vs1, vfloat16mf4_t vs2, size_t vl) {
@@ -27,7 +27,7 @@ vfloat32mf2_t test_vfwmacc_vf_f32mf2(vfloat32mf2_t vd, _Float16 vs1, vfloat16mf4
 
 // CHECK-RV64-LABEL: @test_vfwmacc_vv_f32m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x float> @llvm.riscv.vfwmacc.nxv2f32.nxv2f16.nxv2f16.i64(<vscale x 2 x float> [[VD:%.*]], <vscale x 2 x half> [[VS1:%.*]], <vscale x 2 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x float> @llvm.riscv.vfwmacc.nxv2f32.nxv2f16.nxv2f16.i64(<vscale x 2 x float> [[VD:%.*]], <vscale x 2 x half> [[VS1:%.*]], <vscale x 2 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x float> [[TMP0]]
 //
 vfloat32m1_t test_vfwmacc_vv_f32m1(vfloat32m1_t vd, vfloat16mf2_t vs1, vfloat16mf2_t vs2, size_t vl) {
@@ -36,7 +36,7 @@ vfloat32m1_t test_vfwmacc_vv_f32m1(vfloat32m1_t vd, vfloat16mf2_t vs1, vfloat16m
 
 // CHECK-RV64-LABEL: @test_vfwmacc_vf_f32m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x float> @llvm.riscv.vfwmacc.nxv2f32.f16.nxv2f16.i64(<vscale x 2 x float> [[VD:%.*]], half [[VS1:%.*]], <vscale x 2 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x float> @llvm.riscv.vfwmacc.nxv2f32.f16.nxv2f16.i64(<vscale x 2 x float> [[VD:%.*]], half [[VS1:%.*]], <vscale x 2 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x float> [[TMP0]]
 //
 vfloat32m1_t test_vfwmacc_vf_f32m1(vfloat32m1_t vd, _Float16 vs1, vfloat16mf2_t vs2, size_t vl) {
@@ -45,7 +45,7 @@ vfloat32m1_t test_vfwmacc_vf_f32m1(vfloat32m1_t vd, _Float16 vs1, vfloat16mf2_t
 
 // CHECK-RV64-LABEL: @test_vfwmacc_vv_f32m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.riscv.vfwmacc.nxv4f32.nxv4f16.nxv4f16.i64(<vscale x 4 x float> [[VD:%.*]], <vscale x 4 x half> [[VS1:%.*]], <vscale x 4 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.riscv.vfwmacc.nxv4f32.nxv4f16.nxv4f16.i64(<vscale x 4 x float> [[VD:%.*]], <vscale x 4 x half> [[VS1:%.*]], <vscale x 4 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
 vfloat32m2_t test_vfwmacc_vv_f32m2(vfloat32m2_t vd, vfloat16m1_t vs1, vfloat16m1_t vs2, size_t vl) {
@@ -54,7 +54,7 @@ vfloat32m2_t test_vfwmacc_vv_f32m2(vfloat32m2_t vd, vfloat16m1_t vs1, vfloat16m1
 
 // CHECK-RV64-LABEL: @test_vfwmacc_vf_f32m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.riscv.vfwmacc.nxv4f32.f16.nxv4f16.i64(<vscale x 4 x float> [[VD:%.*]], half [[VS1:%.*]], <vscale x 4 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.riscv.vfwmacc.nxv4f32.f16.nxv4f16.i64(<vscale x 4 x float> [[VD:%.*]], half [[VS1:%.*]], <vscale x 4 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
 vfloat32m2_t test_vfwmacc_vf_f32m2(vfloat32m2_t vd, _Float16 vs1, vfloat16m1_t vs2, size_t vl) {
@@ -63,7 +63,7 @@ vfloat32m2_t test_vfwmacc_vf_f32m2(vfloat32m2_t vd, _Float16 vs1, vfloat16m1_t v
 
 // CHECK-RV64-LABEL: @test_vfwmacc_vv_f32m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x float> @llvm.riscv.vfwmacc.nxv8f32.nxv8f16.nxv8f16.i64(<vscale x 8 x float> [[VD:%.*]], <vscale x 8 x half> [[VS1:%.*]], <vscale x 8 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x float> @llvm.riscv.vfwmacc.nxv8f32.nxv8f16.nxv8f16.i64(<vscale x 8 x float> [[VD:%.*]], <vscale x 8 x half> [[VS1:%.*]], <vscale x 8 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x float> [[TMP0]]
 //
 vfloat32m4_t test_vfwmacc_vv_f32m4(vfloat32m4_t vd, vfloat16m2_t vs1, vfloat16m2_t vs2, size_t vl) {
@@ -72,7 +72,7 @@ vfloat32m4_t test_vfwmacc_vv_f32m4(vfloat32m4_t vd, vfloat16m2_t vs1, vfloat16m2
 
 // CHECK-RV64-LABEL: @test_vfwmacc_vf_f32m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x float> @llvm.riscv.vfwmacc.nxv8f32.f16.nxv8f16.i64(<vscale x 8 x float> [[VD:%.*]], half [[VS1:%.*]], <vscale x 8 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x float> @llvm.riscv.vfwmacc.nxv8f32.f16.nxv8f16.i64(<vscale x 8 x float> [[VD:%.*]], half [[VS1:%.*]], <vscale x 8 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x float> [[TMP0]]
 //
 vfloat32m4_t test_vfwmacc_vf_f32m4(vfloat32m4_t vd, _Float16 vs1, vfloat16m2_t vs2, size_t vl) {
@@ -81,7 +81,7 @@ vfloat32m4_t test_vfwmacc_vf_f32m4(vfloat32m4_t vd, _Float16 vs1, vfloat16m2_t v
 
 // CHECK-RV64-LABEL: @test_vfwmacc_vv_f32m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x float> @llvm.riscv.vfwmacc.nxv16f32.nxv16f16.nxv16f16.i64(<vscale x 16 x float> [[VD:%.*]], <vscale x 16 x half> [[VS1:%.*]], <vscale x 16 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x float> @llvm.riscv.vfwmacc.nxv16f32.nxv16f16.nxv16f16.i64(<vscale x 16 x float> [[VD:%.*]], <vscale x 16 x half> [[VS1:%.*]], <vscale x 16 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x float> [[TMP0]]
 //
 vfloat32m8_t test_vfwmacc_vv_f32m8(vfloat32m8_t vd, vfloat16m4_t vs1, vfloat16m4_t vs2, size_t vl) {
@@ -90,7 +90,7 @@ vfloat32m8_t test_vfwmacc_vv_f32m8(vfloat32m8_t vd, vfloat16m4_t vs1, vfloat16m4
 
 // CHECK-RV64-LABEL: @test_vfwmacc_vf_f32m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x float> @llvm.riscv.vfwmacc.nxv16f32.f16.nxv16f16.i64(<vscale x 16 x float> [[VD:%.*]], half [[VS1:%.*]], <vscale x 16 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x float> @llvm.riscv.vfwmacc.nxv16f32.f16.nxv16f16.i64(<vscale x 16 x float> [[VD:%.*]], half [[VS1:%.*]], <vscale x 16 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x float> [[TMP0]]
 //
 vfloat32m8_t test_vfwmacc_vf_f32m8(vfloat32m8_t vd, _Float16 vs1, vfloat16m4_t vs2, size_t vl) {
@@ -99,7 +99,7 @@ vfloat32m8_t test_vfwmacc_vf_f32m8(vfloat32m8_t vd, _Float16 vs1, vfloat16m4_t v
 
 // CHECK-RV64-LABEL: @test_vfwmacc_vv_f64m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x double> @llvm.riscv.vfwmacc.nxv1f64.nxv1f32.nxv1f32.i64(<vscale x 1 x double> [[VD:%.*]], <vscale x 1 x float> [[VS1:%.*]], <vscale x 1 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x double> @llvm.riscv.vfwmacc.nxv1f64.nxv1f32.nxv1f32.i64(<vscale x 1 x double> [[VD:%.*]], <vscale x 1 x float> [[VS1:%.*]], <vscale x 1 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x double> [[TMP0]]
 //
 vfloat64m1_t test_vfwmacc_vv_f64m1(vfloat64m1_t vd, vfloat32mf2_t vs1, vfloat32mf2_t vs2, size_t vl) {
@@ -108,7 +108,7 @@ vfloat64m1_t test_vfwmacc_vv_f64m1(vfloat64m1_t vd, vfloat32mf2_t vs1, vfloat32m
 
 // CHECK-RV64-LABEL: @test_vfwmacc_vf_f64m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x double> @llvm.riscv.vfwmacc.nxv1f64.f32.nxv1f32.i64(<vscale x 1 x double> [[VD:%.*]], float [[VS1:%.*]], <vscale x 1 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x double> @llvm.riscv.vfwmacc.nxv1f64.f32.nxv1f32.i64(<vscale x 1 x double> [[VD:%.*]], float [[VS1:%.*]], <vscale x 1 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x double> [[TMP0]]
 //
 vfloat64m1_t test_vfwmacc_vf_f64m1(vfloat64m1_t vd, float vs1, vfloat32mf2_t vs2, size_t vl) {
@@ -117,7 +117,7 @@ vfloat64m1_t test_vfwmacc_vf_f64m1(vfloat64m1_t vd, float vs1, vfloat32mf2_t vs2
 
 // CHECK-RV64-LABEL: @test_vfwmacc_vv_f64m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x double> @llvm.riscv.vfwmacc.nxv2f64.nxv2f32.nxv2f32.i64(<vscale x 2 x double> [[VD:%.*]], <vscale x 2 x float> [[VS1:%.*]], <vscale x 2 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x double> @llvm.riscv.vfwmacc.nxv2f64.nxv2f32.nxv2f32.i64(<vscale x 2 x double> [[VD:%.*]], <vscale x 2 x float> [[VS1:%.*]], <vscale x 2 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x double> [[TMP0]]
 //
 vfloat64m2_t test_vfwmacc_vv_f64m2(vfloat64m2_t vd, vfloat32m1_t vs1, vfloat32m1_t vs2, size_t vl) {
@@ -126,7 +126,7 @@ vfloat64m2_t test_vfwmacc_vv_f64m2(vfloat64m2_t vd, vfloat32m1_t vs1, vfloat32m1
 
 // CHECK-RV64-LABEL: @test_vfwmacc_vf_f64m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x double> @llvm.riscv.vfwmacc.nxv2f64.f32.nxv2f32.i64(<vscale x 2 x double> [[VD:%.*]], float [[VS1:%.*]], <vscale x 2 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x double> @llvm.riscv.vfwmacc.nxv2f64.f32.nxv2f32.i64(<vscale x 2 x double> [[VD:%.*]], float [[VS1:%.*]], <vscale x 2 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x double> [[TMP0]]
 //
 vfloat64m2_t test_vfwmacc_vf_f64m2(vfloat64m2_t vd, float vs1, vfloat32m1_t vs2, size_t vl) {
@@ -135,7 +135,7 @@ vfloat64m2_t test_vfwmacc_vf_f64m2(vfloat64m2_t vd, float vs1, vfloat32m1_t vs2,
 
 // CHECK-RV64-LABEL: @test_vfwmacc_vv_f64m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x double> @llvm.riscv.vfwmacc.nxv4f64.nxv4f32.nxv4f32.i64(<vscale x 4 x double> [[VD:%.*]], <vscale x 4 x float> [[VS1:%.*]], <vscale x 4 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x double> @llvm.riscv.vfwmacc.nxv4f64.nxv4f32.nxv4f32.i64(<vscale x 4 x double> [[VD:%.*]], <vscale x 4 x float> [[VS1:%.*]], <vscale x 4 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x double> [[TMP0]]
 //
 vfloat64m4_t test_vfwmacc_vv_f64m4(vfloat64m4_t vd, vfloat32m2_t vs1, vfloat32m2_t vs2, size_t vl) {
@@ -144,7 +144,7 @@ vfloat64m4_t test_vfwmacc_vv_f64m4(vfloat64m4_t vd, vfloat32m2_t vs1, vfloat32m2
 
 // CHECK-RV64-LABEL: @test_vfwmacc_vf_f64m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x double> @llvm.riscv.vfwmacc.nxv4f64.f32.nxv4f32.i64(<vscale x 4 x double> [[VD:%.*]], float [[VS1:%.*]], <vscale x 4 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x double> @llvm.riscv.vfwmacc.nxv4f64.f32.nxv4f32.i64(<vscale x 4 x double> [[VD:%.*]], float [[VS1:%.*]], <vscale x 4 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x double> [[TMP0]]
 //
 vfloat64m4_t test_vfwmacc_vf_f64m4(vfloat64m4_t vd, float vs1, vfloat32m2_t vs2, size_t vl) {
@@ -153,7 +153,7 @@ vfloat64m4_t test_vfwmacc_vf_f64m4(vfloat64m4_t vd, float vs1, vfloat32m2_t vs2,
 
 // CHECK-RV64-LABEL: @test_vfwmacc_vv_f64m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x double> @llvm.riscv.vfwmacc.nxv8f64.nxv8f32.nxv8f32.i64(<vscale x 8 x double> [[VD:%.*]], <vscale x 8 x float> [[VS1:%.*]], <vscale x 8 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x double> @llvm.riscv.vfwmacc.nxv8f64.nxv8f32.nxv8f32.i64(<vscale x 8 x double> [[VD:%.*]], <vscale x 8 x float> [[VS1:%.*]], <vscale x 8 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x double> [[TMP0]]
 //
 vfloat64m8_t test_vfwmacc_vv_f64m8(vfloat64m8_t vd, vfloat32m4_t vs1, vfloat32m4_t vs2, size_t vl) {
@@ -162,7 +162,7 @@ vfloat64m8_t test_vfwmacc_vv_f64m8(vfloat64m8_t vd, vfloat32m4_t vs1, vfloat32m4
 
 // CHECK-RV64-LABEL: @test_vfwmacc_vf_f64m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x double> @llvm.riscv.vfwmacc.nxv8f64.f32.nxv8f32.i64(<vscale x 8 x double> [[VD:%.*]], float [[VS1:%.*]], <vscale x 8 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x double> @llvm.riscv.vfwmacc.nxv8f64.f32.nxv8f32.i64(<vscale x 8 x double> [[VD:%.*]], float [[VS1:%.*]], <vscale x 8 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x double> [[TMP0]]
 //
 vfloat64m8_t test_vfwmacc_vf_f64m8(vfloat64m8_t vd, float vs1, vfloat32m4_t vs2, size_t vl) {

diff  --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vfwmsac.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vfwmsac.c
index 0e7e3f4289c90..17ee5caef645e 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vfwmsac.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vfwmsac.c
@@ -9,7 +9,7 @@
 
 // CHECK-RV64-LABEL: @test_vfwmsac_vv_f32mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x float> @llvm.riscv.vfwmsac.nxv1f32.nxv1f16.nxv1f16.i64(<vscale x 1 x float> [[VD:%.*]], <vscale x 1 x half> [[VS1:%.*]], <vscale x 1 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x float> @llvm.riscv.vfwmsac.nxv1f32.nxv1f16.nxv1f16.i64(<vscale x 1 x float> [[VD:%.*]], <vscale x 1 x half> [[VS1:%.*]], <vscale x 1 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x float> [[TMP0]]
 //
 vfloat32mf2_t test_vfwmsac_vv_f32mf2(vfloat32mf2_t vd, vfloat16mf4_t vs1, vfloat16mf4_t vs2, size_t vl) {
@@ -18,7 +18,7 @@ vfloat32mf2_t test_vfwmsac_vv_f32mf2(vfloat32mf2_t vd, vfloat16mf4_t vs1, vfloat
 
 // CHECK-RV64-LABEL: @test_vfwmsac_vf_f32mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x float> @llvm.riscv.vfwmsac.nxv1f32.f16.nxv1f16.i64(<vscale x 1 x float> [[VD:%.*]], half [[VS1:%.*]], <vscale x 1 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x float> @llvm.riscv.vfwmsac.nxv1f32.f16.nxv1f16.i64(<vscale x 1 x float> [[VD:%.*]], half [[VS1:%.*]], <vscale x 1 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x float> [[TMP0]]
 //
 vfloat32mf2_t test_vfwmsac_vf_f32mf2(vfloat32mf2_t vd, _Float16 vs1, vfloat16mf4_t vs2, size_t vl) {
@@ -27,7 +27,7 @@ vfloat32mf2_t test_vfwmsac_vf_f32mf2(vfloat32mf2_t vd, _Float16 vs1, vfloat16mf4
 
 // CHECK-RV64-LABEL: @test_vfwmsac_vv_f32m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x float> @llvm.riscv.vfwmsac.nxv2f32.nxv2f16.nxv2f16.i64(<vscale x 2 x float> [[VD:%.*]], <vscale x 2 x half> [[VS1:%.*]], <vscale x 2 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x float> @llvm.riscv.vfwmsac.nxv2f32.nxv2f16.nxv2f16.i64(<vscale x 2 x float> [[VD:%.*]], <vscale x 2 x half> [[VS1:%.*]], <vscale x 2 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x float> [[TMP0]]
 //
 vfloat32m1_t test_vfwmsac_vv_f32m1(vfloat32m1_t vd, vfloat16mf2_t vs1, vfloat16mf2_t vs2, size_t vl) {
@@ -36,7 +36,7 @@ vfloat32m1_t test_vfwmsac_vv_f32m1(vfloat32m1_t vd, vfloat16mf2_t vs1, vfloat16m
 
 // CHECK-RV64-LABEL: @test_vfwmsac_vf_f32m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x float> @llvm.riscv.vfwmsac.nxv2f32.f16.nxv2f16.i64(<vscale x 2 x float> [[VD:%.*]], half [[VS1:%.*]], <vscale x 2 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x float> @llvm.riscv.vfwmsac.nxv2f32.f16.nxv2f16.i64(<vscale x 2 x float> [[VD:%.*]], half [[VS1:%.*]], <vscale x 2 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x float> [[TMP0]]
 //
 vfloat32m1_t test_vfwmsac_vf_f32m1(vfloat32m1_t vd, _Float16 vs1, vfloat16mf2_t vs2, size_t vl) {
@@ -45,7 +45,7 @@ vfloat32m1_t test_vfwmsac_vf_f32m1(vfloat32m1_t vd, _Float16 vs1, vfloat16mf2_t
 
 // CHECK-RV64-LABEL: @test_vfwmsac_vv_f32m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.riscv.vfwmsac.nxv4f32.nxv4f16.nxv4f16.i64(<vscale x 4 x float> [[VD:%.*]], <vscale x 4 x half> [[VS1:%.*]], <vscale x 4 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.riscv.vfwmsac.nxv4f32.nxv4f16.nxv4f16.i64(<vscale x 4 x float> [[VD:%.*]], <vscale x 4 x half> [[VS1:%.*]], <vscale x 4 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
 vfloat32m2_t test_vfwmsac_vv_f32m2(vfloat32m2_t vd, vfloat16m1_t vs1, vfloat16m1_t vs2, size_t vl) {
@@ -54,7 +54,7 @@ vfloat32m2_t test_vfwmsac_vv_f32m2(vfloat32m2_t vd, vfloat16m1_t vs1, vfloat16m1
 
 // CHECK-RV64-LABEL: @test_vfwmsac_vf_f32m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.riscv.vfwmsac.nxv4f32.f16.nxv4f16.i64(<vscale x 4 x float> [[VD:%.*]], half [[VS1:%.*]], <vscale x 4 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.riscv.vfwmsac.nxv4f32.f16.nxv4f16.i64(<vscale x 4 x float> [[VD:%.*]], half [[VS1:%.*]], <vscale x 4 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
 vfloat32m2_t test_vfwmsac_vf_f32m2(vfloat32m2_t vd, _Float16 vs1, vfloat16m1_t vs2, size_t vl) {
@@ -63,7 +63,7 @@ vfloat32m2_t test_vfwmsac_vf_f32m2(vfloat32m2_t vd, _Float16 vs1, vfloat16m1_t v
 
 // CHECK-RV64-LABEL: @test_vfwmsac_vv_f32m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x float> @llvm.riscv.vfwmsac.nxv8f32.nxv8f16.nxv8f16.i64(<vscale x 8 x float> [[VD:%.*]], <vscale x 8 x half> [[VS1:%.*]], <vscale x 8 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x float> @llvm.riscv.vfwmsac.nxv8f32.nxv8f16.nxv8f16.i64(<vscale x 8 x float> [[VD:%.*]], <vscale x 8 x half> [[VS1:%.*]], <vscale x 8 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x float> [[TMP0]]
 //
 vfloat32m4_t test_vfwmsac_vv_f32m4(vfloat32m4_t vd, vfloat16m2_t vs1, vfloat16m2_t vs2, size_t vl) {
@@ -72,7 +72,7 @@ vfloat32m4_t test_vfwmsac_vv_f32m4(vfloat32m4_t vd, vfloat16m2_t vs1, vfloat16m2
 
 // CHECK-RV64-LABEL: @test_vfwmsac_vf_f32m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x float> @llvm.riscv.vfwmsac.nxv8f32.f16.nxv8f16.i64(<vscale x 8 x float> [[VD:%.*]], half [[VS1:%.*]], <vscale x 8 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x float> @llvm.riscv.vfwmsac.nxv8f32.f16.nxv8f16.i64(<vscale x 8 x float> [[VD:%.*]], half [[VS1:%.*]], <vscale x 8 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x float> [[TMP0]]
 //
 vfloat32m4_t test_vfwmsac_vf_f32m4(vfloat32m4_t vd, _Float16 vs1, vfloat16m2_t vs2, size_t vl) {
@@ -81,7 +81,7 @@ vfloat32m4_t test_vfwmsac_vf_f32m4(vfloat32m4_t vd, _Float16 vs1, vfloat16m2_t v
 
 // CHECK-RV64-LABEL: @test_vfwmsac_vv_f32m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x float> @llvm.riscv.vfwmsac.nxv16f32.nxv16f16.nxv16f16.i64(<vscale x 16 x float> [[VD:%.*]], <vscale x 16 x half> [[VS1:%.*]], <vscale x 16 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x float> @llvm.riscv.vfwmsac.nxv16f32.nxv16f16.nxv16f16.i64(<vscale x 16 x float> [[VD:%.*]], <vscale x 16 x half> [[VS1:%.*]], <vscale x 16 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x float> [[TMP0]]
 //
 vfloat32m8_t test_vfwmsac_vv_f32m8(vfloat32m8_t vd, vfloat16m4_t vs1, vfloat16m4_t vs2, size_t vl) {
@@ -90,7 +90,7 @@ vfloat32m8_t test_vfwmsac_vv_f32m8(vfloat32m8_t vd, vfloat16m4_t vs1, vfloat16m4
 
 // CHECK-RV64-LABEL: @test_vfwmsac_vf_f32m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x float> @llvm.riscv.vfwmsac.nxv16f32.f16.nxv16f16.i64(<vscale x 16 x float> [[VD:%.*]], half [[VS1:%.*]], <vscale x 16 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x float> @llvm.riscv.vfwmsac.nxv16f32.f16.nxv16f16.i64(<vscale x 16 x float> [[VD:%.*]], half [[VS1:%.*]], <vscale x 16 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x float> [[TMP0]]
 //
 vfloat32m8_t test_vfwmsac_vf_f32m8(vfloat32m8_t vd, _Float16 vs1, vfloat16m4_t vs2, size_t vl) {
@@ -99,7 +99,7 @@ vfloat32m8_t test_vfwmsac_vf_f32m8(vfloat32m8_t vd, _Float16 vs1, vfloat16m4_t v
 
 // CHECK-RV64-LABEL: @test_vfwmsac_vv_f64m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x double> @llvm.riscv.vfwmsac.nxv1f64.nxv1f32.nxv1f32.i64(<vscale x 1 x double> [[VD:%.*]], <vscale x 1 x float> [[VS1:%.*]], <vscale x 1 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x double> @llvm.riscv.vfwmsac.nxv1f64.nxv1f32.nxv1f32.i64(<vscale x 1 x double> [[VD:%.*]], <vscale x 1 x float> [[VS1:%.*]], <vscale x 1 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x double> [[TMP0]]
 //
 vfloat64m1_t test_vfwmsac_vv_f64m1(vfloat64m1_t vd, vfloat32mf2_t vs1, vfloat32mf2_t vs2, size_t vl) {
@@ -108,7 +108,7 @@ vfloat64m1_t test_vfwmsac_vv_f64m1(vfloat64m1_t vd, vfloat32mf2_t vs1, vfloat32m
 
 // CHECK-RV64-LABEL: @test_vfwmsac_vf_f64m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x double> @llvm.riscv.vfwmsac.nxv1f64.f32.nxv1f32.i64(<vscale x 1 x double> [[VD:%.*]], float [[VS1:%.*]], <vscale x 1 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x double> @llvm.riscv.vfwmsac.nxv1f64.f32.nxv1f32.i64(<vscale x 1 x double> [[VD:%.*]], float [[VS1:%.*]], <vscale x 1 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x double> [[TMP0]]
 //
 vfloat64m1_t test_vfwmsac_vf_f64m1(vfloat64m1_t vd, float vs1, vfloat32mf2_t vs2, size_t vl) {
@@ -117,7 +117,7 @@ vfloat64m1_t test_vfwmsac_vf_f64m1(vfloat64m1_t vd, float vs1, vfloat32mf2_t vs2
 
 // CHECK-RV64-LABEL: @test_vfwmsac_vv_f64m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x double> @llvm.riscv.vfwmsac.nxv2f64.nxv2f32.nxv2f32.i64(<vscale x 2 x double> [[VD:%.*]], <vscale x 2 x float> [[VS1:%.*]], <vscale x 2 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x double> @llvm.riscv.vfwmsac.nxv2f64.nxv2f32.nxv2f32.i64(<vscale x 2 x double> [[VD:%.*]], <vscale x 2 x float> [[VS1:%.*]], <vscale x 2 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x double> [[TMP0]]
 //
 vfloat64m2_t test_vfwmsac_vv_f64m2(vfloat64m2_t vd, vfloat32m1_t vs1, vfloat32m1_t vs2, size_t vl) {
@@ -126,7 +126,7 @@ vfloat64m2_t test_vfwmsac_vv_f64m2(vfloat64m2_t vd, vfloat32m1_t vs1, vfloat32m1
 
 // CHECK-RV64-LABEL: @test_vfwmsac_vf_f64m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x double> @llvm.riscv.vfwmsac.nxv2f64.f32.nxv2f32.i64(<vscale x 2 x double> [[VD:%.*]], float [[VS1:%.*]], <vscale x 2 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x double> @llvm.riscv.vfwmsac.nxv2f64.f32.nxv2f32.i64(<vscale x 2 x double> [[VD:%.*]], float [[VS1:%.*]], <vscale x 2 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x double> [[TMP0]]
 //
 vfloat64m2_t test_vfwmsac_vf_f64m2(vfloat64m2_t vd, float vs1, vfloat32m1_t vs2, size_t vl) {
@@ -135,7 +135,7 @@ vfloat64m2_t test_vfwmsac_vf_f64m2(vfloat64m2_t vd, float vs1, vfloat32m1_t vs2,
 
 // CHECK-RV64-LABEL: @test_vfwmsac_vv_f64m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x double> @llvm.riscv.vfwmsac.nxv4f64.nxv4f32.nxv4f32.i64(<vscale x 4 x double> [[VD:%.*]], <vscale x 4 x float> [[VS1:%.*]], <vscale x 4 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x double> @llvm.riscv.vfwmsac.nxv4f64.nxv4f32.nxv4f32.i64(<vscale x 4 x double> [[VD:%.*]], <vscale x 4 x float> [[VS1:%.*]], <vscale x 4 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x double> [[TMP0]]
 //
 vfloat64m4_t test_vfwmsac_vv_f64m4(vfloat64m4_t vd, vfloat32m2_t vs1, vfloat32m2_t vs2, size_t vl) {
@@ -144,7 +144,7 @@ vfloat64m4_t test_vfwmsac_vv_f64m4(vfloat64m4_t vd, vfloat32m2_t vs1, vfloat32m2
 
 // CHECK-RV64-LABEL: @test_vfwmsac_vf_f64m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x double> @llvm.riscv.vfwmsac.nxv4f64.f32.nxv4f32.i64(<vscale x 4 x double> [[VD:%.*]], float [[VS1:%.*]], <vscale x 4 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x double> @llvm.riscv.vfwmsac.nxv4f64.f32.nxv4f32.i64(<vscale x 4 x double> [[VD:%.*]], float [[VS1:%.*]], <vscale x 4 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x double> [[TMP0]]
 //
 vfloat64m4_t test_vfwmsac_vf_f64m4(vfloat64m4_t vd, float vs1, vfloat32m2_t vs2, size_t vl) {
@@ -153,7 +153,7 @@ vfloat64m4_t test_vfwmsac_vf_f64m4(vfloat64m4_t vd, float vs1, vfloat32m2_t vs2,
 
 // CHECK-RV64-LABEL: @test_vfwmsac_vv_f64m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x double> @llvm.riscv.vfwmsac.nxv8f64.nxv8f32.nxv8f32.i64(<vscale x 8 x double> [[VD:%.*]], <vscale x 8 x float> [[VS1:%.*]], <vscale x 8 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x double> @llvm.riscv.vfwmsac.nxv8f64.nxv8f32.nxv8f32.i64(<vscale x 8 x double> [[VD:%.*]], <vscale x 8 x float> [[VS1:%.*]], <vscale x 8 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x double> [[TMP0]]
 //
 vfloat64m8_t test_vfwmsac_vv_f64m8(vfloat64m8_t vd, vfloat32m4_t vs1, vfloat32m4_t vs2, size_t vl) {
@@ -162,7 +162,7 @@ vfloat64m8_t test_vfwmsac_vv_f64m8(vfloat64m8_t vd, vfloat32m4_t vs1, vfloat32m4
 
 // CHECK-RV64-LABEL: @test_vfwmsac_vf_f64m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x double> @llvm.riscv.vfwmsac.nxv8f64.f32.nxv8f32.i64(<vscale x 8 x double> [[VD:%.*]], float [[VS1:%.*]], <vscale x 8 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x double> @llvm.riscv.vfwmsac.nxv8f64.f32.nxv8f32.i64(<vscale x 8 x double> [[VD:%.*]], float [[VS1:%.*]], <vscale x 8 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x double> [[TMP0]]
 //
 vfloat64m8_t test_vfwmsac_vf_f64m8(vfloat64m8_t vd, float vs1, vfloat32m4_t vs2, size_t vl) {

diff  --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vfwnmacc.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vfwnmacc.c
index eccf8d853e537..a6275fe14ed19 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vfwnmacc.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vfwnmacc.c
@@ -9,7 +9,7 @@
 
 // CHECK-RV64-LABEL: @test_vfwnmacc_vv_f32mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x float> @llvm.riscv.vfwnmacc.nxv1f32.nxv1f16.nxv1f16.i64(<vscale x 1 x float> [[VD:%.*]], <vscale x 1 x half> [[VS1:%.*]], <vscale x 1 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x float> @llvm.riscv.vfwnmacc.nxv1f32.nxv1f16.nxv1f16.i64(<vscale x 1 x float> [[VD:%.*]], <vscale x 1 x half> [[VS1:%.*]], <vscale x 1 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x float> [[TMP0]]
 //
 vfloat32mf2_t test_vfwnmacc_vv_f32mf2(vfloat32mf2_t vd, vfloat16mf4_t vs1, vfloat16mf4_t vs2, size_t vl) {
@@ -18,7 +18,7 @@ vfloat32mf2_t test_vfwnmacc_vv_f32mf2(vfloat32mf2_t vd, vfloat16mf4_t vs1, vfloa
 
 // CHECK-RV64-LABEL: @test_vfwnmacc_vf_f32mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x float> @llvm.riscv.vfwnmacc.nxv1f32.f16.nxv1f16.i64(<vscale x 1 x float> [[VD:%.*]], half [[VS1:%.*]], <vscale x 1 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x float> @llvm.riscv.vfwnmacc.nxv1f32.f16.nxv1f16.i64(<vscale x 1 x float> [[VD:%.*]], half [[VS1:%.*]], <vscale x 1 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x float> [[TMP0]]
 //
 vfloat32mf2_t test_vfwnmacc_vf_f32mf2(vfloat32mf2_t vd, _Float16 vs1, vfloat16mf4_t vs2, size_t vl) {
@@ -27,7 +27,7 @@ vfloat32mf2_t test_vfwnmacc_vf_f32mf2(vfloat32mf2_t vd, _Float16 vs1, vfloat16mf
 
 // CHECK-RV64-LABEL: @test_vfwnmacc_vv_f32m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x float> @llvm.riscv.vfwnmacc.nxv2f32.nxv2f16.nxv2f16.i64(<vscale x 2 x float> [[VD:%.*]], <vscale x 2 x half> [[VS1:%.*]], <vscale x 2 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x float> @llvm.riscv.vfwnmacc.nxv2f32.nxv2f16.nxv2f16.i64(<vscale x 2 x float> [[VD:%.*]], <vscale x 2 x half> [[VS1:%.*]], <vscale x 2 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x float> [[TMP0]]
 //
 vfloat32m1_t test_vfwnmacc_vv_f32m1(vfloat32m1_t vd, vfloat16mf2_t vs1, vfloat16mf2_t vs2, size_t vl) {
@@ -36,7 +36,7 @@ vfloat32m1_t test_vfwnmacc_vv_f32m1(vfloat32m1_t vd, vfloat16mf2_t vs1, vfloat16
 
 // CHECK-RV64-LABEL: @test_vfwnmacc_vf_f32m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x float> @llvm.riscv.vfwnmacc.nxv2f32.f16.nxv2f16.i64(<vscale x 2 x float> [[VD:%.*]], half [[VS1:%.*]], <vscale x 2 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x float> @llvm.riscv.vfwnmacc.nxv2f32.f16.nxv2f16.i64(<vscale x 2 x float> [[VD:%.*]], half [[VS1:%.*]], <vscale x 2 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x float> [[TMP0]]
 //
 vfloat32m1_t test_vfwnmacc_vf_f32m1(vfloat32m1_t vd, _Float16 vs1, vfloat16mf2_t vs2, size_t vl) {
@@ -45,7 +45,7 @@ vfloat32m1_t test_vfwnmacc_vf_f32m1(vfloat32m1_t vd, _Float16 vs1, vfloat16mf2_t
 
 // CHECK-RV64-LABEL: @test_vfwnmacc_vv_f32m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.riscv.vfwnmacc.nxv4f32.nxv4f16.nxv4f16.i64(<vscale x 4 x float> [[VD:%.*]], <vscale x 4 x half> [[VS1:%.*]], <vscale x 4 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.riscv.vfwnmacc.nxv4f32.nxv4f16.nxv4f16.i64(<vscale x 4 x float> [[VD:%.*]], <vscale x 4 x half> [[VS1:%.*]], <vscale x 4 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
 vfloat32m2_t test_vfwnmacc_vv_f32m2(vfloat32m2_t vd, vfloat16m1_t vs1, vfloat16m1_t vs2, size_t vl) {
@@ -54,7 +54,7 @@ vfloat32m2_t test_vfwnmacc_vv_f32m2(vfloat32m2_t vd, vfloat16m1_t vs1, vfloat16m
 
 // CHECK-RV64-LABEL: @test_vfwnmacc_vf_f32m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.riscv.vfwnmacc.nxv4f32.f16.nxv4f16.i64(<vscale x 4 x float> [[VD:%.*]], half [[VS1:%.*]], <vscale x 4 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.riscv.vfwnmacc.nxv4f32.f16.nxv4f16.i64(<vscale x 4 x float> [[VD:%.*]], half [[VS1:%.*]], <vscale x 4 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
 vfloat32m2_t test_vfwnmacc_vf_f32m2(vfloat32m2_t vd, _Float16 vs1, vfloat16m1_t vs2, size_t vl) {
@@ -63,7 +63,7 @@ vfloat32m2_t test_vfwnmacc_vf_f32m2(vfloat32m2_t vd, _Float16 vs1, vfloat16m1_t
 
 // CHECK-RV64-LABEL: @test_vfwnmacc_vv_f32m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x float> @llvm.riscv.vfwnmacc.nxv8f32.nxv8f16.nxv8f16.i64(<vscale x 8 x float> [[VD:%.*]], <vscale x 8 x half> [[VS1:%.*]], <vscale x 8 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x float> @llvm.riscv.vfwnmacc.nxv8f32.nxv8f16.nxv8f16.i64(<vscale x 8 x float> [[VD:%.*]], <vscale x 8 x half> [[VS1:%.*]], <vscale x 8 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x float> [[TMP0]]
 //
 vfloat32m4_t test_vfwnmacc_vv_f32m4(vfloat32m4_t vd, vfloat16m2_t vs1, vfloat16m2_t vs2, size_t vl) {
@@ -72,7 +72,7 @@ vfloat32m4_t test_vfwnmacc_vv_f32m4(vfloat32m4_t vd, vfloat16m2_t vs1, vfloat16m
 
 // CHECK-RV64-LABEL: @test_vfwnmacc_vf_f32m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x float> @llvm.riscv.vfwnmacc.nxv8f32.f16.nxv8f16.i64(<vscale x 8 x float> [[VD:%.*]], half [[VS1:%.*]], <vscale x 8 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x float> @llvm.riscv.vfwnmacc.nxv8f32.f16.nxv8f16.i64(<vscale x 8 x float> [[VD:%.*]], half [[VS1:%.*]], <vscale x 8 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x float> [[TMP0]]
 //
 vfloat32m4_t test_vfwnmacc_vf_f32m4(vfloat32m4_t vd, _Float16 vs1, vfloat16m2_t vs2, size_t vl) {
@@ -81,7 +81,7 @@ vfloat32m4_t test_vfwnmacc_vf_f32m4(vfloat32m4_t vd, _Float16 vs1, vfloat16m2_t
 
 // CHECK-RV64-LABEL: @test_vfwnmacc_vv_f32m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x float> @llvm.riscv.vfwnmacc.nxv16f32.nxv16f16.nxv16f16.i64(<vscale x 16 x float> [[VD:%.*]], <vscale x 16 x half> [[VS1:%.*]], <vscale x 16 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x float> @llvm.riscv.vfwnmacc.nxv16f32.nxv16f16.nxv16f16.i64(<vscale x 16 x float> [[VD:%.*]], <vscale x 16 x half> [[VS1:%.*]], <vscale x 16 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x float> [[TMP0]]
 //
 vfloat32m8_t test_vfwnmacc_vv_f32m8(vfloat32m8_t vd, vfloat16m4_t vs1, vfloat16m4_t vs2, size_t vl) {
@@ -90,7 +90,7 @@ vfloat32m8_t test_vfwnmacc_vv_f32m8(vfloat32m8_t vd, vfloat16m4_t vs1, vfloat16m
 
 // CHECK-RV64-LABEL: @test_vfwnmacc_vf_f32m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x float> @llvm.riscv.vfwnmacc.nxv16f32.f16.nxv16f16.i64(<vscale x 16 x float> [[VD:%.*]], half [[VS1:%.*]], <vscale x 16 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x float> @llvm.riscv.vfwnmacc.nxv16f32.f16.nxv16f16.i64(<vscale x 16 x float> [[VD:%.*]], half [[VS1:%.*]], <vscale x 16 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x float> [[TMP0]]
 //
 vfloat32m8_t test_vfwnmacc_vf_f32m8(vfloat32m8_t vd, _Float16 vs1, vfloat16m4_t vs2, size_t vl) {
@@ -99,7 +99,7 @@ vfloat32m8_t test_vfwnmacc_vf_f32m8(vfloat32m8_t vd, _Float16 vs1, vfloat16m4_t
 
 // CHECK-RV64-LABEL: @test_vfwnmacc_vv_f64m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x double> @llvm.riscv.vfwnmacc.nxv1f64.nxv1f32.nxv1f32.i64(<vscale x 1 x double> [[VD:%.*]], <vscale x 1 x float> [[VS1:%.*]], <vscale x 1 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x double> @llvm.riscv.vfwnmacc.nxv1f64.nxv1f32.nxv1f32.i64(<vscale x 1 x double> [[VD:%.*]], <vscale x 1 x float> [[VS1:%.*]], <vscale x 1 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x double> [[TMP0]]
 //
 vfloat64m1_t test_vfwnmacc_vv_f64m1(vfloat64m1_t vd, vfloat32mf2_t vs1, vfloat32mf2_t vs2, size_t vl) {
@@ -108,7 +108,7 @@ vfloat64m1_t test_vfwnmacc_vv_f64m1(vfloat64m1_t vd, vfloat32mf2_t vs1, vfloat32
 
 // CHECK-RV64-LABEL: @test_vfwnmacc_vf_f64m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x double> @llvm.riscv.vfwnmacc.nxv1f64.f32.nxv1f32.i64(<vscale x 1 x double> [[VD:%.*]], float [[VS1:%.*]], <vscale x 1 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x double> @llvm.riscv.vfwnmacc.nxv1f64.f32.nxv1f32.i64(<vscale x 1 x double> [[VD:%.*]], float [[VS1:%.*]], <vscale x 1 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x double> [[TMP0]]
 //
 vfloat64m1_t test_vfwnmacc_vf_f64m1(vfloat64m1_t vd, float vs1, vfloat32mf2_t vs2, size_t vl) {
@@ -117,7 +117,7 @@ vfloat64m1_t test_vfwnmacc_vf_f64m1(vfloat64m1_t vd, float vs1, vfloat32mf2_t vs
 
 // CHECK-RV64-LABEL: @test_vfwnmacc_vv_f64m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x double> @llvm.riscv.vfwnmacc.nxv2f64.nxv2f32.nxv2f32.i64(<vscale x 2 x double> [[VD:%.*]], <vscale x 2 x float> [[VS1:%.*]], <vscale x 2 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x double> @llvm.riscv.vfwnmacc.nxv2f64.nxv2f32.nxv2f32.i64(<vscale x 2 x double> [[VD:%.*]], <vscale x 2 x float> [[VS1:%.*]], <vscale x 2 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x double> [[TMP0]]
 //
 vfloat64m2_t test_vfwnmacc_vv_f64m2(vfloat64m2_t vd, vfloat32m1_t vs1, vfloat32m1_t vs2, size_t vl) {
@@ -126,7 +126,7 @@ vfloat64m2_t test_vfwnmacc_vv_f64m2(vfloat64m2_t vd, vfloat32m1_t vs1, vfloat32m
 
 // CHECK-RV64-LABEL: @test_vfwnmacc_vf_f64m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x double> @llvm.riscv.vfwnmacc.nxv2f64.f32.nxv2f32.i64(<vscale x 2 x double> [[VD:%.*]], float [[VS1:%.*]], <vscale x 2 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x double> @llvm.riscv.vfwnmacc.nxv2f64.f32.nxv2f32.i64(<vscale x 2 x double> [[VD:%.*]], float [[VS1:%.*]], <vscale x 2 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x double> [[TMP0]]
 //
 vfloat64m2_t test_vfwnmacc_vf_f64m2(vfloat64m2_t vd, float vs1, vfloat32m1_t vs2, size_t vl) {
@@ -135,7 +135,7 @@ vfloat64m2_t test_vfwnmacc_vf_f64m2(vfloat64m2_t vd, float vs1, vfloat32m1_t vs2
 
 // CHECK-RV64-LABEL: @test_vfwnmacc_vv_f64m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x double> @llvm.riscv.vfwnmacc.nxv4f64.nxv4f32.nxv4f32.i64(<vscale x 4 x double> [[VD:%.*]], <vscale x 4 x float> [[VS1:%.*]], <vscale x 4 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x double> @llvm.riscv.vfwnmacc.nxv4f64.nxv4f32.nxv4f32.i64(<vscale x 4 x double> [[VD:%.*]], <vscale x 4 x float> [[VS1:%.*]], <vscale x 4 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x double> [[TMP0]]
 //
 vfloat64m4_t test_vfwnmacc_vv_f64m4(vfloat64m4_t vd, vfloat32m2_t vs1, vfloat32m2_t vs2, size_t vl) {
@@ -144,7 +144,7 @@ vfloat64m4_t test_vfwnmacc_vv_f64m4(vfloat64m4_t vd, vfloat32m2_t vs1, vfloat32m
 
 // CHECK-RV64-LABEL: @test_vfwnmacc_vf_f64m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x double> @llvm.riscv.vfwnmacc.nxv4f64.f32.nxv4f32.i64(<vscale x 4 x double> [[VD:%.*]], float [[VS1:%.*]], <vscale x 4 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x double> @llvm.riscv.vfwnmacc.nxv4f64.f32.nxv4f32.i64(<vscale x 4 x double> [[VD:%.*]], float [[VS1:%.*]], <vscale x 4 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x double> [[TMP0]]
 //
 vfloat64m4_t test_vfwnmacc_vf_f64m4(vfloat64m4_t vd, float vs1, vfloat32m2_t vs2, size_t vl) {
@@ -153,7 +153,7 @@ vfloat64m4_t test_vfwnmacc_vf_f64m4(vfloat64m4_t vd, float vs1, vfloat32m2_t vs2
 
 // CHECK-RV64-LABEL: @test_vfwnmacc_vv_f64m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x double> @llvm.riscv.vfwnmacc.nxv8f64.nxv8f32.nxv8f32.i64(<vscale x 8 x double> [[VD:%.*]], <vscale x 8 x float> [[VS1:%.*]], <vscale x 8 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x double> @llvm.riscv.vfwnmacc.nxv8f64.nxv8f32.nxv8f32.i64(<vscale x 8 x double> [[VD:%.*]], <vscale x 8 x float> [[VS1:%.*]], <vscale x 8 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x double> [[TMP0]]
 //
 vfloat64m8_t test_vfwnmacc_vv_f64m8(vfloat64m8_t vd, vfloat32m4_t vs1, vfloat32m4_t vs2, size_t vl) {
@@ -162,7 +162,7 @@ vfloat64m8_t test_vfwnmacc_vv_f64m8(vfloat64m8_t vd, vfloat32m4_t vs1, vfloat32m
 
 // CHECK-RV64-LABEL: @test_vfwnmacc_vf_f64m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x double> @llvm.riscv.vfwnmacc.nxv8f64.f32.nxv8f32.i64(<vscale x 8 x double> [[VD:%.*]], float [[VS1:%.*]], <vscale x 8 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x double> @llvm.riscv.vfwnmacc.nxv8f64.f32.nxv8f32.i64(<vscale x 8 x double> [[VD:%.*]], float [[VS1:%.*]], <vscale x 8 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x double> [[TMP0]]
 //
 vfloat64m8_t test_vfwnmacc_vf_f64m8(vfloat64m8_t vd, float vs1, vfloat32m4_t vs2, size_t vl) {

diff  --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vfwnmsac.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vfwnmsac.c
index fabfb2de3a73c..7622e3e317165 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vfwnmsac.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vfwnmsac.c
@@ -9,7 +9,7 @@
 
 // CHECK-RV64-LABEL: @test_vfwnmsac_vv_f32mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x float> @llvm.riscv.vfwnmsac.nxv1f32.nxv1f16.nxv1f16.i64(<vscale x 1 x float> [[VD:%.*]], <vscale x 1 x half> [[VS1:%.*]], <vscale x 1 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x float> @llvm.riscv.vfwnmsac.nxv1f32.nxv1f16.nxv1f16.i64(<vscale x 1 x float> [[VD:%.*]], <vscale x 1 x half> [[VS1:%.*]], <vscale x 1 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x float> [[TMP0]]
 //
 vfloat32mf2_t test_vfwnmsac_vv_f32mf2(vfloat32mf2_t vd, vfloat16mf4_t vs1, vfloat16mf4_t vs2, size_t vl) {
@@ -18,7 +18,7 @@ vfloat32mf2_t test_vfwnmsac_vv_f32mf2(vfloat32mf2_t vd, vfloat16mf4_t vs1, vfloa
 
 // CHECK-RV64-LABEL: @test_vfwnmsac_vf_f32mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x float> @llvm.riscv.vfwnmsac.nxv1f32.f16.nxv1f16.i64(<vscale x 1 x float> [[VD:%.*]], half [[VS1:%.*]], <vscale x 1 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x float> @llvm.riscv.vfwnmsac.nxv1f32.f16.nxv1f16.i64(<vscale x 1 x float> [[VD:%.*]], half [[VS1:%.*]], <vscale x 1 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x float> [[TMP0]]
 //
 vfloat32mf2_t test_vfwnmsac_vf_f32mf2(vfloat32mf2_t vd, _Float16 vs1, vfloat16mf4_t vs2, size_t vl) {
@@ -27,7 +27,7 @@ vfloat32mf2_t test_vfwnmsac_vf_f32mf2(vfloat32mf2_t vd, _Float16 vs1, vfloat16mf
 
 // CHECK-RV64-LABEL: @test_vfwnmsac_vv_f32m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x float> @llvm.riscv.vfwnmsac.nxv2f32.nxv2f16.nxv2f16.i64(<vscale x 2 x float> [[VD:%.*]], <vscale x 2 x half> [[VS1:%.*]], <vscale x 2 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x float> @llvm.riscv.vfwnmsac.nxv2f32.nxv2f16.nxv2f16.i64(<vscale x 2 x float> [[VD:%.*]], <vscale x 2 x half> [[VS1:%.*]], <vscale x 2 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x float> [[TMP0]]
 //
 vfloat32m1_t test_vfwnmsac_vv_f32m1(vfloat32m1_t vd, vfloat16mf2_t vs1, vfloat16mf2_t vs2, size_t vl) {
@@ -36,7 +36,7 @@ vfloat32m1_t test_vfwnmsac_vv_f32m1(vfloat32m1_t vd, vfloat16mf2_t vs1, vfloat16
 
 // CHECK-RV64-LABEL: @test_vfwnmsac_vf_f32m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x float> @llvm.riscv.vfwnmsac.nxv2f32.f16.nxv2f16.i64(<vscale x 2 x float> [[VD:%.*]], half [[VS1:%.*]], <vscale x 2 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x float> @llvm.riscv.vfwnmsac.nxv2f32.f16.nxv2f16.i64(<vscale x 2 x float> [[VD:%.*]], half [[VS1:%.*]], <vscale x 2 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x float> [[TMP0]]
 //
 vfloat32m1_t test_vfwnmsac_vf_f32m1(vfloat32m1_t vd, _Float16 vs1, vfloat16mf2_t vs2, size_t vl) {
@@ -45,7 +45,7 @@ vfloat32m1_t test_vfwnmsac_vf_f32m1(vfloat32m1_t vd, _Float16 vs1, vfloat16mf2_t
 
 // CHECK-RV64-LABEL: @test_vfwnmsac_vv_f32m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.riscv.vfwnmsac.nxv4f32.nxv4f16.nxv4f16.i64(<vscale x 4 x float> [[VD:%.*]], <vscale x 4 x half> [[VS1:%.*]], <vscale x 4 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.riscv.vfwnmsac.nxv4f32.nxv4f16.nxv4f16.i64(<vscale x 4 x float> [[VD:%.*]], <vscale x 4 x half> [[VS1:%.*]], <vscale x 4 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
 vfloat32m2_t test_vfwnmsac_vv_f32m2(vfloat32m2_t vd, vfloat16m1_t vs1, vfloat16m1_t vs2, size_t vl) {
@@ -54,7 +54,7 @@ vfloat32m2_t test_vfwnmsac_vv_f32m2(vfloat32m2_t vd, vfloat16m1_t vs1, vfloat16m
 
 // CHECK-RV64-LABEL: @test_vfwnmsac_vf_f32m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.riscv.vfwnmsac.nxv4f32.f16.nxv4f16.i64(<vscale x 4 x float> [[VD:%.*]], half [[VS1:%.*]], <vscale x 4 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.riscv.vfwnmsac.nxv4f32.f16.nxv4f16.i64(<vscale x 4 x float> [[VD:%.*]], half [[VS1:%.*]], <vscale x 4 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
 vfloat32m2_t test_vfwnmsac_vf_f32m2(vfloat32m2_t vd, _Float16 vs1, vfloat16m1_t vs2, size_t vl) {
@@ -63,7 +63,7 @@ vfloat32m2_t test_vfwnmsac_vf_f32m2(vfloat32m2_t vd, _Float16 vs1, vfloat16m1_t
 
 // CHECK-RV64-LABEL: @test_vfwnmsac_vv_f32m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x float> @llvm.riscv.vfwnmsac.nxv8f32.nxv8f16.nxv8f16.i64(<vscale x 8 x float> [[VD:%.*]], <vscale x 8 x half> [[VS1:%.*]], <vscale x 8 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x float> @llvm.riscv.vfwnmsac.nxv8f32.nxv8f16.nxv8f16.i64(<vscale x 8 x float> [[VD:%.*]], <vscale x 8 x half> [[VS1:%.*]], <vscale x 8 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x float> [[TMP0]]
 //
 vfloat32m4_t test_vfwnmsac_vv_f32m4(vfloat32m4_t vd, vfloat16m2_t vs1, vfloat16m2_t vs2, size_t vl) {
@@ -72,7 +72,7 @@ vfloat32m4_t test_vfwnmsac_vv_f32m4(vfloat32m4_t vd, vfloat16m2_t vs1, vfloat16m
 
 // CHECK-RV64-LABEL: @test_vfwnmsac_vf_f32m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x float> @llvm.riscv.vfwnmsac.nxv8f32.f16.nxv8f16.i64(<vscale x 8 x float> [[VD:%.*]], half [[VS1:%.*]], <vscale x 8 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x float> @llvm.riscv.vfwnmsac.nxv8f32.f16.nxv8f16.i64(<vscale x 8 x float> [[VD:%.*]], half [[VS1:%.*]], <vscale x 8 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x float> [[TMP0]]
 //
 vfloat32m4_t test_vfwnmsac_vf_f32m4(vfloat32m4_t vd, _Float16 vs1, vfloat16m2_t vs2, size_t vl) {
@@ -81,7 +81,7 @@ vfloat32m4_t test_vfwnmsac_vf_f32m4(vfloat32m4_t vd, _Float16 vs1, vfloat16m2_t
 
 // CHECK-RV64-LABEL: @test_vfwnmsac_vv_f32m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x float> @llvm.riscv.vfwnmsac.nxv16f32.nxv16f16.nxv16f16.i64(<vscale x 16 x float> [[VD:%.*]], <vscale x 16 x half> [[VS1:%.*]], <vscale x 16 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x float> @llvm.riscv.vfwnmsac.nxv16f32.nxv16f16.nxv16f16.i64(<vscale x 16 x float> [[VD:%.*]], <vscale x 16 x half> [[VS1:%.*]], <vscale x 16 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x float> [[TMP0]]
 //
 vfloat32m8_t test_vfwnmsac_vv_f32m8(vfloat32m8_t vd, vfloat16m4_t vs1, vfloat16m4_t vs2, size_t vl) {
@@ -90,7 +90,7 @@ vfloat32m8_t test_vfwnmsac_vv_f32m8(vfloat32m8_t vd, vfloat16m4_t vs1, vfloat16m
 
 // CHECK-RV64-LABEL: @test_vfwnmsac_vf_f32m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x float> @llvm.riscv.vfwnmsac.nxv16f32.f16.nxv16f16.i64(<vscale x 16 x float> [[VD:%.*]], half [[VS1:%.*]], <vscale x 16 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x float> @llvm.riscv.vfwnmsac.nxv16f32.f16.nxv16f16.i64(<vscale x 16 x float> [[VD:%.*]], half [[VS1:%.*]], <vscale x 16 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x float> [[TMP0]]
 //
 vfloat32m8_t test_vfwnmsac_vf_f32m8(vfloat32m8_t vd, _Float16 vs1, vfloat16m4_t vs2, size_t vl) {
@@ -99,7 +99,7 @@ vfloat32m8_t test_vfwnmsac_vf_f32m8(vfloat32m8_t vd, _Float16 vs1, vfloat16m4_t
 
 // CHECK-RV64-LABEL: @test_vfwnmsac_vv_f64m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x double> @llvm.riscv.vfwnmsac.nxv1f64.nxv1f32.nxv1f32.i64(<vscale x 1 x double> [[VD:%.*]], <vscale x 1 x float> [[VS1:%.*]], <vscale x 1 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x double> @llvm.riscv.vfwnmsac.nxv1f64.nxv1f32.nxv1f32.i64(<vscale x 1 x double> [[VD:%.*]], <vscale x 1 x float> [[VS1:%.*]], <vscale x 1 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x double> [[TMP0]]
 //
 vfloat64m1_t test_vfwnmsac_vv_f64m1(vfloat64m1_t vd, vfloat32mf2_t vs1, vfloat32mf2_t vs2, size_t vl) {
@@ -108,7 +108,7 @@ vfloat64m1_t test_vfwnmsac_vv_f64m1(vfloat64m1_t vd, vfloat32mf2_t vs1, vfloat32
 
 // CHECK-RV64-LABEL: @test_vfwnmsac_vf_f64m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x double> @llvm.riscv.vfwnmsac.nxv1f64.f32.nxv1f32.i64(<vscale x 1 x double> [[VD:%.*]], float [[VS1:%.*]], <vscale x 1 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x double> @llvm.riscv.vfwnmsac.nxv1f64.f32.nxv1f32.i64(<vscale x 1 x double> [[VD:%.*]], float [[VS1:%.*]], <vscale x 1 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x double> [[TMP0]]
 //
 vfloat64m1_t test_vfwnmsac_vf_f64m1(vfloat64m1_t vd, float vs1, vfloat32mf2_t vs2, size_t vl) {
@@ -117,7 +117,7 @@ vfloat64m1_t test_vfwnmsac_vf_f64m1(vfloat64m1_t vd, float vs1, vfloat32mf2_t vs
 
 // CHECK-RV64-LABEL: @test_vfwnmsac_vv_f64m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x double> @llvm.riscv.vfwnmsac.nxv2f64.nxv2f32.nxv2f32.i64(<vscale x 2 x double> [[VD:%.*]], <vscale x 2 x float> [[VS1:%.*]], <vscale x 2 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x double> @llvm.riscv.vfwnmsac.nxv2f64.nxv2f32.nxv2f32.i64(<vscale x 2 x double> [[VD:%.*]], <vscale x 2 x float> [[VS1:%.*]], <vscale x 2 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x double> [[TMP0]]
 //
 vfloat64m2_t test_vfwnmsac_vv_f64m2(vfloat64m2_t vd, vfloat32m1_t vs1, vfloat32m1_t vs2, size_t vl) {
@@ -126,7 +126,7 @@ vfloat64m2_t test_vfwnmsac_vv_f64m2(vfloat64m2_t vd, vfloat32m1_t vs1, vfloat32m
 
 // CHECK-RV64-LABEL: @test_vfwnmsac_vf_f64m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x double> @llvm.riscv.vfwnmsac.nxv2f64.f32.nxv2f32.i64(<vscale x 2 x double> [[VD:%.*]], float [[VS1:%.*]], <vscale x 2 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x double> @llvm.riscv.vfwnmsac.nxv2f64.f32.nxv2f32.i64(<vscale x 2 x double> [[VD:%.*]], float [[VS1:%.*]], <vscale x 2 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x double> [[TMP0]]
 //
 vfloat64m2_t test_vfwnmsac_vf_f64m2(vfloat64m2_t vd, float vs1, vfloat32m1_t vs2, size_t vl) {
@@ -135,7 +135,7 @@ vfloat64m2_t test_vfwnmsac_vf_f64m2(vfloat64m2_t vd, float vs1, vfloat32m1_t vs2
 
 // CHECK-RV64-LABEL: @test_vfwnmsac_vv_f64m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x double> @llvm.riscv.vfwnmsac.nxv4f64.nxv4f32.nxv4f32.i64(<vscale x 4 x double> [[VD:%.*]], <vscale x 4 x float> [[VS1:%.*]], <vscale x 4 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x double> @llvm.riscv.vfwnmsac.nxv4f64.nxv4f32.nxv4f32.i64(<vscale x 4 x double> [[VD:%.*]], <vscale x 4 x float> [[VS1:%.*]], <vscale x 4 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x double> [[TMP0]]
 //
 vfloat64m4_t test_vfwnmsac_vv_f64m4(vfloat64m4_t vd, vfloat32m2_t vs1, vfloat32m2_t vs2, size_t vl) {
@@ -144,7 +144,7 @@ vfloat64m4_t test_vfwnmsac_vv_f64m4(vfloat64m4_t vd, vfloat32m2_t vs1, vfloat32m
 
 // CHECK-RV64-LABEL: @test_vfwnmsac_vf_f64m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x double> @llvm.riscv.vfwnmsac.nxv4f64.f32.nxv4f32.i64(<vscale x 4 x double> [[VD:%.*]], float [[VS1:%.*]], <vscale x 4 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x double> @llvm.riscv.vfwnmsac.nxv4f64.f32.nxv4f32.i64(<vscale x 4 x double> [[VD:%.*]], float [[VS1:%.*]], <vscale x 4 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x double> [[TMP0]]
 //
 vfloat64m4_t test_vfwnmsac_vf_f64m4(vfloat64m4_t vd, float vs1, vfloat32m2_t vs2, size_t vl) {
@@ -153,7 +153,7 @@ vfloat64m4_t test_vfwnmsac_vf_f64m4(vfloat64m4_t vd, float vs1, vfloat32m2_t vs2
 
 // CHECK-RV64-LABEL: @test_vfwnmsac_vv_f64m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x double> @llvm.riscv.vfwnmsac.nxv8f64.nxv8f32.nxv8f32.i64(<vscale x 8 x double> [[VD:%.*]], <vscale x 8 x float> [[VS1:%.*]], <vscale x 8 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x double> @llvm.riscv.vfwnmsac.nxv8f64.nxv8f32.nxv8f32.i64(<vscale x 8 x double> [[VD:%.*]], <vscale x 8 x float> [[VS1:%.*]], <vscale x 8 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x double> [[TMP0]]
 //
 vfloat64m8_t test_vfwnmsac_vv_f64m8(vfloat64m8_t vd, vfloat32m4_t vs1, vfloat32m4_t vs2, size_t vl) {
@@ -162,7 +162,7 @@ vfloat64m8_t test_vfwnmsac_vv_f64m8(vfloat64m8_t vd, vfloat32m4_t vs1, vfloat32m
 
 // CHECK-RV64-LABEL: @test_vfwnmsac_vf_f64m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x double> @llvm.riscv.vfwnmsac.nxv8f64.f32.nxv8f32.i64(<vscale x 8 x double> [[VD:%.*]], float [[VS1:%.*]], <vscale x 8 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x double> @llvm.riscv.vfwnmsac.nxv8f64.f32.nxv8f32.i64(<vscale x 8 x double> [[VD:%.*]], float [[VS1:%.*]], <vscale x 8 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x double> [[TMP0]]
 //
 vfloat64m8_t test_vfwnmsac_vf_f64m8(vfloat64m8_t vd, float vs1, vfloat32m4_t vs2, size_t vl) {

diff  --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vmacc.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vmacc.c
index bd411283f693b..0da946914aac0 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vmacc.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vmacc.c
@@ -9,7 +9,7 @@
 
 // CHECK-RV64-LABEL: @test_vmacc_vv_i8mf8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vmacc.nxv1i8.nxv1i8.i64(<vscale x 1 x i8> [[VD:%.*]], <vscale x 1 x i8> [[VS1:%.*]], <vscale x 1 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vmacc.nxv1i8.nxv1i8.i64(<vscale x 1 x i8> [[VD:%.*]], <vscale x 1 x i8> [[VS1:%.*]], <vscale x 1 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i8> [[TMP0]]
 //
 vint8mf8_t test_vmacc_vv_i8mf8(vint8mf8_t vd, vint8mf8_t vs1, vint8mf8_t vs2, size_t vl) {
@@ -18,7 +18,7 @@ vint8mf8_t test_vmacc_vv_i8mf8(vint8mf8_t vd, vint8mf8_t vs1, vint8mf8_t vs2, si
 
 // CHECK-RV64-LABEL: @test_vmacc_vx_i8mf8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vmacc.nxv1i8.i8.i64(<vscale x 1 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 1 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vmacc.nxv1i8.i8.i64(<vscale x 1 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 1 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i8> [[TMP0]]
 //
 vint8mf8_t test_vmacc_vx_i8mf8(vint8mf8_t vd, int8_t rs1, vint8mf8_t vs2, size_t vl) {
@@ -27,7 +27,7 @@ vint8mf8_t test_vmacc_vx_i8mf8(vint8mf8_t vd, int8_t rs1, vint8mf8_t vs2, size_t
 
 // CHECK-RV64-LABEL: @test_vmacc_vv_i8mf4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vmacc.nxv2i8.nxv2i8.i64(<vscale x 2 x i8> [[VD:%.*]], <vscale x 2 x i8> [[VS1:%.*]], <vscale x 2 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vmacc.nxv2i8.nxv2i8.i64(<vscale x 2 x i8> [[VD:%.*]], <vscale x 2 x i8> [[VS1:%.*]], <vscale x 2 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i8> [[TMP0]]
 //
 vint8mf4_t test_vmacc_vv_i8mf4(vint8mf4_t vd, vint8mf4_t vs1, vint8mf4_t vs2, size_t vl) {
@@ -36,7 +36,7 @@ vint8mf4_t test_vmacc_vv_i8mf4(vint8mf4_t vd, vint8mf4_t vs1, vint8mf4_t vs2, si
 
 // CHECK-RV64-LABEL: @test_vmacc_vx_i8mf4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vmacc.nxv2i8.i8.i64(<vscale x 2 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 2 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vmacc.nxv2i8.i8.i64(<vscale x 2 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 2 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i8> [[TMP0]]
 //
 vint8mf4_t test_vmacc_vx_i8mf4(vint8mf4_t vd, int8_t rs1, vint8mf4_t vs2, size_t vl) {
@@ -45,7 +45,7 @@ vint8mf4_t test_vmacc_vx_i8mf4(vint8mf4_t vd, int8_t rs1, vint8mf4_t vs2, size_t
 
 // CHECK-RV64-LABEL: @test_vmacc_vv_i8mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vmacc.nxv4i8.nxv4i8.i64(<vscale x 4 x i8> [[VD:%.*]], <vscale x 4 x i8> [[VS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vmacc.nxv4i8.nxv4i8.i64(<vscale x 4 x i8> [[VD:%.*]], <vscale x 4 x i8> [[VS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i8> [[TMP0]]
 //
 vint8mf2_t test_vmacc_vv_i8mf2(vint8mf2_t vd, vint8mf2_t vs1, vint8mf2_t vs2, size_t vl) {
@@ -54,7 +54,7 @@ vint8mf2_t test_vmacc_vv_i8mf2(vint8mf2_t vd, vint8mf2_t vs1, vint8mf2_t vs2, si
 
 // CHECK-RV64-LABEL: @test_vmacc_vx_i8mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vmacc.nxv4i8.i8.i64(<vscale x 4 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vmacc.nxv4i8.i8.i64(<vscale x 4 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i8> [[TMP0]]
 //
 vint8mf2_t test_vmacc_vx_i8mf2(vint8mf2_t vd, int8_t rs1, vint8mf2_t vs2, size_t vl) {
@@ -63,7 +63,7 @@ vint8mf2_t test_vmacc_vx_i8mf2(vint8mf2_t vd, int8_t rs1, vint8mf2_t vs2, size_t
 
 // CHECK-RV64-LABEL: @test_vmacc_vv_i8m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vmacc.nxv8i8.nxv8i8.i64(<vscale x 8 x i8> [[VD:%.*]], <vscale x 8 x i8> [[VS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vmacc.nxv8i8.nxv8i8.i64(<vscale x 8 x i8> [[VD:%.*]], <vscale x 8 x i8> [[VS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i8> [[TMP0]]
 //
 vint8m1_t test_vmacc_vv_i8m1(vint8m1_t vd, vint8m1_t vs1, vint8m1_t vs2, size_t vl) {
@@ -72,7 +72,7 @@ vint8m1_t test_vmacc_vv_i8m1(vint8m1_t vd, vint8m1_t vs1, vint8m1_t vs2, size_t
 
 // CHECK-RV64-LABEL: @test_vmacc_vx_i8m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vmacc.nxv8i8.i8.i64(<vscale x 8 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vmacc.nxv8i8.i8.i64(<vscale x 8 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i8> [[TMP0]]
 //
 vint8m1_t test_vmacc_vx_i8m1(vint8m1_t vd, int8_t rs1, vint8m1_t vs2, size_t vl) {
@@ -81,7 +81,7 @@ vint8m1_t test_vmacc_vx_i8m1(vint8m1_t vd, int8_t rs1, vint8m1_t vs2, size_t vl)
 
 // CHECK-RV64-LABEL: @test_vmacc_vv_i8m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vmacc.nxv16i8.nxv16i8.i64(<vscale x 16 x i8> [[VD:%.*]], <vscale x 16 x i8> [[VS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vmacc.nxv16i8.nxv16i8.i64(<vscale x 16 x i8> [[VD:%.*]], <vscale x 16 x i8> [[VS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
 vint8m2_t test_vmacc_vv_i8m2(vint8m2_t vd, vint8m2_t vs1, vint8m2_t vs2, size_t vl) {
@@ -90,7 +90,7 @@ vint8m2_t test_vmacc_vv_i8m2(vint8m2_t vd, vint8m2_t vs1, vint8m2_t vs2, size_t
 
 // CHECK-RV64-LABEL: @test_vmacc_vx_i8m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vmacc.nxv16i8.i8.i64(<vscale x 16 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vmacc.nxv16i8.i8.i64(<vscale x 16 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
 vint8m2_t test_vmacc_vx_i8m2(vint8m2_t vd, int8_t rs1, vint8m2_t vs2, size_t vl) {
@@ -99,7 +99,7 @@ vint8m2_t test_vmacc_vx_i8m2(vint8m2_t vd, int8_t rs1, vint8m2_t vs2, size_t vl)
 
 // CHECK-RV64-LABEL: @test_vmacc_vv_i8m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i8> @llvm.riscv.vmacc.nxv32i8.nxv32i8.i64(<vscale x 32 x i8> [[VD:%.*]], <vscale x 32 x i8> [[VS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i8> @llvm.riscv.vmacc.nxv32i8.nxv32i8.i64(<vscale x 32 x i8> [[VD:%.*]], <vscale x 32 x i8> [[VS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 32 x i8> [[TMP0]]
 //
 vint8m4_t test_vmacc_vv_i8m4(vint8m4_t vd, vint8m4_t vs1, vint8m4_t vs2, size_t vl) {
@@ -108,7 +108,7 @@ vint8m4_t test_vmacc_vv_i8m4(vint8m4_t vd, vint8m4_t vs1, vint8m4_t vs2, size_t
 
 // CHECK-RV64-LABEL: @test_vmacc_vx_i8m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i8> @llvm.riscv.vmacc.nxv32i8.i8.i64(<vscale x 32 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i8> @llvm.riscv.vmacc.nxv32i8.i8.i64(<vscale x 32 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 32 x i8> [[TMP0]]
 //
 vint8m4_t test_vmacc_vx_i8m4(vint8m4_t vd, int8_t rs1, vint8m4_t vs2, size_t vl) {
@@ -117,7 +117,7 @@ vint8m4_t test_vmacc_vx_i8m4(vint8m4_t vd, int8_t rs1, vint8m4_t vs2, size_t vl)
 
 // CHECK-RV64-LABEL: @test_vmacc_vv_i8m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 64 x i8> @llvm.riscv.vmacc.nxv64i8.nxv64i8.i64(<vscale x 64 x i8> [[VD:%.*]], <vscale x 64 x i8> [[VS1:%.*]], <vscale x 64 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 64 x i8> @llvm.riscv.vmacc.nxv64i8.nxv64i8.i64(<vscale x 64 x i8> [[VD:%.*]], <vscale x 64 x i8> [[VS1:%.*]], <vscale x 64 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 64 x i8> [[TMP0]]
 //
 vint8m8_t test_vmacc_vv_i8m8(vint8m8_t vd, vint8m8_t vs1, vint8m8_t vs2, size_t vl) {
@@ -126,7 +126,7 @@ vint8m8_t test_vmacc_vv_i8m8(vint8m8_t vd, vint8m8_t vs1, vint8m8_t vs2, size_t
 
 // CHECK-RV64-LABEL: @test_vmacc_vx_i8m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 64 x i8> @llvm.riscv.vmacc.nxv64i8.i8.i64(<vscale x 64 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 64 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 64 x i8> @llvm.riscv.vmacc.nxv64i8.i8.i64(<vscale x 64 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 64 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 64 x i8> [[TMP0]]
 //
 vint8m8_t test_vmacc_vx_i8m8(vint8m8_t vd, int8_t rs1, vint8m8_t vs2, size_t vl) {
@@ -135,7 +135,7 @@ vint8m8_t test_vmacc_vx_i8m8(vint8m8_t vd, int8_t rs1, vint8m8_t vs2, size_t vl)
 
 // CHECK-RV64-LABEL: @test_vmacc_vv_i16mf4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vmacc.nxv1i16.nxv1i16.i64(<vscale x 1 x i16> [[VD:%.*]], <vscale x 1 x i16> [[VS1:%.*]], <vscale x 1 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vmacc.nxv1i16.nxv1i16.i64(<vscale x 1 x i16> [[VD:%.*]], <vscale x 1 x i16> [[VS1:%.*]], <vscale x 1 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i16> [[TMP0]]
 //
 vint16mf4_t test_vmacc_vv_i16mf4(vint16mf4_t vd, vint16mf4_t vs1, vint16mf4_t vs2, size_t vl) {
@@ -144,7 +144,7 @@ vint16mf4_t test_vmacc_vv_i16mf4(vint16mf4_t vd, vint16mf4_t vs1, vint16mf4_t vs
 
 // CHECK-RV64-LABEL: @test_vmacc_vx_i16mf4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vmacc.nxv1i16.i16.i64(<vscale x 1 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 1 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vmacc.nxv1i16.i16.i64(<vscale x 1 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 1 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i16> [[TMP0]]
 //
 vint16mf4_t test_vmacc_vx_i16mf4(vint16mf4_t vd, int16_t rs1, vint16mf4_t vs2, size_t vl) {
@@ -153,7 +153,7 @@ vint16mf4_t test_vmacc_vx_i16mf4(vint16mf4_t vd, int16_t rs1, vint16mf4_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vmacc_vv_i16mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vmacc.nxv2i16.nxv2i16.i64(<vscale x 2 x i16> [[VD:%.*]], <vscale x 2 x i16> [[VS1:%.*]], <vscale x 2 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vmacc.nxv2i16.nxv2i16.i64(<vscale x 2 x i16> [[VD:%.*]], <vscale x 2 x i16> [[VS1:%.*]], <vscale x 2 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i16> [[TMP0]]
 //
 vint16mf2_t test_vmacc_vv_i16mf2(vint16mf2_t vd, vint16mf2_t vs1, vint16mf2_t vs2, size_t vl) {
@@ -162,7 +162,7 @@ vint16mf2_t test_vmacc_vv_i16mf2(vint16mf2_t vd, vint16mf2_t vs1, vint16mf2_t vs
 
 // CHECK-RV64-LABEL: @test_vmacc_vx_i16mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vmacc.nxv2i16.i16.i64(<vscale x 2 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 2 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vmacc.nxv2i16.i16.i64(<vscale x 2 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 2 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i16> [[TMP0]]
 //
 vint16mf2_t test_vmacc_vx_i16mf2(vint16mf2_t vd, int16_t rs1, vint16mf2_t vs2, size_t vl) {
@@ -171,7 +171,7 @@ vint16mf2_t test_vmacc_vx_i16mf2(vint16mf2_t vd, int16_t rs1, vint16mf2_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vmacc_vv_i16m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vmacc.nxv4i16.nxv4i16.i64(<vscale x 4 x i16> [[VD:%.*]], <vscale x 4 x i16> [[VS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vmacc.nxv4i16.nxv4i16.i64(<vscale x 4 x i16> [[VD:%.*]], <vscale x 4 x i16> [[VS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i16> [[TMP0]]
 //
 vint16m1_t test_vmacc_vv_i16m1(vint16m1_t vd, vint16m1_t vs1, vint16m1_t vs2, size_t vl) {
@@ -180,7 +180,7 @@ vint16m1_t test_vmacc_vv_i16m1(vint16m1_t vd, vint16m1_t vs1, vint16m1_t vs2, si
 
 // CHECK-RV64-LABEL: @test_vmacc_vx_i16m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vmacc.nxv4i16.i16.i64(<vscale x 4 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vmacc.nxv4i16.i16.i64(<vscale x 4 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i16> [[TMP0]]
 //
 vint16m1_t test_vmacc_vx_i16m1(vint16m1_t vd, int16_t rs1, vint16m1_t vs2, size_t vl) {
@@ -189,7 +189,7 @@ vint16m1_t test_vmacc_vx_i16m1(vint16m1_t vd, int16_t rs1, vint16m1_t vs2, size_
 
 // CHECK-RV64-LABEL: @test_vmacc_vv_i16m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vmacc.nxv8i16.nxv8i16.i64(<vscale x 8 x i16> [[VD:%.*]], <vscale x 8 x i16> [[VS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vmacc.nxv8i16.nxv8i16.i64(<vscale x 8 x i16> [[VD:%.*]], <vscale x 8 x i16> [[VS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
 vint16m2_t test_vmacc_vv_i16m2(vint16m2_t vd, vint16m2_t vs1, vint16m2_t vs2, size_t vl) {
@@ -198,7 +198,7 @@ vint16m2_t test_vmacc_vv_i16m2(vint16m2_t vd, vint16m2_t vs1, vint16m2_t vs2, si
 
 // CHECK-RV64-LABEL: @test_vmacc_vx_i16m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vmacc.nxv8i16.i16.i64(<vscale x 8 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vmacc.nxv8i16.i16.i64(<vscale x 8 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
 vint16m2_t test_vmacc_vx_i16m2(vint16m2_t vd, int16_t rs1, vint16m2_t vs2, size_t vl) {
@@ -207,7 +207,7 @@ vint16m2_t test_vmacc_vx_i16m2(vint16m2_t vd, int16_t rs1, vint16m2_t vs2, size_
 
 // CHECK-RV64-LABEL: @test_vmacc_vv_i16m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vmacc.nxv16i16.nxv16i16.i64(<vscale x 16 x i16> [[VD:%.*]], <vscale x 16 x i16> [[VS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vmacc.nxv16i16.nxv16i16.i64(<vscale x 16 x i16> [[VD:%.*]], <vscale x 16 x i16> [[VS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i16> [[TMP0]]
 //
 vint16m4_t test_vmacc_vv_i16m4(vint16m4_t vd, vint16m4_t vs1, vint16m4_t vs2, size_t vl) {
@@ -216,7 +216,7 @@ vint16m4_t test_vmacc_vv_i16m4(vint16m4_t vd, vint16m4_t vs1, vint16m4_t vs2, si
 
 // CHECK-RV64-LABEL: @test_vmacc_vx_i16m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vmacc.nxv16i16.i16.i64(<vscale x 16 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vmacc.nxv16i16.i16.i64(<vscale x 16 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i16> [[TMP0]]
 //
 vint16m4_t test_vmacc_vx_i16m4(vint16m4_t vd, int16_t rs1, vint16m4_t vs2, size_t vl) {
@@ -225,7 +225,7 @@ vint16m4_t test_vmacc_vx_i16m4(vint16m4_t vd, int16_t rs1, vint16m4_t vs2, size_
 
 // CHECK-RV64-LABEL: @test_vmacc_vv_i16m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vmacc.nxv32i16.nxv32i16.i64(<vscale x 32 x i16> [[VD:%.*]], <vscale x 32 x i16> [[VS1:%.*]], <vscale x 32 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vmacc.nxv32i16.nxv32i16.i64(<vscale x 32 x i16> [[VD:%.*]], <vscale x 32 x i16> [[VS1:%.*]], <vscale x 32 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 32 x i16> [[TMP0]]
 //
 vint16m8_t test_vmacc_vv_i16m8(vint16m8_t vd, vint16m8_t vs1, vint16m8_t vs2, size_t vl) {
@@ -234,7 +234,7 @@ vint16m8_t test_vmacc_vv_i16m8(vint16m8_t vd, vint16m8_t vs1, vint16m8_t vs2, si
 
 // CHECK-RV64-LABEL: @test_vmacc_vx_i16m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vmacc.nxv32i16.i16.i64(<vscale x 32 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 32 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vmacc.nxv32i16.i16.i64(<vscale x 32 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 32 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 32 x i16> [[TMP0]]
 //
 vint16m8_t test_vmacc_vx_i16m8(vint16m8_t vd, int16_t rs1, vint16m8_t vs2, size_t vl) {
@@ -243,7 +243,7 @@ vint16m8_t test_vmacc_vx_i16m8(vint16m8_t vd, int16_t rs1, vint16m8_t vs2, size_
 
 // CHECK-RV64-LABEL: @test_vmacc_vv_i32mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vmacc.nxv1i32.nxv1i32.i64(<vscale x 1 x i32> [[VD:%.*]], <vscale x 1 x i32> [[VS1:%.*]], <vscale x 1 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vmacc.nxv1i32.nxv1i32.i64(<vscale x 1 x i32> [[VD:%.*]], <vscale x 1 x i32> [[VS1:%.*]], <vscale x 1 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i32> [[TMP0]]
 //
 vint32mf2_t test_vmacc_vv_i32mf2(vint32mf2_t vd, vint32mf2_t vs1, vint32mf2_t vs2, size_t vl) {
@@ -252,7 +252,7 @@ vint32mf2_t test_vmacc_vv_i32mf2(vint32mf2_t vd, vint32mf2_t vs1, vint32mf2_t vs
 
 // CHECK-RV64-LABEL: @test_vmacc_vx_i32mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vmacc.nxv1i32.i32.i64(<vscale x 1 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 1 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vmacc.nxv1i32.i32.i64(<vscale x 1 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 1 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i32> [[TMP0]]
 //
 vint32mf2_t test_vmacc_vx_i32mf2(vint32mf2_t vd, int32_t rs1, vint32mf2_t vs2, size_t vl) {
@@ -261,7 +261,7 @@ vint32mf2_t test_vmacc_vx_i32mf2(vint32mf2_t vd, int32_t rs1, vint32mf2_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vmacc_vv_i32m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vmacc.nxv2i32.nxv2i32.i64(<vscale x 2 x i32> [[VD:%.*]], <vscale x 2 x i32> [[VS1:%.*]], <vscale x 2 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vmacc.nxv2i32.nxv2i32.i64(<vscale x 2 x i32> [[VD:%.*]], <vscale x 2 x i32> [[VS1:%.*]], <vscale x 2 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i32> [[TMP0]]
 //
 vint32m1_t test_vmacc_vv_i32m1(vint32m1_t vd, vint32m1_t vs1, vint32m1_t vs2, size_t vl) {
@@ -270,7 +270,7 @@ vint32m1_t test_vmacc_vv_i32m1(vint32m1_t vd, vint32m1_t vs1, vint32m1_t vs2, si
 
 // CHECK-RV64-LABEL: @test_vmacc_vx_i32m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vmacc.nxv2i32.i32.i64(<vscale x 2 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 2 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vmacc.nxv2i32.i32.i64(<vscale x 2 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 2 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i32> [[TMP0]]
 //
 vint32m1_t test_vmacc_vx_i32m1(vint32m1_t vd, int32_t rs1, vint32m1_t vs2, size_t vl) {
@@ -279,7 +279,7 @@ vint32m1_t test_vmacc_vx_i32m1(vint32m1_t vd, int32_t rs1, vint32m1_t vs2, size_
 
 // CHECK-RV64-LABEL: @test_vmacc_vv_i32m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vmacc.nxv4i32.nxv4i32.i64(<vscale x 4 x i32> [[VD:%.*]], <vscale x 4 x i32> [[VS1:%.*]], <vscale x 4 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vmacc.nxv4i32.nxv4i32.i64(<vscale x 4 x i32> [[VD:%.*]], <vscale x 4 x i32> [[VS1:%.*]], <vscale x 4 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
 vint32m2_t test_vmacc_vv_i32m2(vint32m2_t vd, vint32m2_t vs1, vint32m2_t vs2, size_t vl) {
@@ -288,7 +288,7 @@ vint32m2_t test_vmacc_vv_i32m2(vint32m2_t vd, vint32m2_t vs1, vint32m2_t vs2, si
 
 // CHECK-RV64-LABEL: @test_vmacc_vx_i32m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vmacc.nxv4i32.i32.i64(<vscale x 4 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 4 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vmacc.nxv4i32.i32.i64(<vscale x 4 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 4 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
 vint32m2_t test_vmacc_vx_i32m2(vint32m2_t vd, int32_t rs1, vint32m2_t vs2, size_t vl) {
@@ -297,7 +297,7 @@ vint32m2_t test_vmacc_vx_i32m2(vint32m2_t vd, int32_t rs1, vint32m2_t vs2, size_
 
 // CHECK-RV64-LABEL: @test_vmacc_vv_i32m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vmacc.nxv8i32.nxv8i32.i64(<vscale x 8 x i32> [[VD:%.*]], <vscale x 8 x i32> [[VS1:%.*]], <vscale x 8 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vmacc.nxv8i32.nxv8i32.i64(<vscale x 8 x i32> [[VD:%.*]], <vscale x 8 x i32> [[VS1:%.*]], <vscale x 8 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i32> [[TMP0]]
 //
 vint32m4_t test_vmacc_vv_i32m4(vint32m4_t vd, vint32m4_t vs1, vint32m4_t vs2, size_t vl) {
@@ -306,7 +306,7 @@ vint32m4_t test_vmacc_vv_i32m4(vint32m4_t vd, vint32m4_t vs1, vint32m4_t vs2, si
 
 // CHECK-RV64-LABEL: @test_vmacc_vx_i32m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vmacc.nxv8i32.i32.i64(<vscale x 8 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 8 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vmacc.nxv8i32.i32.i64(<vscale x 8 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 8 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i32> [[TMP0]]
 //
 vint32m4_t test_vmacc_vx_i32m4(vint32m4_t vd, int32_t rs1, vint32m4_t vs2, size_t vl) {
@@ -315,7 +315,7 @@ vint32m4_t test_vmacc_vx_i32m4(vint32m4_t vd, int32_t rs1, vint32m4_t vs2, size_
 
 // CHECK-RV64-LABEL: @test_vmacc_vv_i32m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vmacc.nxv16i32.nxv16i32.i64(<vscale x 16 x i32> [[VD:%.*]], <vscale x 16 x i32> [[VS1:%.*]], <vscale x 16 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vmacc.nxv16i32.nxv16i32.i64(<vscale x 16 x i32> [[VD:%.*]], <vscale x 16 x i32> [[VS1:%.*]], <vscale x 16 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i32> [[TMP0]]
 //
 vint32m8_t test_vmacc_vv_i32m8(vint32m8_t vd, vint32m8_t vs1, vint32m8_t vs2, size_t vl) {
@@ -324,7 +324,7 @@ vint32m8_t test_vmacc_vv_i32m8(vint32m8_t vd, vint32m8_t vs1, vint32m8_t vs2, si
 
 // CHECK-RV64-LABEL: @test_vmacc_vx_i32m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vmacc.nxv16i32.i32.i64(<vscale x 16 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 16 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vmacc.nxv16i32.i32.i64(<vscale x 16 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 16 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i32> [[TMP0]]
 //
 vint32m8_t test_vmacc_vx_i32m8(vint32m8_t vd, int32_t rs1, vint32m8_t vs2, size_t vl) {
@@ -333,7 +333,7 @@ vint32m8_t test_vmacc_vx_i32m8(vint32m8_t vd, int32_t rs1, vint32m8_t vs2, size_
 
 // CHECK-RV64-LABEL: @test_vmacc_vv_i64m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vmacc.nxv1i64.nxv1i64.i64(<vscale x 1 x i64> [[VD:%.*]], <vscale x 1 x i64> [[VS1:%.*]], <vscale x 1 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vmacc.nxv1i64.nxv1i64.i64(<vscale x 1 x i64> [[VD:%.*]], <vscale x 1 x i64> [[VS1:%.*]], <vscale x 1 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
 //
 vint64m1_t test_vmacc_vv_i64m1(vint64m1_t vd, vint64m1_t vs1, vint64m1_t vs2, size_t vl) {
@@ -342,7 +342,7 @@ vint64m1_t test_vmacc_vv_i64m1(vint64m1_t vd, vint64m1_t vs1, vint64m1_t vs2, si
 
 // CHECK-RV64-LABEL: @test_vmacc_vx_i64m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vmacc.nxv1i64.i64.i64(<vscale x 1 x i64> [[VD:%.*]], i64 [[RS1:%.*]], <vscale x 1 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vmacc.nxv1i64.i64.i64(<vscale x 1 x i64> [[VD:%.*]], i64 [[RS1:%.*]], <vscale x 1 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
 //
 vint64m1_t test_vmacc_vx_i64m1(vint64m1_t vd, int64_t rs1, vint64m1_t vs2, size_t vl) {
@@ -351,7 +351,7 @@ vint64m1_t test_vmacc_vx_i64m1(vint64m1_t vd, int64_t rs1, vint64m1_t vs2, size_
 
 // CHECK-RV64-LABEL: @test_vmacc_vv_i64m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vmacc.nxv2i64.nxv2i64.i64(<vscale x 2 x i64> [[VD:%.*]], <vscale x 2 x i64> [[VS1:%.*]], <vscale x 2 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vmacc.nxv2i64.nxv2i64.i64(<vscale x 2 x i64> [[VD:%.*]], <vscale x 2 x i64> [[VS1:%.*]], <vscale x 2 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
 vint64m2_t test_vmacc_vv_i64m2(vint64m2_t vd, vint64m2_t vs1, vint64m2_t vs2, size_t vl) {
@@ -360,7 +360,7 @@ vint64m2_t test_vmacc_vv_i64m2(vint64m2_t vd, vint64m2_t vs1, vint64m2_t vs2, si
 
 // CHECK-RV64-LABEL: @test_vmacc_vx_i64m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vmacc.nxv2i64.i64.i64(<vscale x 2 x i64> [[VD:%.*]], i64 [[RS1:%.*]], <vscale x 2 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vmacc.nxv2i64.i64.i64(<vscale x 2 x i64> [[VD:%.*]], i64 [[RS1:%.*]], <vscale x 2 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
 vint64m2_t test_vmacc_vx_i64m2(vint64m2_t vd, int64_t rs1, vint64m2_t vs2, size_t vl) {
@@ -369,7 +369,7 @@ vint64m2_t test_vmacc_vx_i64m2(vint64m2_t vd, int64_t rs1, vint64m2_t vs2, size_
 
 // CHECK-RV64-LABEL: @test_vmacc_vv_i64m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vmacc.nxv4i64.nxv4i64.i64(<vscale x 4 x i64> [[VD:%.*]], <vscale x 4 x i64> [[VS1:%.*]], <vscale x 4 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vmacc.nxv4i64.nxv4i64.i64(<vscale x 4 x i64> [[VD:%.*]], <vscale x 4 x i64> [[VS1:%.*]], <vscale x 4 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
 //
 vint64m4_t test_vmacc_vv_i64m4(vint64m4_t vd, vint64m4_t vs1, vint64m4_t vs2, size_t vl) {
@@ -378,7 +378,7 @@ vint64m4_t test_vmacc_vv_i64m4(vint64m4_t vd, vint64m4_t vs1, vint64m4_t vs2, si
 
 // CHECK-RV64-LABEL: @test_vmacc_vx_i64m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vmacc.nxv4i64.i64.i64(<vscale x 4 x i64> [[VD:%.*]], i64 [[RS1:%.*]], <vscale x 4 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vmacc.nxv4i64.i64.i64(<vscale x 4 x i64> [[VD:%.*]], i64 [[RS1:%.*]], <vscale x 4 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
 //
 vint64m4_t test_vmacc_vx_i64m4(vint64m4_t vd, int64_t rs1, vint64m4_t vs2, size_t vl) {
@@ -387,7 +387,7 @@ vint64m4_t test_vmacc_vx_i64m4(vint64m4_t vd, int64_t rs1, vint64m4_t vs2, size_
 
 // CHECK-RV64-LABEL: @test_vmacc_vv_i64m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vmacc.nxv8i64.nxv8i64.i64(<vscale x 8 x i64> [[VD:%.*]], <vscale x 8 x i64> [[VS1:%.*]], <vscale x 8 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vmacc.nxv8i64.nxv8i64.i64(<vscale x 8 x i64> [[VD:%.*]], <vscale x 8 x i64> [[VS1:%.*]], <vscale x 8 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
 //
 vint64m8_t test_vmacc_vv_i64m8(vint64m8_t vd, vint64m8_t vs1, vint64m8_t vs2, size_t vl) {
@@ -396,7 +396,7 @@ vint64m8_t test_vmacc_vv_i64m8(vint64m8_t vd, vint64m8_t vs1, vint64m8_t vs2, si
 
 // CHECK-RV64-LABEL: @test_vmacc_vx_i64m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vmacc.nxv8i64.i64.i64(<vscale x 8 x i64> [[VD:%.*]], i64 [[RS1:%.*]], <vscale x 8 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vmacc.nxv8i64.i64.i64(<vscale x 8 x i64> [[VD:%.*]], i64 [[RS1:%.*]], <vscale x 8 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
 //
 vint64m8_t test_vmacc_vx_i64m8(vint64m8_t vd, int64_t rs1, vint64m8_t vs2, size_t vl) {
@@ -405,7 +405,7 @@ vint64m8_t test_vmacc_vx_i64m8(vint64m8_t vd, int64_t rs1, vint64m8_t vs2, size_
 
 // CHECK-RV64-LABEL: @test_vmacc_vv_u8mf8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vmacc.nxv1i8.nxv1i8.i64(<vscale x 1 x i8> [[VD:%.*]], <vscale x 1 x i8> [[VS1:%.*]], <vscale x 1 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vmacc.nxv1i8.nxv1i8.i64(<vscale x 1 x i8> [[VD:%.*]], <vscale x 1 x i8> [[VS1:%.*]], <vscale x 1 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i8> [[TMP0]]
 //
 vuint8mf8_t test_vmacc_vv_u8mf8(vuint8mf8_t vd, vuint8mf8_t vs1, vuint8mf8_t vs2, size_t vl) {
@@ -414,7 +414,7 @@ vuint8mf8_t test_vmacc_vv_u8mf8(vuint8mf8_t vd, vuint8mf8_t vs1, vuint8mf8_t vs2
 
 // CHECK-RV64-LABEL: @test_vmacc_vx_u8mf8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vmacc.nxv1i8.i8.i64(<vscale x 1 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 1 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vmacc.nxv1i8.i8.i64(<vscale x 1 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 1 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i8> [[TMP0]]
 //
 vuint8mf8_t test_vmacc_vx_u8mf8(vuint8mf8_t vd, uint8_t rs1, vuint8mf8_t vs2, size_t vl) {
@@ -423,7 +423,7 @@ vuint8mf8_t test_vmacc_vx_u8mf8(vuint8mf8_t vd, uint8_t rs1, vuint8mf8_t vs2, si
 
 // CHECK-RV64-LABEL: @test_vmacc_vv_u8mf4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vmacc.nxv2i8.nxv2i8.i64(<vscale x 2 x i8> [[VD:%.*]], <vscale x 2 x i8> [[VS1:%.*]], <vscale x 2 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vmacc.nxv2i8.nxv2i8.i64(<vscale x 2 x i8> [[VD:%.*]], <vscale x 2 x i8> [[VS1:%.*]], <vscale x 2 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i8> [[TMP0]]
 //
 vuint8mf4_t test_vmacc_vv_u8mf4(vuint8mf4_t vd, vuint8mf4_t vs1, vuint8mf4_t vs2, size_t vl) {
@@ -432,7 +432,7 @@ vuint8mf4_t test_vmacc_vv_u8mf4(vuint8mf4_t vd, vuint8mf4_t vs1, vuint8mf4_t vs2
 
 // CHECK-RV64-LABEL: @test_vmacc_vx_u8mf4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vmacc.nxv2i8.i8.i64(<vscale x 2 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 2 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vmacc.nxv2i8.i8.i64(<vscale x 2 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 2 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i8> [[TMP0]]
 //
 vuint8mf4_t test_vmacc_vx_u8mf4(vuint8mf4_t vd, uint8_t rs1, vuint8mf4_t vs2, size_t vl) {
@@ -441,7 +441,7 @@ vuint8mf4_t test_vmacc_vx_u8mf4(vuint8mf4_t vd, uint8_t rs1, vuint8mf4_t vs2, si
 
 // CHECK-RV64-LABEL: @test_vmacc_vv_u8mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vmacc.nxv4i8.nxv4i8.i64(<vscale x 4 x i8> [[VD:%.*]], <vscale x 4 x i8> [[VS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vmacc.nxv4i8.nxv4i8.i64(<vscale x 4 x i8> [[VD:%.*]], <vscale x 4 x i8> [[VS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i8> [[TMP0]]
 //
 vuint8mf2_t test_vmacc_vv_u8mf2(vuint8mf2_t vd, vuint8mf2_t vs1, vuint8mf2_t vs2, size_t vl) {
@@ -450,7 +450,7 @@ vuint8mf2_t test_vmacc_vv_u8mf2(vuint8mf2_t vd, vuint8mf2_t vs1, vuint8mf2_t vs2
 
 // CHECK-RV64-LABEL: @test_vmacc_vx_u8mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vmacc.nxv4i8.i8.i64(<vscale x 4 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vmacc.nxv4i8.i8.i64(<vscale x 4 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i8> [[TMP0]]
 //
 vuint8mf2_t test_vmacc_vx_u8mf2(vuint8mf2_t vd, uint8_t rs1, vuint8mf2_t vs2, size_t vl) {
@@ -459,7 +459,7 @@ vuint8mf2_t test_vmacc_vx_u8mf2(vuint8mf2_t vd, uint8_t rs1, vuint8mf2_t vs2, si
 
 // CHECK-RV64-LABEL: @test_vmacc_vv_u8m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vmacc.nxv8i8.nxv8i8.i64(<vscale x 8 x i8> [[VD:%.*]], <vscale x 8 x i8> [[VS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vmacc.nxv8i8.nxv8i8.i64(<vscale x 8 x i8> [[VD:%.*]], <vscale x 8 x i8> [[VS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i8> [[TMP0]]
 //
 vuint8m1_t test_vmacc_vv_u8m1(vuint8m1_t vd, vuint8m1_t vs1, vuint8m1_t vs2, size_t vl) {
@@ -468,7 +468,7 @@ vuint8m1_t test_vmacc_vv_u8m1(vuint8m1_t vd, vuint8m1_t vs1, vuint8m1_t vs2, siz
 
 // CHECK-RV64-LABEL: @test_vmacc_vx_u8m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vmacc.nxv8i8.i8.i64(<vscale x 8 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vmacc.nxv8i8.i8.i64(<vscale x 8 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i8> [[TMP0]]
 //
 vuint8m1_t test_vmacc_vx_u8m1(vuint8m1_t vd, uint8_t rs1, vuint8m1_t vs2, size_t vl) {
@@ -477,7 +477,7 @@ vuint8m1_t test_vmacc_vx_u8m1(vuint8m1_t vd, uint8_t rs1, vuint8m1_t vs2, size_t
 
 // CHECK-RV64-LABEL: @test_vmacc_vv_u8m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vmacc.nxv16i8.nxv16i8.i64(<vscale x 16 x i8> [[VD:%.*]], <vscale x 16 x i8> [[VS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vmacc.nxv16i8.nxv16i8.i64(<vscale x 16 x i8> [[VD:%.*]], <vscale x 16 x i8> [[VS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
 vuint8m2_t test_vmacc_vv_u8m2(vuint8m2_t vd, vuint8m2_t vs1, vuint8m2_t vs2, size_t vl) {
@@ -486,7 +486,7 @@ vuint8m2_t test_vmacc_vv_u8m2(vuint8m2_t vd, vuint8m2_t vs1, vuint8m2_t vs2, siz
 
 // CHECK-RV64-LABEL: @test_vmacc_vx_u8m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vmacc.nxv16i8.i8.i64(<vscale x 16 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vmacc.nxv16i8.i8.i64(<vscale x 16 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
 vuint8m2_t test_vmacc_vx_u8m2(vuint8m2_t vd, uint8_t rs1, vuint8m2_t vs2, size_t vl) {
@@ -495,7 +495,7 @@ vuint8m2_t test_vmacc_vx_u8m2(vuint8m2_t vd, uint8_t rs1, vuint8m2_t vs2, size_t
 
 // CHECK-RV64-LABEL: @test_vmacc_vv_u8m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i8> @llvm.riscv.vmacc.nxv32i8.nxv32i8.i64(<vscale x 32 x i8> [[VD:%.*]], <vscale x 32 x i8> [[VS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i8> @llvm.riscv.vmacc.nxv32i8.nxv32i8.i64(<vscale x 32 x i8> [[VD:%.*]], <vscale x 32 x i8> [[VS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 32 x i8> [[TMP0]]
 //
 vuint8m4_t test_vmacc_vv_u8m4(vuint8m4_t vd, vuint8m4_t vs1, vuint8m4_t vs2, size_t vl) {
@@ -504,7 +504,7 @@ vuint8m4_t test_vmacc_vv_u8m4(vuint8m4_t vd, vuint8m4_t vs1, vuint8m4_t vs2, siz
 
 // CHECK-RV64-LABEL: @test_vmacc_vx_u8m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i8> @llvm.riscv.vmacc.nxv32i8.i8.i64(<vscale x 32 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i8> @llvm.riscv.vmacc.nxv32i8.i8.i64(<vscale x 32 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 32 x i8> [[TMP0]]
 //
 vuint8m4_t test_vmacc_vx_u8m4(vuint8m4_t vd, uint8_t rs1, vuint8m4_t vs2, size_t vl) {
@@ -513,7 +513,7 @@ vuint8m4_t test_vmacc_vx_u8m4(vuint8m4_t vd, uint8_t rs1, vuint8m4_t vs2, size_t
 
 // CHECK-RV64-LABEL: @test_vmacc_vv_u8m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 64 x i8> @llvm.riscv.vmacc.nxv64i8.nxv64i8.i64(<vscale x 64 x i8> [[VD:%.*]], <vscale x 64 x i8> [[VS1:%.*]], <vscale x 64 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 64 x i8> @llvm.riscv.vmacc.nxv64i8.nxv64i8.i64(<vscale x 64 x i8> [[VD:%.*]], <vscale x 64 x i8> [[VS1:%.*]], <vscale x 64 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 64 x i8> [[TMP0]]
 //
 vuint8m8_t test_vmacc_vv_u8m8(vuint8m8_t vd, vuint8m8_t vs1, vuint8m8_t vs2, size_t vl) {
@@ -522,7 +522,7 @@ vuint8m8_t test_vmacc_vv_u8m8(vuint8m8_t vd, vuint8m8_t vs1, vuint8m8_t vs2, siz
 
 // CHECK-RV64-LABEL: @test_vmacc_vx_u8m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 64 x i8> @llvm.riscv.vmacc.nxv64i8.i8.i64(<vscale x 64 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 64 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 64 x i8> @llvm.riscv.vmacc.nxv64i8.i8.i64(<vscale x 64 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 64 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 64 x i8> [[TMP0]]
 //
 vuint8m8_t test_vmacc_vx_u8m8(vuint8m8_t vd, uint8_t rs1, vuint8m8_t vs2, size_t vl) {
@@ -531,7 +531,7 @@ vuint8m8_t test_vmacc_vx_u8m8(vuint8m8_t vd, uint8_t rs1, vuint8m8_t vs2, size_t
 
 // CHECK-RV64-LABEL: @test_vmacc_vv_u16mf4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vmacc.nxv1i16.nxv1i16.i64(<vscale x 1 x i16> [[VD:%.*]], <vscale x 1 x i16> [[VS1:%.*]], <vscale x 1 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vmacc.nxv1i16.nxv1i16.i64(<vscale x 1 x i16> [[VD:%.*]], <vscale x 1 x i16> [[VS1:%.*]], <vscale x 1 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i16> [[TMP0]]
 //
 vuint16mf4_t test_vmacc_vv_u16mf4(vuint16mf4_t vd, vuint16mf4_t vs1, vuint16mf4_t vs2, size_t vl) {
@@ -540,7 +540,7 @@ vuint16mf4_t test_vmacc_vv_u16mf4(vuint16mf4_t vd, vuint16mf4_t vs1, vuint16mf4_
 
 // CHECK-RV64-LABEL: @test_vmacc_vx_u16mf4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vmacc.nxv1i16.i16.i64(<vscale x 1 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 1 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vmacc.nxv1i16.i16.i64(<vscale x 1 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 1 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i16> [[TMP0]]
 //
 vuint16mf4_t test_vmacc_vx_u16mf4(vuint16mf4_t vd, uint16_t rs1, vuint16mf4_t vs2, size_t vl) {
@@ -549,7 +549,7 @@ vuint16mf4_t test_vmacc_vx_u16mf4(vuint16mf4_t vd, uint16_t rs1, vuint16mf4_t vs
 
 // CHECK-RV64-LABEL: @test_vmacc_vv_u16mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vmacc.nxv2i16.nxv2i16.i64(<vscale x 2 x i16> [[VD:%.*]], <vscale x 2 x i16> [[VS1:%.*]], <vscale x 2 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vmacc.nxv2i16.nxv2i16.i64(<vscale x 2 x i16> [[VD:%.*]], <vscale x 2 x i16> [[VS1:%.*]], <vscale x 2 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i16> [[TMP0]]
 //
 vuint16mf2_t test_vmacc_vv_u16mf2(vuint16mf2_t vd, vuint16mf2_t vs1, vuint16mf2_t vs2, size_t vl) {
@@ -558,7 +558,7 @@ vuint16mf2_t test_vmacc_vv_u16mf2(vuint16mf2_t vd, vuint16mf2_t vs1, vuint16mf2_
 
 // CHECK-RV64-LABEL: @test_vmacc_vx_u16mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vmacc.nxv2i16.i16.i64(<vscale x 2 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 2 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vmacc.nxv2i16.i16.i64(<vscale x 2 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 2 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i16> [[TMP0]]
 //
 vuint16mf2_t test_vmacc_vx_u16mf2(vuint16mf2_t vd, uint16_t rs1, vuint16mf2_t vs2, size_t vl) {
@@ -567,7 +567,7 @@ vuint16mf2_t test_vmacc_vx_u16mf2(vuint16mf2_t vd, uint16_t rs1, vuint16mf2_t vs
 
 // CHECK-RV64-LABEL: @test_vmacc_vv_u16m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vmacc.nxv4i16.nxv4i16.i64(<vscale x 4 x i16> [[VD:%.*]], <vscale x 4 x i16> [[VS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vmacc.nxv4i16.nxv4i16.i64(<vscale x 4 x i16> [[VD:%.*]], <vscale x 4 x i16> [[VS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i16> [[TMP0]]
 //
 vuint16m1_t test_vmacc_vv_u16m1(vuint16m1_t vd, vuint16m1_t vs1, vuint16m1_t vs2, size_t vl) {
@@ -576,7 +576,7 @@ vuint16m1_t test_vmacc_vv_u16m1(vuint16m1_t vd, vuint16m1_t vs1, vuint16m1_t vs2
 
 // CHECK-RV64-LABEL: @test_vmacc_vx_u16m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vmacc.nxv4i16.i16.i64(<vscale x 4 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vmacc.nxv4i16.i16.i64(<vscale x 4 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i16> [[TMP0]]
 //
 vuint16m1_t test_vmacc_vx_u16m1(vuint16m1_t vd, uint16_t rs1, vuint16m1_t vs2, size_t vl) {
@@ -585,7 +585,7 @@ vuint16m1_t test_vmacc_vx_u16m1(vuint16m1_t vd, uint16_t rs1, vuint16m1_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vmacc_vv_u16m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vmacc.nxv8i16.nxv8i16.i64(<vscale x 8 x i16> [[VD:%.*]], <vscale x 8 x i16> [[VS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vmacc.nxv8i16.nxv8i16.i64(<vscale x 8 x i16> [[VD:%.*]], <vscale x 8 x i16> [[VS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
 vuint16m2_t test_vmacc_vv_u16m2(vuint16m2_t vd, vuint16m2_t vs1, vuint16m2_t vs2, size_t vl) {
@@ -594,7 +594,7 @@ vuint16m2_t test_vmacc_vv_u16m2(vuint16m2_t vd, vuint16m2_t vs1, vuint16m2_t vs2
 
 // CHECK-RV64-LABEL: @test_vmacc_vx_u16m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vmacc.nxv8i16.i16.i64(<vscale x 8 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vmacc.nxv8i16.i16.i64(<vscale x 8 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
 vuint16m2_t test_vmacc_vx_u16m2(vuint16m2_t vd, uint16_t rs1, vuint16m2_t vs2, size_t vl) {
@@ -603,7 +603,7 @@ vuint16m2_t test_vmacc_vx_u16m2(vuint16m2_t vd, uint16_t rs1, vuint16m2_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vmacc_vv_u16m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vmacc.nxv16i16.nxv16i16.i64(<vscale x 16 x i16> [[VD:%.*]], <vscale x 16 x i16> [[VS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vmacc.nxv16i16.nxv16i16.i64(<vscale x 16 x i16> [[VD:%.*]], <vscale x 16 x i16> [[VS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i16> [[TMP0]]
 //
 vuint16m4_t test_vmacc_vv_u16m4(vuint16m4_t vd, vuint16m4_t vs1, vuint16m4_t vs2, size_t vl) {
@@ -612,7 +612,7 @@ vuint16m4_t test_vmacc_vv_u16m4(vuint16m4_t vd, vuint16m4_t vs1, vuint16m4_t vs2
 
 // CHECK-RV64-LABEL: @test_vmacc_vx_u16m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vmacc.nxv16i16.i16.i64(<vscale x 16 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vmacc.nxv16i16.i16.i64(<vscale x 16 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i16> [[TMP0]]
 //
 vuint16m4_t test_vmacc_vx_u16m4(vuint16m4_t vd, uint16_t rs1, vuint16m4_t vs2, size_t vl) {
@@ -621,7 +621,7 @@ vuint16m4_t test_vmacc_vx_u16m4(vuint16m4_t vd, uint16_t rs1, vuint16m4_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vmacc_vv_u16m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vmacc.nxv32i16.nxv32i16.i64(<vscale x 32 x i16> [[VD:%.*]], <vscale x 32 x i16> [[VS1:%.*]], <vscale x 32 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vmacc.nxv32i16.nxv32i16.i64(<vscale x 32 x i16> [[VD:%.*]], <vscale x 32 x i16> [[VS1:%.*]], <vscale x 32 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 32 x i16> [[TMP0]]
 //
 vuint16m8_t test_vmacc_vv_u16m8(vuint16m8_t vd, vuint16m8_t vs1, vuint16m8_t vs2, size_t vl) {
@@ -630,7 +630,7 @@ vuint16m8_t test_vmacc_vv_u16m8(vuint16m8_t vd, vuint16m8_t vs1, vuint16m8_t vs2
 
 // CHECK-RV64-LABEL: @test_vmacc_vx_u16m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vmacc.nxv32i16.i16.i64(<vscale x 32 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 32 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vmacc.nxv32i16.i16.i64(<vscale x 32 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 32 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 32 x i16> [[TMP0]]
 //
 vuint16m8_t test_vmacc_vx_u16m8(vuint16m8_t vd, uint16_t rs1, vuint16m8_t vs2, size_t vl) {
@@ -639,7 +639,7 @@ vuint16m8_t test_vmacc_vx_u16m8(vuint16m8_t vd, uint16_t rs1, vuint16m8_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vmacc_vv_u32mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vmacc.nxv1i32.nxv1i32.i64(<vscale x 1 x i32> [[VD:%.*]], <vscale x 1 x i32> [[VS1:%.*]], <vscale x 1 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vmacc.nxv1i32.nxv1i32.i64(<vscale x 1 x i32> [[VD:%.*]], <vscale x 1 x i32> [[VS1:%.*]], <vscale x 1 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i32> [[TMP0]]
 //
 vuint32mf2_t test_vmacc_vv_u32mf2(vuint32mf2_t vd, vuint32mf2_t vs1, vuint32mf2_t vs2, size_t vl) {
@@ -648,7 +648,7 @@ vuint32mf2_t test_vmacc_vv_u32mf2(vuint32mf2_t vd, vuint32mf2_t vs1, vuint32mf2_
 
 // CHECK-RV64-LABEL: @test_vmacc_vx_u32mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vmacc.nxv1i32.i32.i64(<vscale x 1 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 1 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vmacc.nxv1i32.i32.i64(<vscale x 1 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 1 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i32> [[TMP0]]
 //
 vuint32mf2_t test_vmacc_vx_u32mf2(vuint32mf2_t vd, uint32_t rs1, vuint32mf2_t vs2, size_t vl) {
@@ -657,7 +657,7 @@ vuint32mf2_t test_vmacc_vx_u32mf2(vuint32mf2_t vd, uint32_t rs1, vuint32mf2_t vs
 
 // CHECK-RV64-LABEL: @test_vmacc_vv_u32m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vmacc.nxv2i32.nxv2i32.i64(<vscale x 2 x i32> [[VD:%.*]], <vscale x 2 x i32> [[VS1:%.*]], <vscale x 2 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vmacc.nxv2i32.nxv2i32.i64(<vscale x 2 x i32> [[VD:%.*]], <vscale x 2 x i32> [[VS1:%.*]], <vscale x 2 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i32> [[TMP0]]
 //
 vuint32m1_t test_vmacc_vv_u32m1(vuint32m1_t vd, vuint32m1_t vs1, vuint32m1_t vs2, size_t vl) {
@@ -666,7 +666,7 @@ vuint32m1_t test_vmacc_vv_u32m1(vuint32m1_t vd, vuint32m1_t vs1, vuint32m1_t vs2
 
 // CHECK-RV64-LABEL: @test_vmacc_vx_u32m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vmacc.nxv2i32.i32.i64(<vscale x 2 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 2 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vmacc.nxv2i32.i32.i64(<vscale x 2 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 2 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i32> [[TMP0]]
 //
 vuint32m1_t test_vmacc_vx_u32m1(vuint32m1_t vd, uint32_t rs1, vuint32m1_t vs2, size_t vl) {
@@ -675,7 +675,7 @@ vuint32m1_t test_vmacc_vx_u32m1(vuint32m1_t vd, uint32_t rs1, vuint32m1_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vmacc_vv_u32m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vmacc.nxv4i32.nxv4i32.i64(<vscale x 4 x i32> [[VD:%.*]], <vscale x 4 x i32> [[VS1:%.*]], <vscale x 4 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vmacc.nxv4i32.nxv4i32.i64(<vscale x 4 x i32> [[VD:%.*]], <vscale x 4 x i32> [[VS1:%.*]], <vscale x 4 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
 vuint32m2_t test_vmacc_vv_u32m2(vuint32m2_t vd, vuint32m2_t vs1, vuint32m2_t vs2, size_t vl) {
@@ -684,7 +684,7 @@ vuint32m2_t test_vmacc_vv_u32m2(vuint32m2_t vd, vuint32m2_t vs1, vuint32m2_t vs2
 
 // CHECK-RV64-LABEL: @test_vmacc_vx_u32m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vmacc.nxv4i32.i32.i64(<vscale x 4 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 4 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vmacc.nxv4i32.i32.i64(<vscale x 4 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 4 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
 vuint32m2_t test_vmacc_vx_u32m2(vuint32m2_t vd, uint32_t rs1, vuint32m2_t vs2, size_t vl) {
@@ -693,7 +693,7 @@ vuint32m2_t test_vmacc_vx_u32m2(vuint32m2_t vd, uint32_t rs1, vuint32m2_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vmacc_vv_u32m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vmacc.nxv8i32.nxv8i32.i64(<vscale x 8 x i32> [[VD:%.*]], <vscale x 8 x i32> [[VS1:%.*]], <vscale x 8 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vmacc.nxv8i32.nxv8i32.i64(<vscale x 8 x i32> [[VD:%.*]], <vscale x 8 x i32> [[VS1:%.*]], <vscale x 8 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i32> [[TMP0]]
 //
 vuint32m4_t test_vmacc_vv_u32m4(vuint32m4_t vd, vuint32m4_t vs1, vuint32m4_t vs2, size_t vl) {
@@ -702,7 +702,7 @@ vuint32m4_t test_vmacc_vv_u32m4(vuint32m4_t vd, vuint32m4_t vs1, vuint32m4_t vs2
 
 // CHECK-RV64-LABEL: @test_vmacc_vx_u32m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vmacc.nxv8i32.i32.i64(<vscale x 8 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 8 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vmacc.nxv8i32.i32.i64(<vscale x 8 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 8 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i32> [[TMP0]]
 //
 vuint32m4_t test_vmacc_vx_u32m4(vuint32m4_t vd, uint32_t rs1, vuint32m4_t vs2, size_t vl) {
@@ -711,7 +711,7 @@ vuint32m4_t test_vmacc_vx_u32m4(vuint32m4_t vd, uint32_t rs1, vuint32m4_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vmacc_vv_u32m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vmacc.nxv16i32.nxv16i32.i64(<vscale x 16 x i32> [[VD:%.*]], <vscale x 16 x i32> [[VS1:%.*]], <vscale x 16 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vmacc.nxv16i32.nxv16i32.i64(<vscale x 16 x i32> [[VD:%.*]], <vscale x 16 x i32> [[VS1:%.*]], <vscale x 16 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i32> [[TMP0]]
 //
 vuint32m8_t test_vmacc_vv_u32m8(vuint32m8_t vd, vuint32m8_t vs1, vuint32m8_t vs2, size_t vl) {
@@ -720,7 +720,7 @@ vuint32m8_t test_vmacc_vv_u32m8(vuint32m8_t vd, vuint32m8_t vs1, vuint32m8_t vs2
 
 // CHECK-RV64-LABEL: @test_vmacc_vx_u32m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vmacc.nxv16i32.i32.i64(<vscale x 16 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 16 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vmacc.nxv16i32.i32.i64(<vscale x 16 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 16 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i32> [[TMP0]]
 //
 vuint32m8_t test_vmacc_vx_u32m8(vuint32m8_t vd, uint32_t rs1, vuint32m8_t vs2, size_t vl) {
@@ -729,7 +729,7 @@ vuint32m8_t test_vmacc_vx_u32m8(vuint32m8_t vd, uint32_t rs1, vuint32m8_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vmacc_vv_u64m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vmacc.nxv1i64.nxv1i64.i64(<vscale x 1 x i64> [[VD:%.*]], <vscale x 1 x i64> [[VS1:%.*]], <vscale x 1 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vmacc.nxv1i64.nxv1i64.i64(<vscale x 1 x i64> [[VD:%.*]], <vscale x 1 x i64> [[VS1:%.*]], <vscale x 1 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
 //
 vuint64m1_t test_vmacc_vv_u64m1(vuint64m1_t vd, vuint64m1_t vs1, vuint64m1_t vs2, size_t vl) {
@@ -738,7 +738,7 @@ vuint64m1_t test_vmacc_vv_u64m1(vuint64m1_t vd, vuint64m1_t vs1, vuint64m1_t vs2
 
 // CHECK-RV64-LABEL: @test_vmacc_vx_u64m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vmacc.nxv1i64.i64.i64(<vscale x 1 x i64> [[VD:%.*]], i64 [[RS1:%.*]], <vscale x 1 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vmacc.nxv1i64.i64.i64(<vscale x 1 x i64> [[VD:%.*]], i64 [[RS1:%.*]], <vscale x 1 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
 //
 vuint64m1_t test_vmacc_vx_u64m1(vuint64m1_t vd, uint64_t rs1, vuint64m1_t vs2, size_t vl) {
@@ -747,7 +747,7 @@ vuint64m1_t test_vmacc_vx_u64m1(vuint64m1_t vd, uint64_t rs1, vuint64m1_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vmacc_vv_u64m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vmacc.nxv2i64.nxv2i64.i64(<vscale x 2 x i64> [[VD:%.*]], <vscale x 2 x i64> [[VS1:%.*]], <vscale x 2 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vmacc.nxv2i64.nxv2i64.i64(<vscale x 2 x i64> [[VD:%.*]], <vscale x 2 x i64> [[VS1:%.*]], <vscale x 2 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
 vuint64m2_t test_vmacc_vv_u64m2(vuint64m2_t vd, vuint64m2_t vs1, vuint64m2_t vs2, size_t vl) {
@@ -756,7 +756,7 @@ vuint64m2_t test_vmacc_vv_u64m2(vuint64m2_t vd, vuint64m2_t vs1, vuint64m2_t vs2
 
 // CHECK-RV64-LABEL: @test_vmacc_vx_u64m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vmacc.nxv2i64.i64.i64(<vscale x 2 x i64> [[VD:%.*]], i64 [[RS1:%.*]], <vscale x 2 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vmacc.nxv2i64.i64.i64(<vscale x 2 x i64> [[VD:%.*]], i64 [[RS1:%.*]], <vscale x 2 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
 vuint64m2_t test_vmacc_vx_u64m2(vuint64m2_t vd, uint64_t rs1, vuint64m2_t vs2, size_t vl) {
@@ -765,7 +765,7 @@ vuint64m2_t test_vmacc_vx_u64m2(vuint64m2_t vd, uint64_t rs1, vuint64m2_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vmacc_vv_u64m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vmacc.nxv4i64.nxv4i64.i64(<vscale x 4 x i64> [[VD:%.*]], <vscale x 4 x i64> [[VS1:%.*]], <vscale x 4 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vmacc.nxv4i64.nxv4i64.i64(<vscale x 4 x i64> [[VD:%.*]], <vscale x 4 x i64> [[VS1:%.*]], <vscale x 4 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
 //
 vuint64m4_t test_vmacc_vv_u64m4(vuint64m4_t vd, vuint64m4_t vs1, vuint64m4_t vs2, size_t vl) {
@@ -774,7 +774,7 @@ vuint64m4_t test_vmacc_vv_u64m4(vuint64m4_t vd, vuint64m4_t vs1, vuint64m4_t vs2
 
 // CHECK-RV64-LABEL: @test_vmacc_vx_u64m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vmacc.nxv4i64.i64.i64(<vscale x 4 x i64> [[VD:%.*]], i64 [[RS1:%.*]], <vscale x 4 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vmacc.nxv4i64.i64.i64(<vscale x 4 x i64> [[VD:%.*]], i64 [[RS1:%.*]], <vscale x 4 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
 //
 vuint64m4_t test_vmacc_vx_u64m4(vuint64m4_t vd, uint64_t rs1, vuint64m4_t vs2, size_t vl) {
@@ -783,7 +783,7 @@ vuint64m4_t test_vmacc_vx_u64m4(vuint64m4_t vd, uint64_t rs1, vuint64m4_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vmacc_vv_u64m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vmacc.nxv8i64.nxv8i64.i64(<vscale x 8 x i64> [[VD:%.*]], <vscale x 8 x i64> [[VS1:%.*]], <vscale x 8 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vmacc.nxv8i64.nxv8i64.i64(<vscale x 8 x i64> [[VD:%.*]], <vscale x 8 x i64> [[VS1:%.*]], <vscale x 8 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
 //
 vuint64m8_t test_vmacc_vv_u64m8(vuint64m8_t vd, vuint64m8_t vs1, vuint64m8_t vs2, size_t vl) {
@@ -792,7 +792,7 @@ vuint64m8_t test_vmacc_vv_u64m8(vuint64m8_t vd, vuint64m8_t vs1, vuint64m8_t vs2
 
 // CHECK-RV64-LABEL: @test_vmacc_vx_u64m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vmacc.nxv8i64.i64.i64(<vscale x 8 x i64> [[VD:%.*]], i64 [[RS1:%.*]], <vscale x 8 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vmacc.nxv8i64.i64.i64(<vscale x 8 x i64> [[VD:%.*]], i64 [[RS1:%.*]], <vscale x 8 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
 //
 vuint64m8_t test_vmacc_vx_u64m8(vuint64m8_t vd, uint64_t rs1, vuint64m8_t vs2, size_t vl) {

diff  --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vmadd.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vmadd.c
index 185f14c115684..2ab97789de0ed 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vmadd.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vmadd.c
@@ -9,7 +9,7 @@
 
 // CHECK-RV64-LABEL: @test_vmadd_vv_i8mf8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vmadd.nxv1i8.nxv1i8.i64(<vscale x 1 x i8> [[VD:%.*]], <vscale x 1 x i8> [[VS1:%.*]], <vscale x 1 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vmadd.nxv1i8.nxv1i8.i64(<vscale x 1 x i8> [[VD:%.*]], <vscale x 1 x i8> [[VS1:%.*]], <vscale x 1 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i8> [[TMP0]]
 //
 vint8mf8_t test_vmadd_vv_i8mf8(vint8mf8_t vd, vint8mf8_t vs1, vint8mf8_t vs2, size_t vl) {
@@ -18,7 +18,7 @@ vint8mf8_t test_vmadd_vv_i8mf8(vint8mf8_t vd, vint8mf8_t vs1, vint8mf8_t vs2, si
 
 // CHECK-RV64-LABEL: @test_vmadd_vx_i8mf8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vmadd.nxv1i8.i8.i64(<vscale x 1 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 1 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vmadd.nxv1i8.i8.i64(<vscale x 1 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 1 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i8> [[TMP0]]
 //
 vint8mf8_t test_vmadd_vx_i8mf8(vint8mf8_t vd, int8_t rs1, vint8mf8_t vs2, size_t vl) {
@@ -27,7 +27,7 @@ vint8mf8_t test_vmadd_vx_i8mf8(vint8mf8_t vd, int8_t rs1, vint8mf8_t vs2, size_t
 
 // CHECK-RV64-LABEL: @test_vmadd_vv_i8mf4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vmadd.nxv2i8.nxv2i8.i64(<vscale x 2 x i8> [[VD:%.*]], <vscale x 2 x i8> [[VS1:%.*]], <vscale x 2 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vmadd.nxv2i8.nxv2i8.i64(<vscale x 2 x i8> [[VD:%.*]], <vscale x 2 x i8> [[VS1:%.*]], <vscale x 2 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i8> [[TMP0]]
 //
 vint8mf4_t test_vmadd_vv_i8mf4(vint8mf4_t vd, vint8mf4_t vs1, vint8mf4_t vs2, size_t vl) {
@@ -36,7 +36,7 @@ vint8mf4_t test_vmadd_vv_i8mf4(vint8mf4_t vd, vint8mf4_t vs1, vint8mf4_t vs2, si
 
 // CHECK-RV64-LABEL: @test_vmadd_vx_i8mf4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vmadd.nxv2i8.i8.i64(<vscale x 2 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 2 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vmadd.nxv2i8.i8.i64(<vscale x 2 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 2 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i8> [[TMP0]]
 //
 vint8mf4_t test_vmadd_vx_i8mf4(vint8mf4_t vd, int8_t rs1, vint8mf4_t vs2, size_t vl) {
@@ -45,7 +45,7 @@ vint8mf4_t test_vmadd_vx_i8mf4(vint8mf4_t vd, int8_t rs1, vint8mf4_t vs2, size_t
 
 // CHECK-RV64-LABEL: @test_vmadd_vv_i8mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vmadd.nxv4i8.nxv4i8.i64(<vscale x 4 x i8> [[VD:%.*]], <vscale x 4 x i8> [[VS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vmadd.nxv4i8.nxv4i8.i64(<vscale x 4 x i8> [[VD:%.*]], <vscale x 4 x i8> [[VS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i8> [[TMP0]]
 //
 vint8mf2_t test_vmadd_vv_i8mf2(vint8mf2_t vd, vint8mf2_t vs1, vint8mf2_t vs2, size_t vl) {
@@ -54,7 +54,7 @@ vint8mf2_t test_vmadd_vv_i8mf2(vint8mf2_t vd, vint8mf2_t vs1, vint8mf2_t vs2, si
 
 // CHECK-RV64-LABEL: @test_vmadd_vx_i8mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vmadd.nxv4i8.i8.i64(<vscale x 4 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vmadd.nxv4i8.i8.i64(<vscale x 4 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i8> [[TMP0]]
 //
 vint8mf2_t test_vmadd_vx_i8mf2(vint8mf2_t vd, int8_t rs1, vint8mf2_t vs2, size_t vl) {
@@ -63,7 +63,7 @@ vint8mf2_t test_vmadd_vx_i8mf2(vint8mf2_t vd, int8_t rs1, vint8mf2_t vs2, size_t
 
 // CHECK-RV64-LABEL: @test_vmadd_vv_i8m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vmadd.nxv8i8.nxv8i8.i64(<vscale x 8 x i8> [[VD:%.*]], <vscale x 8 x i8> [[VS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vmadd.nxv8i8.nxv8i8.i64(<vscale x 8 x i8> [[VD:%.*]], <vscale x 8 x i8> [[VS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i8> [[TMP0]]
 //
 vint8m1_t test_vmadd_vv_i8m1(vint8m1_t vd, vint8m1_t vs1, vint8m1_t vs2, size_t vl) {
@@ -72,7 +72,7 @@ vint8m1_t test_vmadd_vv_i8m1(vint8m1_t vd, vint8m1_t vs1, vint8m1_t vs2, size_t
 
 // CHECK-RV64-LABEL: @test_vmadd_vx_i8m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vmadd.nxv8i8.i8.i64(<vscale x 8 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vmadd.nxv8i8.i8.i64(<vscale x 8 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i8> [[TMP0]]
 //
 vint8m1_t test_vmadd_vx_i8m1(vint8m1_t vd, int8_t rs1, vint8m1_t vs2, size_t vl) {
@@ -81,7 +81,7 @@ vint8m1_t test_vmadd_vx_i8m1(vint8m1_t vd, int8_t rs1, vint8m1_t vs2, size_t vl)
 
 // CHECK-RV64-LABEL: @test_vmadd_vv_i8m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vmadd.nxv16i8.nxv16i8.i64(<vscale x 16 x i8> [[VD:%.*]], <vscale x 16 x i8> [[VS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vmadd.nxv16i8.nxv16i8.i64(<vscale x 16 x i8> [[VD:%.*]], <vscale x 16 x i8> [[VS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
 vint8m2_t test_vmadd_vv_i8m2(vint8m2_t vd, vint8m2_t vs1, vint8m2_t vs2, size_t vl) {
@@ -90,7 +90,7 @@ vint8m2_t test_vmadd_vv_i8m2(vint8m2_t vd, vint8m2_t vs1, vint8m2_t vs2, size_t
 
 // CHECK-RV64-LABEL: @test_vmadd_vx_i8m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vmadd.nxv16i8.i8.i64(<vscale x 16 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vmadd.nxv16i8.i8.i64(<vscale x 16 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
 vint8m2_t test_vmadd_vx_i8m2(vint8m2_t vd, int8_t rs1, vint8m2_t vs2, size_t vl) {
@@ -99,7 +99,7 @@ vint8m2_t test_vmadd_vx_i8m2(vint8m2_t vd, int8_t rs1, vint8m2_t vs2, size_t vl)
 
 // CHECK-RV64-LABEL: @test_vmadd_vv_i8m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i8> @llvm.riscv.vmadd.nxv32i8.nxv32i8.i64(<vscale x 32 x i8> [[VD:%.*]], <vscale x 32 x i8> [[VS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i8> @llvm.riscv.vmadd.nxv32i8.nxv32i8.i64(<vscale x 32 x i8> [[VD:%.*]], <vscale x 32 x i8> [[VS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 32 x i8> [[TMP0]]
 //
 vint8m4_t test_vmadd_vv_i8m4(vint8m4_t vd, vint8m4_t vs1, vint8m4_t vs2, size_t vl) {
@@ -108,7 +108,7 @@ vint8m4_t test_vmadd_vv_i8m4(vint8m4_t vd, vint8m4_t vs1, vint8m4_t vs2, size_t
 
 // CHECK-RV64-LABEL: @test_vmadd_vx_i8m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i8> @llvm.riscv.vmadd.nxv32i8.i8.i64(<vscale x 32 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i8> @llvm.riscv.vmadd.nxv32i8.i8.i64(<vscale x 32 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 32 x i8> [[TMP0]]
 //
 vint8m4_t test_vmadd_vx_i8m4(vint8m4_t vd, int8_t rs1, vint8m4_t vs2, size_t vl) {
@@ -117,7 +117,7 @@ vint8m4_t test_vmadd_vx_i8m4(vint8m4_t vd, int8_t rs1, vint8m4_t vs2, size_t vl)
 
 // CHECK-RV64-LABEL: @test_vmadd_vv_i8m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 64 x i8> @llvm.riscv.vmadd.nxv64i8.nxv64i8.i64(<vscale x 64 x i8> [[VD:%.*]], <vscale x 64 x i8> [[VS1:%.*]], <vscale x 64 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 64 x i8> @llvm.riscv.vmadd.nxv64i8.nxv64i8.i64(<vscale x 64 x i8> [[VD:%.*]], <vscale x 64 x i8> [[VS1:%.*]], <vscale x 64 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 64 x i8> [[TMP0]]
 //
 vint8m8_t test_vmadd_vv_i8m8(vint8m8_t vd, vint8m8_t vs1, vint8m8_t vs2, size_t vl) {
@@ -126,7 +126,7 @@ vint8m8_t test_vmadd_vv_i8m8(vint8m8_t vd, vint8m8_t vs1, vint8m8_t vs2, size_t
 
 // CHECK-RV64-LABEL: @test_vmadd_vx_i8m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 64 x i8> @llvm.riscv.vmadd.nxv64i8.i8.i64(<vscale x 64 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 64 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 64 x i8> @llvm.riscv.vmadd.nxv64i8.i8.i64(<vscale x 64 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 64 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 64 x i8> [[TMP0]]
 //
 vint8m8_t test_vmadd_vx_i8m8(vint8m8_t vd, int8_t rs1, vint8m8_t vs2, size_t vl) {
@@ -135,7 +135,7 @@ vint8m8_t test_vmadd_vx_i8m8(vint8m8_t vd, int8_t rs1, vint8m8_t vs2, size_t vl)
 
 // CHECK-RV64-LABEL: @test_vmadd_vv_i16mf4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vmadd.nxv1i16.nxv1i16.i64(<vscale x 1 x i16> [[VD:%.*]], <vscale x 1 x i16> [[VS1:%.*]], <vscale x 1 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vmadd.nxv1i16.nxv1i16.i64(<vscale x 1 x i16> [[VD:%.*]], <vscale x 1 x i16> [[VS1:%.*]], <vscale x 1 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i16> [[TMP0]]
 //
 vint16mf4_t test_vmadd_vv_i16mf4(vint16mf4_t vd, vint16mf4_t vs1, vint16mf4_t vs2, size_t vl) {
@@ -144,7 +144,7 @@ vint16mf4_t test_vmadd_vv_i16mf4(vint16mf4_t vd, vint16mf4_t vs1, vint16mf4_t vs
 
 // CHECK-RV64-LABEL: @test_vmadd_vx_i16mf4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vmadd.nxv1i16.i16.i64(<vscale x 1 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 1 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vmadd.nxv1i16.i16.i64(<vscale x 1 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 1 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i16> [[TMP0]]
 //
 vint16mf4_t test_vmadd_vx_i16mf4(vint16mf4_t vd, int16_t rs1, vint16mf4_t vs2, size_t vl) {
@@ -153,7 +153,7 @@ vint16mf4_t test_vmadd_vx_i16mf4(vint16mf4_t vd, int16_t rs1, vint16mf4_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vmadd_vv_i16mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vmadd.nxv2i16.nxv2i16.i64(<vscale x 2 x i16> [[VD:%.*]], <vscale x 2 x i16> [[VS1:%.*]], <vscale x 2 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vmadd.nxv2i16.nxv2i16.i64(<vscale x 2 x i16> [[VD:%.*]], <vscale x 2 x i16> [[VS1:%.*]], <vscale x 2 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i16> [[TMP0]]
 //
 vint16mf2_t test_vmadd_vv_i16mf2(vint16mf2_t vd, vint16mf2_t vs1, vint16mf2_t vs2, size_t vl) {
@@ -162,7 +162,7 @@ vint16mf2_t test_vmadd_vv_i16mf2(vint16mf2_t vd, vint16mf2_t vs1, vint16mf2_t vs
 
 // CHECK-RV64-LABEL: @test_vmadd_vx_i16mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vmadd.nxv2i16.i16.i64(<vscale x 2 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 2 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vmadd.nxv2i16.i16.i64(<vscale x 2 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 2 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i16> [[TMP0]]
 //
 vint16mf2_t test_vmadd_vx_i16mf2(vint16mf2_t vd, int16_t rs1, vint16mf2_t vs2, size_t vl) {
@@ -171,7 +171,7 @@ vint16mf2_t test_vmadd_vx_i16mf2(vint16mf2_t vd, int16_t rs1, vint16mf2_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vmadd_vv_i16m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vmadd.nxv4i16.nxv4i16.i64(<vscale x 4 x i16> [[VD:%.*]], <vscale x 4 x i16> [[VS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vmadd.nxv4i16.nxv4i16.i64(<vscale x 4 x i16> [[VD:%.*]], <vscale x 4 x i16> [[VS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i16> [[TMP0]]
 //
 vint16m1_t test_vmadd_vv_i16m1(vint16m1_t vd, vint16m1_t vs1, vint16m1_t vs2, size_t vl) {
@@ -180,7 +180,7 @@ vint16m1_t test_vmadd_vv_i16m1(vint16m1_t vd, vint16m1_t vs1, vint16m1_t vs2, si
 
 // CHECK-RV64-LABEL: @test_vmadd_vx_i16m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vmadd.nxv4i16.i16.i64(<vscale x 4 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vmadd.nxv4i16.i16.i64(<vscale x 4 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i16> [[TMP0]]
 //
 vint16m1_t test_vmadd_vx_i16m1(vint16m1_t vd, int16_t rs1, vint16m1_t vs2, size_t vl) {
@@ -189,7 +189,7 @@ vint16m1_t test_vmadd_vx_i16m1(vint16m1_t vd, int16_t rs1, vint16m1_t vs2, size_
 
 // CHECK-RV64-LABEL: @test_vmadd_vv_i16m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vmadd.nxv8i16.nxv8i16.i64(<vscale x 8 x i16> [[VD:%.*]], <vscale x 8 x i16> [[VS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vmadd.nxv8i16.nxv8i16.i64(<vscale x 8 x i16> [[VD:%.*]], <vscale x 8 x i16> [[VS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
 vint16m2_t test_vmadd_vv_i16m2(vint16m2_t vd, vint16m2_t vs1, vint16m2_t vs2, size_t vl) {
@@ -198,7 +198,7 @@ vint16m2_t test_vmadd_vv_i16m2(vint16m2_t vd, vint16m2_t vs1, vint16m2_t vs2, si
 
 // CHECK-RV64-LABEL: @test_vmadd_vx_i16m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vmadd.nxv8i16.i16.i64(<vscale x 8 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vmadd.nxv8i16.i16.i64(<vscale x 8 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
 vint16m2_t test_vmadd_vx_i16m2(vint16m2_t vd, int16_t rs1, vint16m2_t vs2, size_t vl) {
@@ -207,7 +207,7 @@ vint16m2_t test_vmadd_vx_i16m2(vint16m2_t vd, int16_t rs1, vint16m2_t vs2, size_
 
 // CHECK-RV64-LABEL: @test_vmadd_vv_i16m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vmadd.nxv16i16.nxv16i16.i64(<vscale x 16 x i16> [[VD:%.*]], <vscale x 16 x i16> [[VS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vmadd.nxv16i16.nxv16i16.i64(<vscale x 16 x i16> [[VD:%.*]], <vscale x 16 x i16> [[VS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i16> [[TMP0]]
 //
 vint16m4_t test_vmadd_vv_i16m4(vint16m4_t vd, vint16m4_t vs1, vint16m4_t vs2, size_t vl) {
@@ -216,7 +216,7 @@ vint16m4_t test_vmadd_vv_i16m4(vint16m4_t vd, vint16m4_t vs1, vint16m4_t vs2, si
 
 // CHECK-RV64-LABEL: @test_vmadd_vx_i16m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vmadd.nxv16i16.i16.i64(<vscale x 16 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vmadd.nxv16i16.i16.i64(<vscale x 16 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i16> [[TMP0]]
 //
 vint16m4_t test_vmadd_vx_i16m4(vint16m4_t vd, int16_t rs1, vint16m4_t vs2, size_t vl) {
@@ -225,7 +225,7 @@ vint16m4_t test_vmadd_vx_i16m4(vint16m4_t vd, int16_t rs1, vint16m4_t vs2, size_
 
 // CHECK-RV64-LABEL: @test_vmadd_vv_i16m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vmadd.nxv32i16.nxv32i16.i64(<vscale x 32 x i16> [[VD:%.*]], <vscale x 32 x i16> [[VS1:%.*]], <vscale x 32 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vmadd.nxv32i16.nxv32i16.i64(<vscale x 32 x i16> [[VD:%.*]], <vscale x 32 x i16> [[VS1:%.*]], <vscale x 32 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 32 x i16> [[TMP0]]
 //
 vint16m8_t test_vmadd_vv_i16m8(vint16m8_t vd, vint16m8_t vs1, vint16m8_t vs2, size_t vl) {
@@ -234,7 +234,7 @@ vint16m8_t test_vmadd_vv_i16m8(vint16m8_t vd, vint16m8_t vs1, vint16m8_t vs2, si
 
 // CHECK-RV64-LABEL: @test_vmadd_vx_i16m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vmadd.nxv32i16.i16.i64(<vscale x 32 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 32 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vmadd.nxv32i16.i16.i64(<vscale x 32 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 32 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 32 x i16> [[TMP0]]
 //
 vint16m8_t test_vmadd_vx_i16m8(vint16m8_t vd, int16_t rs1, vint16m8_t vs2, size_t vl) {
@@ -243,7 +243,7 @@ vint16m8_t test_vmadd_vx_i16m8(vint16m8_t vd, int16_t rs1, vint16m8_t vs2, size_
 
 // CHECK-RV64-LABEL: @test_vmadd_vv_i32mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vmadd.nxv1i32.nxv1i32.i64(<vscale x 1 x i32> [[VD:%.*]], <vscale x 1 x i32> [[VS1:%.*]], <vscale x 1 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vmadd.nxv1i32.nxv1i32.i64(<vscale x 1 x i32> [[VD:%.*]], <vscale x 1 x i32> [[VS1:%.*]], <vscale x 1 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i32> [[TMP0]]
 //
 vint32mf2_t test_vmadd_vv_i32mf2(vint32mf2_t vd, vint32mf2_t vs1, vint32mf2_t vs2, size_t vl) {
@@ -252,7 +252,7 @@ vint32mf2_t test_vmadd_vv_i32mf2(vint32mf2_t vd, vint32mf2_t vs1, vint32mf2_t vs
 
 // CHECK-RV64-LABEL: @test_vmadd_vx_i32mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vmadd.nxv1i32.i32.i64(<vscale x 1 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 1 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vmadd.nxv1i32.i32.i64(<vscale x 1 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 1 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i32> [[TMP0]]
 //
 vint32mf2_t test_vmadd_vx_i32mf2(vint32mf2_t vd, int32_t rs1, vint32mf2_t vs2, size_t vl) {
@@ -261,7 +261,7 @@ vint32mf2_t test_vmadd_vx_i32mf2(vint32mf2_t vd, int32_t rs1, vint32mf2_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vmadd_vv_i32m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vmadd.nxv2i32.nxv2i32.i64(<vscale x 2 x i32> [[VD:%.*]], <vscale x 2 x i32> [[VS1:%.*]], <vscale x 2 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vmadd.nxv2i32.nxv2i32.i64(<vscale x 2 x i32> [[VD:%.*]], <vscale x 2 x i32> [[VS1:%.*]], <vscale x 2 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i32> [[TMP0]]
 //
 vint32m1_t test_vmadd_vv_i32m1(vint32m1_t vd, vint32m1_t vs1, vint32m1_t vs2, size_t vl) {
@@ -270,7 +270,7 @@ vint32m1_t test_vmadd_vv_i32m1(vint32m1_t vd, vint32m1_t vs1, vint32m1_t vs2, si
 
 // CHECK-RV64-LABEL: @test_vmadd_vx_i32m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vmadd.nxv2i32.i32.i64(<vscale x 2 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 2 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vmadd.nxv2i32.i32.i64(<vscale x 2 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 2 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i32> [[TMP0]]
 //
 vint32m1_t test_vmadd_vx_i32m1(vint32m1_t vd, int32_t rs1, vint32m1_t vs2, size_t vl) {
@@ -279,7 +279,7 @@ vint32m1_t test_vmadd_vx_i32m1(vint32m1_t vd, int32_t rs1, vint32m1_t vs2, size_
 
 // CHECK-RV64-LABEL: @test_vmadd_vv_i32m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vmadd.nxv4i32.nxv4i32.i64(<vscale x 4 x i32> [[VD:%.*]], <vscale x 4 x i32> [[VS1:%.*]], <vscale x 4 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vmadd.nxv4i32.nxv4i32.i64(<vscale x 4 x i32> [[VD:%.*]], <vscale x 4 x i32> [[VS1:%.*]], <vscale x 4 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
 vint32m2_t test_vmadd_vv_i32m2(vint32m2_t vd, vint32m2_t vs1, vint32m2_t vs2, size_t vl) {
@@ -288,7 +288,7 @@ vint32m2_t test_vmadd_vv_i32m2(vint32m2_t vd, vint32m2_t vs1, vint32m2_t vs2, si
 
 // CHECK-RV64-LABEL: @test_vmadd_vx_i32m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vmadd.nxv4i32.i32.i64(<vscale x 4 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 4 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vmadd.nxv4i32.i32.i64(<vscale x 4 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 4 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
 vint32m2_t test_vmadd_vx_i32m2(vint32m2_t vd, int32_t rs1, vint32m2_t vs2, size_t vl) {
@@ -297,7 +297,7 @@ vint32m2_t test_vmadd_vx_i32m2(vint32m2_t vd, int32_t rs1, vint32m2_t vs2, size_
 
 // CHECK-RV64-LABEL: @test_vmadd_vv_i32m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vmadd.nxv8i32.nxv8i32.i64(<vscale x 8 x i32> [[VD:%.*]], <vscale x 8 x i32> [[VS1:%.*]], <vscale x 8 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vmadd.nxv8i32.nxv8i32.i64(<vscale x 8 x i32> [[VD:%.*]], <vscale x 8 x i32> [[VS1:%.*]], <vscale x 8 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i32> [[TMP0]]
 //
 vint32m4_t test_vmadd_vv_i32m4(vint32m4_t vd, vint32m4_t vs1, vint32m4_t vs2, size_t vl) {
@@ -306,7 +306,7 @@ vint32m4_t test_vmadd_vv_i32m4(vint32m4_t vd, vint32m4_t vs1, vint32m4_t vs2, si
 
 // CHECK-RV64-LABEL: @test_vmadd_vx_i32m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vmadd.nxv8i32.i32.i64(<vscale x 8 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 8 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vmadd.nxv8i32.i32.i64(<vscale x 8 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 8 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i32> [[TMP0]]
 //
 vint32m4_t test_vmadd_vx_i32m4(vint32m4_t vd, int32_t rs1, vint32m4_t vs2, size_t vl) {
@@ -315,7 +315,7 @@ vint32m4_t test_vmadd_vx_i32m4(vint32m4_t vd, int32_t rs1, vint32m4_t vs2, size_
 
 // CHECK-RV64-LABEL: @test_vmadd_vv_i32m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vmadd.nxv16i32.nxv16i32.i64(<vscale x 16 x i32> [[VD:%.*]], <vscale x 16 x i32> [[VS1:%.*]], <vscale x 16 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vmadd.nxv16i32.nxv16i32.i64(<vscale x 16 x i32> [[VD:%.*]], <vscale x 16 x i32> [[VS1:%.*]], <vscale x 16 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i32> [[TMP0]]
 //
 vint32m8_t test_vmadd_vv_i32m8(vint32m8_t vd, vint32m8_t vs1, vint32m8_t vs2, size_t vl) {
@@ -324,7 +324,7 @@ vint32m8_t test_vmadd_vv_i32m8(vint32m8_t vd, vint32m8_t vs1, vint32m8_t vs2, si
 
 // CHECK-RV64-LABEL: @test_vmadd_vx_i32m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vmadd.nxv16i32.i32.i64(<vscale x 16 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 16 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vmadd.nxv16i32.i32.i64(<vscale x 16 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 16 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i32> [[TMP0]]
 //
 vint32m8_t test_vmadd_vx_i32m8(vint32m8_t vd, int32_t rs1, vint32m8_t vs2, size_t vl) {
@@ -333,7 +333,7 @@ vint32m8_t test_vmadd_vx_i32m8(vint32m8_t vd, int32_t rs1, vint32m8_t vs2, size_
 
 // CHECK-RV64-LABEL: @test_vmadd_vv_i64m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vmadd.nxv1i64.nxv1i64.i64(<vscale x 1 x i64> [[VD:%.*]], <vscale x 1 x i64> [[VS1:%.*]], <vscale x 1 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vmadd.nxv1i64.nxv1i64.i64(<vscale x 1 x i64> [[VD:%.*]], <vscale x 1 x i64> [[VS1:%.*]], <vscale x 1 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
 //
 vint64m1_t test_vmadd_vv_i64m1(vint64m1_t vd, vint64m1_t vs1, vint64m1_t vs2, size_t vl) {
@@ -342,7 +342,7 @@ vint64m1_t test_vmadd_vv_i64m1(vint64m1_t vd, vint64m1_t vs1, vint64m1_t vs2, si
 
 // CHECK-RV64-LABEL: @test_vmadd_vx_i64m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vmadd.nxv1i64.i64.i64(<vscale x 1 x i64> [[VD:%.*]], i64 [[RS1:%.*]], <vscale x 1 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vmadd.nxv1i64.i64.i64(<vscale x 1 x i64> [[VD:%.*]], i64 [[RS1:%.*]], <vscale x 1 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
 //
 vint64m1_t test_vmadd_vx_i64m1(vint64m1_t vd, int64_t rs1, vint64m1_t vs2, size_t vl) {
@@ -351,7 +351,7 @@ vint64m1_t test_vmadd_vx_i64m1(vint64m1_t vd, int64_t rs1, vint64m1_t vs2, size_
 
 // CHECK-RV64-LABEL: @test_vmadd_vv_i64m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vmadd.nxv2i64.nxv2i64.i64(<vscale x 2 x i64> [[VD:%.*]], <vscale x 2 x i64> [[VS1:%.*]], <vscale x 2 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vmadd.nxv2i64.nxv2i64.i64(<vscale x 2 x i64> [[VD:%.*]], <vscale x 2 x i64> [[VS1:%.*]], <vscale x 2 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
 vint64m2_t test_vmadd_vv_i64m2(vint64m2_t vd, vint64m2_t vs1, vint64m2_t vs2, size_t vl) {
@@ -360,7 +360,7 @@ vint64m2_t test_vmadd_vv_i64m2(vint64m2_t vd, vint64m2_t vs1, vint64m2_t vs2, si
 
 // CHECK-RV64-LABEL: @test_vmadd_vx_i64m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vmadd.nxv2i64.i64.i64(<vscale x 2 x i64> [[VD:%.*]], i64 [[RS1:%.*]], <vscale x 2 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vmadd.nxv2i64.i64.i64(<vscale x 2 x i64> [[VD:%.*]], i64 [[RS1:%.*]], <vscale x 2 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
 vint64m2_t test_vmadd_vx_i64m2(vint64m2_t vd, int64_t rs1, vint64m2_t vs2, size_t vl) {
@@ -369,7 +369,7 @@ vint64m2_t test_vmadd_vx_i64m2(vint64m2_t vd, int64_t rs1, vint64m2_t vs2, size_
 
 // CHECK-RV64-LABEL: @test_vmadd_vv_i64m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vmadd.nxv4i64.nxv4i64.i64(<vscale x 4 x i64> [[VD:%.*]], <vscale x 4 x i64> [[VS1:%.*]], <vscale x 4 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vmadd.nxv4i64.nxv4i64.i64(<vscale x 4 x i64> [[VD:%.*]], <vscale x 4 x i64> [[VS1:%.*]], <vscale x 4 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
 //
 vint64m4_t test_vmadd_vv_i64m4(vint64m4_t vd, vint64m4_t vs1, vint64m4_t vs2, size_t vl) {
@@ -378,7 +378,7 @@ vint64m4_t test_vmadd_vv_i64m4(vint64m4_t vd, vint64m4_t vs1, vint64m4_t vs2, si
 
 // CHECK-RV64-LABEL: @test_vmadd_vx_i64m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vmadd.nxv4i64.i64.i64(<vscale x 4 x i64> [[VD:%.*]], i64 [[RS1:%.*]], <vscale x 4 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vmadd.nxv4i64.i64.i64(<vscale x 4 x i64> [[VD:%.*]], i64 [[RS1:%.*]], <vscale x 4 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
 //
 vint64m4_t test_vmadd_vx_i64m4(vint64m4_t vd, int64_t rs1, vint64m4_t vs2, size_t vl) {
@@ -387,7 +387,7 @@ vint64m4_t test_vmadd_vx_i64m4(vint64m4_t vd, int64_t rs1, vint64m4_t vs2, size_
 
 // CHECK-RV64-LABEL: @test_vmadd_vv_i64m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vmadd.nxv8i64.nxv8i64.i64(<vscale x 8 x i64> [[VD:%.*]], <vscale x 8 x i64> [[VS1:%.*]], <vscale x 8 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vmadd.nxv8i64.nxv8i64.i64(<vscale x 8 x i64> [[VD:%.*]], <vscale x 8 x i64> [[VS1:%.*]], <vscale x 8 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
 //
 vint64m8_t test_vmadd_vv_i64m8(vint64m8_t vd, vint64m8_t vs1, vint64m8_t vs2, size_t vl) {
@@ -396,7 +396,7 @@ vint64m8_t test_vmadd_vv_i64m8(vint64m8_t vd, vint64m8_t vs1, vint64m8_t vs2, si
 
 // CHECK-RV64-LABEL: @test_vmadd_vx_i64m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vmadd.nxv8i64.i64.i64(<vscale x 8 x i64> [[VD:%.*]], i64 [[RS1:%.*]], <vscale x 8 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vmadd.nxv8i64.i64.i64(<vscale x 8 x i64> [[VD:%.*]], i64 [[RS1:%.*]], <vscale x 8 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
 //
 vint64m8_t test_vmadd_vx_i64m8(vint64m8_t vd, int64_t rs1, vint64m8_t vs2, size_t vl) {
@@ -405,7 +405,7 @@ vint64m8_t test_vmadd_vx_i64m8(vint64m8_t vd, int64_t rs1, vint64m8_t vs2, size_
 
 // CHECK-RV64-LABEL: @test_vmadd_vv_u8mf8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vmadd.nxv1i8.nxv1i8.i64(<vscale x 1 x i8> [[VD:%.*]], <vscale x 1 x i8> [[VS1:%.*]], <vscale x 1 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vmadd.nxv1i8.nxv1i8.i64(<vscale x 1 x i8> [[VD:%.*]], <vscale x 1 x i8> [[VS1:%.*]], <vscale x 1 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i8> [[TMP0]]
 //
 vuint8mf8_t test_vmadd_vv_u8mf8(vuint8mf8_t vd, vuint8mf8_t vs1, vuint8mf8_t vs2, size_t vl) {
@@ -414,7 +414,7 @@ vuint8mf8_t test_vmadd_vv_u8mf8(vuint8mf8_t vd, vuint8mf8_t vs1, vuint8mf8_t vs2
 
 // CHECK-RV64-LABEL: @test_vmadd_vx_u8mf8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vmadd.nxv1i8.i8.i64(<vscale x 1 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 1 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vmadd.nxv1i8.i8.i64(<vscale x 1 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 1 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i8> [[TMP0]]
 //
 vuint8mf8_t test_vmadd_vx_u8mf8(vuint8mf8_t vd, uint8_t rs1, vuint8mf8_t vs2, size_t vl) {
@@ -423,7 +423,7 @@ vuint8mf8_t test_vmadd_vx_u8mf8(vuint8mf8_t vd, uint8_t rs1, vuint8mf8_t vs2, si
 
 // CHECK-RV64-LABEL: @test_vmadd_vv_u8mf4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vmadd.nxv2i8.nxv2i8.i64(<vscale x 2 x i8> [[VD:%.*]], <vscale x 2 x i8> [[VS1:%.*]], <vscale x 2 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vmadd.nxv2i8.nxv2i8.i64(<vscale x 2 x i8> [[VD:%.*]], <vscale x 2 x i8> [[VS1:%.*]], <vscale x 2 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i8> [[TMP0]]
 //
 vuint8mf4_t test_vmadd_vv_u8mf4(vuint8mf4_t vd, vuint8mf4_t vs1, vuint8mf4_t vs2, size_t vl) {
@@ -432,7 +432,7 @@ vuint8mf4_t test_vmadd_vv_u8mf4(vuint8mf4_t vd, vuint8mf4_t vs1, vuint8mf4_t vs2
 
 // CHECK-RV64-LABEL: @test_vmadd_vx_u8mf4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vmadd.nxv2i8.i8.i64(<vscale x 2 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 2 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vmadd.nxv2i8.i8.i64(<vscale x 2 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 2 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i8> [[TMP0]]
 //
 vuint8mf4_t test_vmadd_vx_u8mf4(vuint8mf4_t vd, uint8_t rs1, vuint8mf4_t vs2, size_t vl) {
@@ -441,7 +441,7 @@ vuint8mf4_t test_vmadd_vx_u8mf4(vuint8mf4_t vd, uint8_t rs1, vuint8mf4_t vs2, si
 
 // CHECK-RV64-LABEL: @test_vmadd_vv_u8mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vmadd.nxv4i8.nxv4i8.i64(<vscale x 4 x i8> [[VD:%.*]], <vscale x 4 x i8> [[VS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vmadd.nxv4i8.nxv4i8.i64(<vscale x 4 x i8> [[VD:%.*]], <vscale x 4 x i8> [[VS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i8> [[TMP0]]
 //
 vuint8mf2_t test_vmadd_vv_u8mf2(vuint8mf2_t vd, vuint8mf2_t vs1, vuint8mf2_t vs2, size_t vl) {
@@ -450,7 +450,7 @@ vuint8mf2_t test_vmadd_vv_u8mf2(vuint8mf2_t vd, vuint8mf2_t vs1, vuint8mf2_t vs2
 
 // CHECK-RV64-LABEL: @test_vmadd_vx_u8mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vmadd.nxv4i8.i8.i64(<vscale x 4 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vmadd.nxv4i8.i8.i64(<vscale x 4 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i8> [[TMP0]]
 //
 vuint8mf2_t test_vmadd_vx_u8mf2(vuint8mf2_t vd, uint8_t rs1, vuint8mf2_t vs2, size_t vl) {
@@ -459,7 +459,7 @@ vuint8mf2_t test_vmadd_vx_u8mf2(vuint8mf2_t vd, uint8_t rs1, vuint8mf2_t vs2, si
 
 // CHECK-RV64-LABEL: @test_vmadd_vv_u8m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vmadd.nxv8i8.nxv8i8.i64(<vscale x 8 x i8> [[VD:%.*]], <vscale x 8 x i8> [[VS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vmadd.nxv8i8.nxv8i8.i64(<vscale x 8 x i8> [[VD:%.*]], <vscale x 8 x i8> [[VS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i8> [[TMP0]]
 //
 vuint8m1_t test_vmadd_vv_u8m1(vuint8m1_t vd, vuint8m1_t vs1, vuint8m1_t vs2, size_t vl) {
@@ -468,7 +468,7 @@ vuint8m1_t test_vmadd_vv_u8m1(vuint8m1_t vd, vuint8m1_t vs1, vuint8m1_t vs2, siz
 
 // CHECK-RV64-LABEL: @test_vmadd_vx_u8m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vmadd.nxv8i8.i8.i64(<vscale x 8 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vmadd.nxv8i8.i8.i64(<vscale x 8 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i8> [[TMP0]]
 //
 vuint8m1_t test_vmadd_vx_u8m1(vuint8m1_t vd, uint8_t rs1, vuint8m1_t vs2, size_t vl) {
@@ -477,7 +477,7 @@ vuint8m1_t test_vmadd_vx_u8m1(vuint8m1_t vd, uint8_t rs1, vuint8m1_t vs2, size_t
 
 // CHECK-RV64-LABEL: @test_vmadd_vv_u8m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vmadd.nxv16i8.nxv16i8.i64(<vscale x 16 x i8> [[VD:%.*]], <vscale x 16 x i8> [[VS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vmadd.nxv16i8.nxv16i8.i64(<vscale x 16 x i8> [[VD:%.*]], <vscale x 16 x i8> [[VS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
 vuint8m2_t test_vmadd_vv_u8m2(vuint8m2_t vd, vuint8m2_t vs1, vuint8m2_t vs2, size_t vl) {
@@ -486,7 +486,7 @@ vuint8m2_t test_vmadd_vv_u8m2(vuint8m2_t vd, vuint8m2_t vs1, vuint8m2_t vs2, siz
 
 // CHECK-RV64-LABEL: @test_vmadd_vx_u8m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vmadd.nxv16i8.i8.i64(<vscale x 16 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vmadd.nxv16i8.i8.i64(<vscale x 16 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
 vuint8m2_t test_vmadd_vx_u8m2(vuint8m2_t vd, uint8_t rs1, vuint8m2_t vs2, size_t vl) {
@@ -495,7 +495,7 @@ vuint8m2_t test_vmadd_vx_u8m2(vuint8m2_t vd, uint8_t rs1, vuint8m2_t vs2, size_t
 
 // CHECK-RV64-LABEL: @test_vmadd_vv_u8m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i8> @llvm.riscv.vmadd.nxv32i8.nxv32i8.i64(<vscale x 32 x i8> [[VD:%.*]], <vscale x 32 x i8> [[VS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i8> @llvm.riscv.vmadd.nxv32i8.nxv32i8.i64(<vscale x 32 x i8> [[VD:%.*]], <vscale x 32 x i8> [[VS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 32 x i8> [[TMP0]]
 //
 vuint8m4_t test_vmadd_vv_u8m4(vuint8m4_t vd, vuint8m4_t vs1, vuint8m4_t vs2, size_t vl) {
@@ -504,7 +504,7 @@ vuint8m4_t test_vmadd_vv_u8m4(vuint8m4_t vd, vuint8m4_t vs1, vuint8m4_t vs2, siz
 
 // CHECK-RV64-LABEL: @test_vmadd_vx_u8m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i8> @llvm.riscv.vmadd.nxv32i8.i8.i64(<vscale x 32 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i8> @llvm.riscv.vmadd.nxv32i8.i8.i64(<vscale x 32 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 32 x i8> [[TMP0]]
 //
 vuint8m4_t test_vmadd_vx_u8m4(vuint8m4_t vd, uint8_t rs1, vuint8m4_t vs2, size_t vl) {
@@ -513,7 +513,7 @@ vuint8m4_t test_vmadd_vx_u8m4(vuint8m4_t vd, uint8_t rs1, vuint8m4_t vs2, size_t
 
 // CHECK-RV64-LABEL: @test_vmadd_vv_u8m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 64 x i8> @llvm.riscv.vmadd.nxv64i8.nxv64i8.i64(<vscale x 64 x i8> [[VD:%.*]], <vscale x 64 x i8> [[VS1:%.*]], <vscale x 64 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 64 x i8> @llvm.riscv.vmadd.nxv64i8.nxv64i8.i64(<vscale x 64 x i8> [[VD:%.*]], <vscale x 64 x i8> [[VS1:%.*]], <vscale x 64 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 64 x i8> [[TMP0]]
 //
 vuint8m8_t test_vmadd_vv_u8m8(vuint8m8_t vd, vuint8m8_t vs1, vuint8m8_t vs2, size_t vl) {
@@ -522,7 +522,7 @@ vuint8m8_t test_vmadd_vv_u8m8(vuint8m8_t vd, vuint8m8_t vs1, vuint8m8_t vs2, siz
 
 // CHECK-RV64-LABEL: @test_vmadd_vx_u8m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 64 x i8> @llvm.riscv.vmadd.nxv64i8.i8.i64(<vscale x 64 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 64 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 64 x i8> @llvm.riscv.vmadd.nxv64i8.i8.i64(<vscale x 64 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 64 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 64 x i8> [[TMP0]]
 //
 vuint8m8_t test_vmadd_vx_u8m8(vuint8m8_t vd, uint8_t rs1, vuint8m8_t vs2, size_t vl) {
@@ -531,7 +531,7 @@ vuint8m8_t test_vmadd_vx_u8m8(vuint8m8_t vd, uint8_t rs1, vuint8m8_t vs2, size_t
 
 // CHECK-RV64-LABEL: @test_vmadd_vv_u16mf4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vmadd.nxv1i16.nxv1i16.i64(<vscale x 1 x i16> [[VD:%.*]], <vscale x 1 x i16> [[VS1:%.*]], <vscale x 1 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vmadd.nxv1i16.nxv1i16.i64(<vscale x 1 x i16> [[VD:%.*]], <vscale x 1 x i16> [[VS1:%.*]], <vscale x 1 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i16> [[TMP0]]
 //
 vuint16mf4_t test_vmadd_vv_u16mf4(vuint16mf4_t vd, vuint16mf4_t vs1, vuint16mf4_t vs2, size_t vl) {
@@ -540,7 +540,7 @@ vuint16mf4_t test_vmadd_vv_u16mf4(vuint16mf4_t vd, vuint16mf4_t vs1, vuint16mf4_
 
 // CHECK-RV64-LABEL: @test_vmadd_vx_u16mf4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vmadd.nxv1i16.i16.i64(<vscale x 1 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 1 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vmadd.nxv1i16.i16.i64(<vscale x 1 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 1 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i16> [[TMP0]]
 //
 vuint16mf4_t test_vmadd_vx_u16mf4(vuint16mf4_t vd, uint16_t rs1, vuint16mf4_t vs2, size_t vl) {
@@ -549,7 +549,7 @@ vuint16mf4_t test_vmadd_vx_u16mf4(vuint16mf4_t vd, uint16_t rs1, vuint16mf4_t vs
 
 // CHECK-RV64-LABEL: @test_vmadd_vv_u16mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vmadd.nxv2i16.nxv2i16.i64(<vscale x 2 x i16> [[VD:%.*]], <vscale x 2 x i16> [[VS1:%.*]], <vscale x 2 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vmadd.nxv2i16.nxv2i16.i64(<vscale x 2 x i16> [[VD:%.*]], <vscale x 2 x i16> [[VS1:%.*]], <vscale x 2 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i16> [[TMP0]]
 //
 vuint16mf2_t test_vmadd_vv_u16mf2(vuint16mf2_t vd, vuint16mf2_t vs1, vuint16mf2_t vs2, size_t vl) {
@@ -558,7 +558,7 @@ vuint16mf2_t test_vmadd_vv_u16mf2(vuint16mf2_t vd, vuint16mf2_t vs1, vuint16mf2_
 
 // CHECK-RV64-LABEL: @test_vmadd_vx_u16mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vmadd.nxv2i16.i16.i64(<vscale x 2 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 2 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vmadd.nxv2i16.i16.i64(<vscale x 2 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 2 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i16> [[TMP0]]
 //
 vuint16mf2_t test_vmadd_vx_u16mf2(vuint16mf2_t vd, uint16_t rs1, vuint16mf2_t vs2, size_t vl) {
@@ -567,7 +567,7 @@ vuint16mf2_t test_vmadd_vx_u16mf2(vuint16mf2_t vd, uint16_t rs1, vuint16mf2_t vs
 
 // CHECK-RV64-LABEL: @test_vmadd_vv_u16m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vmadd.nxv4i16.nxv4i16.i64(<vscale x 4 x i16> [[VD:%.*]], <vscale x 4 x i16> [[VS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vmadd.nxv4i16.nxv4i16.i64(<vscale x 4 x i16> [[VD:%.*]], <vscale x 4 x i16> [[VS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i16> [[TMP0]]
 //
 vuint16m1_t test_vmadd_vv_u16m1(vuint16m1_t vd, vuint16m1_t vs1, vuint16m1_t vs2, size_t vl) {
@@ -576,7 +576,7 @@ vuint16m1_t test_vmadd_vv_u16m1(vuint16m1_t vd, vuint16m1_t vs1, vuint16m1_t vs2
 
 // CHECK-RV64-LABEL: @test_vmadd_vx_u16m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vmadd.nxv4i16.i16.i64(<vscale x 4 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vmadd.nxv4i16.i16.i64(<vscale x 4 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i16> [[TMP0]]
 //
 vuint16m1_t test_vmadd_vx_u16m1(vuint16m1_t vd, uint16_t rs1, vuint16m1_t vs2, size_t vl) {
@@ -585,7 +585,7 @@ vuint16m1_t test_vmadd_vx_u16m1(vuint16m1_t vd, uint16_t rs1, vuint16m1_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vmadd_vv_u16m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vmadd.nxv8i16.nxv8i16.i64(<vscale x 8 x i16> [[VD:%.*]], <vscale x 8 x i16> [[VS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vmadd.nxv8i16.nxv8i16.i64(<vscale x 8 x i16> [[VD:%.*]], <vscale x 8 x i16> [[VS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
 vuint16m2_t test_vmadd_vv_u16m2(vuint16m2_t vd, vuint16m2_t vs1, vuint16m2_t vs2, size_t vl) {
@@ -594,7 +594,7 @@ vuint16m2_t test_vmadd_vv_u16m2(vuint16m2_t vd, vuint16m2_t vs1, vuint16m2_t vs2
 
 // CHECK-RV64-LABEL: @test_vmadd_vx_u16m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vmadd.nxv8i16.i16.i64(<vscale x 8 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vmadd.nxv8i16.i16.i64(<vscale x 8 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
 vuint16m2_t test_vmadd_vx_u16m2(vuint16m2_t vd, uint16_t rs1, vuint16m2_t vs2, size_t vl) {
@@ -603,7 +603,7 @@ vuint16m2_t test_vmadd_vx_u16m2(vuint16m2_t vd, uint16_t rs1, vuint16m2_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vmadd_vv_u16m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vmadd.nxv16i16.nxv16i16.i64(<vscale x 16 x i16> [[VD:%.*]], <vscale x 16 x i16> [[VS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vmadd.nxv16i16.nxv16i16.i64(<vscale x 16 x i16> [[VD:%.*]], <vscale x 16 x i16> [[VS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i16> [[TMP0]]
 //
 vuint16m4_t test_vmadd_vv_u16m4(vuint16m4_t vd, vuint16m4_t vs1, vuint16m4_t vs2, size_t vl) {
@@ -612,7 +612,7 @@ vuint16m4_t test_vmadd_vv_u16m4(vuint16m4_t vd, vuint16m4_t vs1, vuint16m4_t vs2
 
 // CHECK-RV64-LABEL: @test_vmadd_vx_u16m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vmadd.nxv16i16.i16.i64(<vscale x 16 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vmadd.nxv16i16.i16.i64(<vscale x 16 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i16> [[TMP0]]
 //
 vuint16m4_t test_vmadd_vx_u16m4(vuint16m4_t vd, uint16_t rs1, vuint16m4_t vs2, size_t vl) {
@@ -621,7 +621,7 @@ vuint16m4_t test_vmadd_vx_u16m4(vuint16m4_t vd, uint16_t rs1, vuint16m4_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vmadd_vv_u16m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vmadd.nxv32i16.nxv32i16.i64(<vscale x 32 x i16> [[VD:%.*]], <vscale x 32 x i16> [[VS1:%.*]], <vscale x 32 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vmadd.nxv32i16.nxv32i16.i64(<vscale x 32 x i16> [[VD:%.*]], <vscale x 32 x i16> [[VS1:%.*]], <vscale x 32 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 32 x i16> [[TMP0]]
 //
 vuint16m8_t test_vmadd_vv_u16m8(vuint16m8_t vd, vuint16m8_t vs1, vuint16m8_t vs2, size_t vl) {
@@ -630,7 +630,7 @@ vuint16m8_t test_vmadd_vv_u16m8(vuint16m8_t vd, vuint16m8_t vs1, vuint16m8_t vs2
 
 // CHECK-RV64-LABEL: @test_vmadd_vx_u16m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vmadd.nxv32i16.i16.i64(<vscale x 32 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 32 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vmadd.nxv32i16.i16.i64(<vscale x 32 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 32 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 32 x i16> [[TMP0]]
 //
 vuint16m8_t test_vmadd_vx_u16m8(vuint16m8_t vd, uint16_t rs1, vuint16m8_t vs2, size_t vl) {
@@ -639,7 +639,7 @@ vuint16m8_t test_vmadd_vx_u16m8(vuint16m8_t vd, uint16_t rs1, vuint16m8_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vmadd_vv_u32mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vmadd.nxv1i32.nxv1i32.i64(<vscale x 1 x i32> [[VD:%.*]], <vscale x 1 x i32> [[VS1:%.*]], <vscale x 1 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vmadd.nxv1i32.nxv1i32.i64(<vscale x 1 x i32> [[VD:%.*]], <vscale x 1 x i32> [[VS1:%.*]], <vscale x 1 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i32> [[TMP0]]
 //
 vuint32mf2_t test_vmadd_vv_u32mf2(vuint32mf2_t vd, vuint32mf2_t vs1, vuint32mf2_t vs2, size_t vl) {
@@ -648,7 +648,7 @@ vuint32mf2_t test_vmadd_vv_u32mf2(vuint32mf2_t vd, vuint32mf2_t vs1, vuint32mf2_
 
 // CHECK-RV64-LABEL: @test_vmadd_vx_u32mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vmadd.nxv1i32.i32.i64(<vscale x 1 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 1 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vmadd.nxv1i32.i32.i64(<vscale x 1 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 1 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i32> [[TMP0]]
 //
 vuint32mf2_t test_vmadd_vx_u32mf2(vuint32mf2_t vd, uint32_t rs1, vuint32mf2_t vs2, size_t vl) {
@@ -657,7 +657,7 @@ vuint32mf2_t test_vmadd_vx_u32mf2(vuint32mf2_t vd, uint32_t rs1, vuint32mf2_t vs
 
 // CHECK-RV64-LABEL: @test_vmadd_vv_u32m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vmadd.nxv2i32.nxv2i32.i64(<vscale x 2 x i32> [[VD:%.*]], <vscale x 2 x i32> [[VS1:%.*]], <vscale x 2 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vmadd.nxv2i32.nxv2i32.i64(<vscale x 2 x i32> [[VD:%.*]], <vscale x 2 x i32> [[VS1:%.*]], <vscale x 2 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i32> [[TMP0]]
 //
 vuint32m1_t test_vmadd_vv_u32m1(vuint32m1_t vd, vuint32m1_t vs1, vuint32m1_t vs2, size_t vl) {
@@ -666,7 +666,7 @@ vuint32m1_t test_vmadd_vv_u32m1(vuint32m1_t vd, vuint32m1_t vs1, vuint32m1_t vs2
 
 // CHECK-RV64-LABEL: @test_vmadd_vx_u32m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vmadd.nxv2i32.i32.i64(<vscale x 2 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 2 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vmadd.nxv2i32.i32.i64(<vscale x 2 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 2 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i32> [[TMP0]]
 //
 vuint32m1_t test_vmadd_vx_u32m1(vuint32m1_t vd, uint32_t rs1, vuint32m1_t vs2, size_t vl) {
@@ -675,7 +675,7 @@ vuint32m1_t test_vmadd_vx_u32m1(vuint32m1_t vd, uint32_t rs1, vuint32m1_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vmadd_vv_u32m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vmadd.nxv4i32.nxv4i32.i64(<vscale x 4 x i32> [[VD:%.*]], <vscale x 4 x i32> [[VS1:%.*]], <vscale x 4 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vmadd.nxv4i32.nxv4i32.i64(<vscale x 4 x i32> [[VD:%.*]], <vscale x 4 x i32> [[VS1:%.*]], <vscale x 4 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
 vuint32m2_t test_vmadd_vv_u32m2(vuint32m2_t vd, vuint32m2_t vs1, vuint32m2_t vs2, size_t vl) {
@@ -684,7 +684,7 @@ vuint32m2_t test_vmadd_vv_u32m2(vuint32m2_t vd, vuint32m2_t vs1, vuint32m2_t vs2
 
 // CHECK-RV64-LABEL: @test_vmadd_vx_u32m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vmadd.nxv4i32.i32.i64(<vscale x 4 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 4 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vmadd.nxv4i32.i32.i64(<vscale x 4 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 4 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
 vuint32m2_t test_vmadd_vx_u32m2(vuint32m2_t vd, uint32_t rs1, vuint32m2_t vs2, size_t vl) {
@@ -693,7 +693,7 @@ vuint32m2_t test_vmadd_vx_u32m2(vuint32m2_t vd, uint32_t rs1, vuint32m2_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vmadd_vv_u32m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vmadd.nxv8i32.nxv8i32.i64(<vscale x 8 x i32> [[VD:%.*]], <vscale x 8 x i32> [[VS1:%.*]], <vscale x 8 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vmadd.nxv8i32.nxv8i32.i64(<vscale x 8 x i32> [[VD:%.*]], <vscale x 8 x i32> [[VS1:%.*]], <vscale x 8 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i32> [[TMP0]]
 //
 vuint32m4_t test_vmadd_vv_u32m4(vuint32m4_t vd, vuint32m4_t vs1, vuint32m4_t vs2, size_t vl) {
@@ -702,7 +702,7 @@ vuint32m4_t test_vmadd_vv_u32m4(vuint32m4_t vd, vuint32m4_t vs1, vuint32m4_t vs2
 
 // CHECK-RV64-LABEL: @test_vmadd_vx_u32m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vmadd.nxv8i32.i32.i64(<vscale x 8 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 8 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vmadd.nxv8i32.i32.i64(<vscale x 8 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 8 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i32> [[TMP0]]
 //
 vuint32m4_t test_vmadd_vx_u32m4(vuint32m4_t vd, uint32_t rs1, vuint32m4_t vs2, size_t vl) {
@@ -711,7 +711,7 @@ vuint32m4_t test_vmadd_vx_u32m4(vuint32m4_t vd, uint32_t rs1, vuint32m4_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vmadd_vv_u32m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vmadd.nxv16i32.nxv16i32.i64(<vscale x 16 x i32> [[VD:%.*]], <vscale x 16 x i32> [[VS1:%.*]], <vscale x 16 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vmadd.nxv16i32.nxv16i32.i64(<vscale x 16 x i32> [[VD:%.*]], <vscale x 16 x i32> [[VS1:%.*]], <vscale x 16 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i32> [[TMP0]]
 //
 vuint32m8_t test_vmadd_vv_u32m8(vuint32m8_t vd, vuint32m8_t vs1, vuint32m8_t vs2, size_t vl) {
@@ -720,7 +720,7 @@ vuint32m8_t test_vmadd_vv_u32m8(vuint32m8_t vd, vuint32m8_t vs1, vuint32m8_t vs2
 
 // CHECK-RV64-LABEL: @test_vmadd_vx_u32m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vmadd.nxv16i32.i32.i64(<vscale x 16 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 16 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vmadd.nxv16i32.i32.i64(<vscale x 16 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 16 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i32> [[TMP0]]
 //
 vuint32m8_t test_vmadd_vx_u32m8(vuint32m8_t vd, uint32_t rs1, vuint32m8_t vs2, size_t vl) {
@@ -729,7 +729,7 @@ vuint32m8_t test_vmadd_vx_u32m8(vuint32m8_t vd, uint32_t rs1, vuint32m8_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vmadd_vv_u64m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vmadd.nxv1i64.nxv1i64.i64(<vscale x 1 x i64> [[VD:%.*]], <vscale x 1 x i64> [[VS1:%.*]], <vscale x 1 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vmadd.nxv1i64.nxv1i64.i64(<vscale x 1 x i64> [[VD:%.*]], <vscale x 1 x i64> [[VS1:%.*]], <vscale x 1 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
 //
 vuint64m1_t test_vmadd_vv_u64m1(vuint64m1_t vd, vuint64m1_t vs1, vuint64m1_t vs2, size_t vl) {
@@ -738,7 +738,7 @@ vuint64m1_t test_vmadd_vv_u64m1(vuint64m1_t vd, vuint64m1_t vs1, vuint64m1_t vs2
 
 // CHECK-RV64-LABEL: @test_vmadd_vx_u64m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vmadd.nxv1i64.i64.i64(<vscale x 1 x i64> [[VD:%.*]], i64 [[RS1:%.*]], <vscale x 1 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vmadd.nxv1i64.i64.i64(<vscale x 1 x i64> [[VD:%.*]], i64 [[RS1:%.*]], <vscale x 1 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
 //
 vuint64m1_t test_vmadd_vx_u64m1(vuint64m1_t vd, uint64_t rs1, vuint64m1_t vs2, size_t vl) {
@@ -747,7 +747,7 @@ vuint64m1_t test_vmadd_vx_u64m1(vuint64m1_t vd, uint64_t rs1, vuint64m1_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vmadd_vv_u64m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vmadd.nxv2i64.nxv2i64.i64(<vscale x 2 x i64> [[VD:%.*]], <vscale x 2 x i64> [[VS1:%.*]], <vscale x 2 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vmadd.nxv2i64.nxv2i64.i64(<vscale x 2 x i64> [[VD:%.*]], <vscale x 2 x i64> [[VS1:%.*]], <vscale x 2 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
 vuint64m2_t test_vmadd_vv_u64m2(vuint64m2_t vd, vuint64m2_t vs1, vuint64m2_t vs2, size_t vl) {
@@ -756,7 +756,7 @@ vuint64m2_t test_vmadd_vv_u64m2(vuint64m2_t vd, vuint64m2_t vs1, vuint64m2_t vs2
 
 // CHECK-RV64-LABEL: @test_vmadd_vx_u64m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vmadd.nxv2i64.i64.i64(<vscale x 2 x i64> [[VD:%.*]], i64 [[RS1:%.*]], <vscale x 2 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vmadd.nxv2i64.i64.i64(<vscale x 2 x i64> [[VD:%.*]], i64 [[RS1:%.*]], <vscale x 2 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
 vuint64m2_t test_vmadd_vx_u64m2(vuint64m2_t vd, uint64_t rs1, vuint64m2_t vs2, size_t vl) {
@@ -765,7 +765,7 @@ vuint64m2_t test_vmadd_vx_u64m2(vuint64m2_t vd, uint64_t rs1, vuint64m2_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vmadd_vv_u64m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vmadd.nxv4i64.nxv4i64.i64(<vscale x 4 x i64> [[VD:%.*]], <vscale x 4 x i64> [[VS1:%.*]], <vscale x 4 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vmadd.nxv4i64.nxv4i64.i64(<vscale x 4 x i64> [[VD:%.*]], <vscale x 4 x i64> [[VS1:%.*]], <vscale x 4 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
 //
 vuint64m4_t test_vmadd_vv_u64m4(vuint64m4_t vd, vuint64m4_t vs1, vuint64m4_t vs2, size_t vl) {
@@ -774,7 +774,7 @@ vuint64m4_t test_vmadd_vv_u64m4(vuint64m4_t vd, vuint64m4_t vs1, vuint64m4_t vs2
 
 // CHECK-RV64-LABEL: @test_vmadd_vx_u64m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vmadd.nxv4i64.i64.i64(<vscale x 4 x i64> [[VD:%.*]], i64 [[RS1:%.*]], <vscale x 4 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vmadd.nxv4i64.i64.i64(<vscale x 4 x i64> [[VD:%.*]], i64 [[RS1:%.*]], <vscale x 4 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
 //
 vuint64m4_t test_vmadd_vx_u64m4(vuint64m4_t vd, uint64_t rs1, vuint64m4_t vs2, size_t vl) {
@@ -783,7 +783,7 @@ vuint64m4_t test_vmadd_vx_u64m4(vuint64m4_t vd, uint64_t rs1, vuint64m4_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vmadd_vv_u64m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vmadd.nxv8i64.nxv8i64.i64(<vscale x 8 x i64> [[VD:%.*]], <vscale x 8 x i64> [[VS1:%.*]], <vscale x 8 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vmadd.nxv8i64.nxv8i64.i64(<vscale x 8 x i64> [[VD:%.*]], <vscale x 8 x i64> [[VS1:%.*]], <vscale x 8 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
 //
 vuint64m8_t test_vmadd_vv_u64m8(vuint64m8_t vd, vuint64m8_t vs1, vuint64m8_t vs2, size_t vl) {
@@ -792,7 +792,7 @@ vuint64m8_t test_vmadd_vv_u64m8(vuint64m8_t vd, vuint64m8_t vs1, vuint64m8_t vs2
 
 // CHECK-RV64-LABEL: @test_vmadd_vx_u64m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vmadd.nxv8i64.i64.i64(<vscale x 8 x i64> [[VD:%.*]], i64 [[RS1:%.*]], <vscale x 8 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vmadd.nxv8i64.i64.i64(<vscale x 8 x i64> [[VD:%.*]], i64 [[RS1:%.*]], <vscale x 8 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
 //
 vuint64m8_t test_vmadd_vx_u64m8(vuint64m8_t vd, uint64_t rs1, vuint64m8_t vs2, size_t vl) {

diff  --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vnmsac.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vnmsac.c
index 04787633070ee..cacaf660a4851 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vnmsac.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vnmsac.c
@@ -9,7 +9,7 @@
 
 // CHECK-RV64-LABEL: @test_vnmsac_vv_i8mf8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vnmsac.nxv1i8.nxv1i8.i64(<vscale x 1 x i8> [[VD:%.*]], <vscale x 1 x i8> [[VS1:%.*]], <vscale x 1 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vnmsac.nxv1i8.nxv1i8.i64(<vscale x 1 x i8> [[VD:%.*]], <vscale x 1 x i8> [[VS1:%.*]], <vscale x 1 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i8> [[TMP0]]
 //
 vint8mf8_t test_vnmsac_vv_i8mf8(vint8mf8_t vd, vint8mf8_t vs1, vint8mf8_t vs2, size_t vl) {
@@ -18,7 +18,7 @@ vint8mf8_t test_vnmsac_vv_i8mf8(vint8mf8_t vd, vint8mf8_t vs1, vint8mf8_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vnmsac_vx_i8mf8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vnmsac.nxv1i8.i8.i64(<vscale x 1 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 1 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vnmsac.nxv1i8.i8.i64(<vscale x 1 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 1 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i8> [[TMP0]]
 //
 vint8mf8_t test_vnmsac_vx_i8mf8(vint8mf8_t vd, int8_t rs1, vint8mf8_t vs2, size_t vl) {
@@ -27,7 +27,7 @@ vint8mf8_t test_vnmsac_vx_i8mf8(vint8mf8_t vd, int8_t rs1, vint8mf8_t vs2, size_
 
 // CHECK-RV64-LABEL: @test_vnmsac_vv_i8mf4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vnmsac.nxv2i8.nxv2i8.i64(<vscale x 2 x i8> [[VD:%.*]], <vscale x 2 x i8> [[VS1:%.*]], <vscale x 2 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vnmsac.nxv2i8.nxv2i8.i64(<vscale x 2 x i8> [[VD:%.*]], <vscale x 2 x i8> [[VS1:%.*]], <vscale x 2 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i8> [[TMP0]]
 //
 vint8mf4_t test_vnmsac_vv_i8mf4(vint8mf4_t vd, vint8mf4_t vs1, vint8mf4_t vs2, size_t vl) {
@@ -36,7 +36,7 @@ vint8mf4_t test_vnmsac_vv_i8mf4(vint8mf4_t vd, vint8mf4_t vs1, vint8mf4_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vnmsac_vx_i8mf4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vnmsac.nxv2i8.i8.i64(<vscale x 2 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 2 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vnmsac.nxv2i8.i8.i64(<vscale x 2 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 2 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i8> [[TMP0]]
 //
 vint8mf4_t test_vnmsac_vx_i8mf4(vint8mf4_t vd, int8_t rs1, vint8mf4_t vs2, size_t vl) {
@@ -45,7 +45,7 @@ vint8mf4_t test_vnmsac_vx_i8mf4(vint8mf4_t vd, int8_t rs1, vint8mf4_t vs2, size_
 
 // CHECK-RV64-LABEL: @test_vnmsac_vv_i8mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vnmsac.nxv4i8.nxv4i8.i64(<vscale x 4 x i8> [[VD:%.*]], <vscale x 4 x i8> [[VS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vnmsac.nxv4i8.nxv4i8.i64(<vscale x 4 x i8> [[VD:%.*]], <vscale x 4 x i8> [[VS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i8> [[TMP0]]
 //
 vint8mf2_t test_vnmsac_vv_i8mf2(vint8mf2_t vd, vint8mf2_t vs1, vint8mf2_t vs2, size_t vl) {
@@ -54,7 +54,7 @@ vint8mf2_t test_vnmsac_vv_i8mf2(vint8mf2_t vd, vint8mf2_t vs1, vint8mf2_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vnmsac_vx_i8mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vnmsac.nxv4i8.i8.i64(<vscale x 4 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vnmsac.nxv4i8.i8.i64(<vscale x 4 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i8> [[TMP0]]
 //
 vint8mf2_t test_vnmsac_vx_i8mf2(vint8mf2_t vd, int8_t rs1, vint8mf2_t vs2, size_t vl) {
@@ -63,7 +63,7 @@ vint8mf2_t test_vnmsac_vx_i8mf2(vint8mf2_t vd, int8_t rs1, vint8mf2_t vs2, size_
 
 // CHECK-RV64-LABEL: @test_vnmsac_vv_i8m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vnmsac.nxv8i8.nxv8i8.i64(<vscale x 8 x i8> [[VD:%.*]], <vscale x 8 x i8> [[VS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vnmsac.nxv8i8.nxv8i8.i64(<vscale x 8 x i8> [[VD:%.*]], <vscale x 8 x i8> [[VS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i8> [[TMP0]]
 //
 vint8m1_t test_vnmsac_vv_i8m1(vint8m1_t vd, vint8m1_t vs1, vint8m1_t vs2, size_t vl) {
@@ -72,7 +72,7 @@ vint8m1_t test_vnmsac_vv_i8m1(vint8m1_t vd, vint8m1_t vs1, vint8m1_t vs2, size_t
 
 // CHECK-RV64-LABEL: @test_vnmsac_vx_i8m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vnmsac.nxv8i8.i8.i64(<vscale x 8 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vnmsac.nxv8i8.i8.i64(<vscale x 8 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i8> [[TMP0]]
 //
 vint8m1_t test_vnmsac_vx_i8m1(vint8m1_t vd, int8_t rs1, vint8m1_t vs2, size_t vl) {
@@ -81,7 +81,7 @@ vint8m1_t test_vnmsac_vx_i8m1(vint8m1_t vd, int8_t rs1, vint8m1_t vs2, size_t vl
 
 // CHECK-RV64-LABEL: @test_vnmsac_vv_i8m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vnmsac.nxv16i8.nxv16i8.i64(<vscale x 16 x i8> [[VD:%.*]], <vscale x 16 x i8> [[VS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vnmsac.nxv16i8.nxv16i8.i64(<vscale x 16 x i8> [[VD:%.*]], <vscale x 16 x i8> [[VS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
 vint8m2_t test_vnmsac_vv_i8m2(vint8m2_t vd, vint8m2_t vs1, vint8m2_t vs2, size_t vl) {
@@ -90,7 +90,7 @@ vint8m2_t test_vnmsac_vv_i8m2(vint8m2_t vd, vint8m2_t vs1, vint8m2_t vs2, size_t
 
 // CHECK-RV64-LABEL: @test_vnmsac_vx_i8m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vnmsac.nxv16i8.i8.i64(<vscale x 16 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vnmsac.nxv16i8.i8.i64(<vscale x 16 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
 vint8m2_t test_vnmsac_vx_i8m2(vint8m2_t vd, int8_t rs1, vint8m2_t vs2, size_t vl) {
@@ -99,7 +99,7 @@ vint8m2_t test_vnmsac_vx_i8m2(vint8m2_t vd, int8_t rs1, vint8m2_t vs2, size_t vl
 
 // CHECK-RV64-LABEL: @test_vnmsac_vv_i8m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i8> @llvm.riscv.vnmsac.nxv32i8.nxv32i8.i64(<vscale x 32 x i8> [[VD:%.*]], <vscale x 32 x i8> [[VS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i8> @llvm.riscv.vnmsac.nxv32i8.nxv32i8.i64(<vscale x 32 x i8> [[VD:%.*]], <vscale x 32 x i8> [[VS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 32 x i8> [[TMP0]]
 //
 vint8m4_t test_vnmsac_vv_i8m4(vint8m4_t vd, vint8m4_t vs1, vint8m4_t vs2, size_t vl) {
@@ -108,7 +108,7 @@ vint8m4_t test_vnmsac_vv_i8m4(vint8m4_t vd, vint8m4_t vs1, vint8m4_t vs2, size_t
 
 // CHECK-RV64-LABEL: @test_vnmsac_vx_i8m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i8> @llvm.riscv.vnmsac.nxv32i8.i8.i64(<vscale x 32 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i8> @llvm.riscv.vnmsac.nxv32i8.i8.i64(<vscale x 32 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 32 x i8> [[TMP0]]
 //
 vint8m4_t test_vnmsac_vx_i8m4(vint8m4_t vd, int8_t rs1, vint8m4_t vs2, size_t vl) {
@@ -117,7 +117,7 @@ vint8m4_t test_vnmsac_vx_i8m4(vint8m4_t vd, int8_t rs1, vint8m4_t vs2, size_t vl
 
 // CHECK-RV64-LABEL: @test_vnmsac_vv_i8m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 64 x i8> @llvm.riscv.vnmsac.nxv64i8.nxv64i8.i64(<vscale x 64 x i8> [[VD:%.*]], <vscale x 64 x i8> [[VS1:%.*]], <vscale x 64 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 64 x i8> @llvm.riscv.vnmsac.nxv64i8.nxv64i8.i64(<vscale x 64 x i8> [[VD:%.*]], <vscale x 64 x i8> [[VS1:%.*]], <vscale x 64 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 64 x i8> [[TMP0]]
 //
 vint8m8_t test_vnmsac_vv_i8m8(vint8m8_t vd, vint8m8_t vs1, vint8m8_t vs2, size_t vl) {
@@ -126,7 +126,7 @@ vint8m8_t test_vnmsac_vv_i8m8(vint8m8_t vd, vint8m8_t vs1, vint8m8_t vs2, size_t
 
 // CHECK-RV64-LABEL: @test_vnmsac_vx_i8m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 64 x i8> @llvm.riscv.vnmsac.nxv64i8.i8.i64(<vscale x 64 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 64 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 64 x i8> @llvm.riscv.vnmsac.nxv64i8.i8.i64(<vscale x 64 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 64 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 64 x i8> [[TMP0]]
 //
 vint8m8_t test_vnmsac_vx_i8m8(vint8m8_t vd, int8_t rs1, vint8m8_t vs2, size_t vl) {
@@ -135,7 +135,7 @@ vint8m8_t test_vnmsac_vx_i8m8(vint8m8_t vd, int8_t rs1, vint8m8_t vs2, size_t vl
 
 // CHECK-RV64-LABEL: @test_vnmsac_vv_i16mf4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vnmsac.nxv1i16.nxv1i16.i64(<vscale x 1 x i16> [[VD:%.*]], <vscale x 1 x i16> [[VS1:%.*]], <vscale x 1 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vnmsac.nxv1i16.nxv1i16.i64(<vscale x 1 x i16> [[VD:%.*]], <vscale x 1 x i16> [[VS1:%.*]], <vscale x 1 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i16> [[TMP0]]
 //
 vint16mf4_t test_vnmsac_vv_i16mf4(vint16mf4_t vd, vint16mf4_t vs1, vint16mf4_t vs2, size_t vl) {
@@ -144,7 +144,7 @@ vint16mf4_t test_vnmsac_vv_i16mf4(vint16mf4_t vd, vint16mf4_t vs1, vint16mf4_t v
 
 // CHECK-RV64-LABEL: @test_vnmsac_vx_i16mf4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vnmsac.nxv1i16.i16.i64(<vscale x 1 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 1 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vnmsac.nxv1i16.i16.i64(<vscale x 1 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 1 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i16> [[TMP0]]
 //
 vint16mf4_t test_vnmsac_vx_i16mf4(vint16mf4_t vd, int16_t rs1, vint16mf4_t vs2, size_t vl) {
@@ -153,7 +153,7 @@ vint16mf4_t test_vnmsac_vx_i16mf4(vint16mf4_t vd, int16_t rs1, vint16mf4_t vs2,
 
 // CHECK-RV64-LABEL: @test_vnmsac_vv_i16mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vnmsac.nxv2i16.nxv2i16.i64(<vscale x 2 x i16> [[VD:%.*]], <vscale x 2 x i16> [[VS1:%.*]], <vscale x 2 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vnmsac.nxv2i16.nxv2i16.i64(<vscale x 2 x i16> [[VD:%.*]], <vscale x 2 x i16> [[VS1:%.*]], <vscale x 2 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i16> [[TMP0]]
 //
 vint16mf2_t test_vnmsac_vv_i16mf2(vint16mf2_t vd, vint16mf2_t vs1, vint16mf2_t vs2, size_t vl) {
@@ -162,7 +162,7 @@ vint16mf2_t test_vnmsac_vv_i16mf2(vint16mf2_t vd, vint16mf2_t vs1, vint16mf2_t v
 
 // CHECK-RV64-LABEL: @test_vnmsac_vx_i16mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vnmsac.nxv2i16.i16.i64(<vscale x 2 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 2 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vnmsac.nxv2i16.i16.i64(<vscale x 2 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 2 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i16> [[TMP0]]
 //
 vint16mf2_t test_vnmsac_vx_i16mf2(vint16mf2_t vd, int16_t rs1, vint16mf2_t vs2, size_t vl) {
@@ -171,7 +171,7 @@ vint16mf2_t test_vnmsac_vx_i16mf2(vint16mf2_t vd, int16_t rs1, vint16mf2_t vs2,
 
 // CHECK-RV64-LABEL: @test_vnmsac_vv_i16m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vnmsac.nxv4i16.nxv4i16.i64(<vscale x 4 x i16> [[VD:%.*]], <vscale x 4 x i16> [[VS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vnmsac.nxv4i16.nxv4i16.i64(<vscale x 4 x i16> [[VD:%.*]], <vscale x 4 x i16> [[VS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i16> [[TMP0]]
 //
 vint16m1_t test_vnmsac_vv_i16m1(vint16m1_t vd, vint16m1_t vs1, vint16m1_t vs2, size_t vl) {
@@ -180,7 +180,7 @@ vint16m1_t test_vnmsac_vv_i16m1(vint16m1_t vd, vint16m1_t vs1, vint16m1_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vnmsac_vx_i16m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vnmsac.nxv4i16.i16.i64(<vscale x 4 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vnmsac.nxv4i16.i16.i64(<vscale x 4 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i16> [[TMP0]]
 //
 vint16m1_t test_vnmsac_vx_i16m1(vint16m1_t vd, int16_t rs1, vint16m1_t vs2, size_t vl) {
@@ -189,7 +189,7 @@ vint16m1_t test_vnmsac_vx_i16m1(vint16m1_t vd, int16_t rs1, vint16m1_t vs2, size
 
 // CHECK-RV64-LABEL: @test_vnmsac_vv_i16m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vnmsac.nxv8i16.nxv8i16.i64(<vscale x 8 x i16> [[VD:%.*]], <vscale x 8 x i16> [[VS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vnmsac.nxv8i16.nxv8i16.i64(<vscale x 8 x i16> [[VD:%.*]], <vscale x 8 x i16> [[VS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
 vint16m2_t test_vnmsac_vv_i16m2(vint16m2_t vd, vint16m2_t vs1, vint16m2_t vs2, size_t vl) {
@@ -198,7 +198,7 @@ vint16m2_t test_vnmsac_vv_i16m2(vint16m2_t vd, vint16m2_t vs1, vint16m2_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vnmsac_vx_i16m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vnmsac.nxv8i16.i16.i64(<vscale x 8 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vnmsac.nxv8i16.i16.i64(<vscale x 8 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
 vint16m2_t test_vnmsac_vx_i16m2(vint16m2_t vd, int16_t rs1, vint16m2_t vs2, size_t vl) {
@@ -207,7 +207,7 @@ vint16m2_t test_vnmsac_vx_i16m2(vint16m2_t vd, int16_t rs1, vint16m2_t vs2, size
 
 // CHECK-RV64-LABEL: @test_vnmsac_vv_i16m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vnmsac.nxv16i16.nxv16i16.i64(<vscale x 16 x i16> [[VD:%.*]], <vscale x 16 x i16> [[VS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vnmsac.nxv16i16.nxv16i16.i64(<vscale x 16 x i16> [[VD:%.*]], <vscale x 16 x i16> [[VS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i16> [[TMP0]]
 //
 vint16m4_t test_vnmsac_vv_i16m4(vint16m4_t vd, vint16m4_t vs1, vint16m4_t vs2, size_t vl) {
@@ -216,7 +216,7 @@ vint16m4_t test_vnmsac_vv_i16m4(vint16m4_t vd, vint16m4_t vs1, vint16m4_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vnmsac_vx_i16m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vnmsac.nxv16i16.i16.i64(<vscale x 16 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vnmsac.nxv16i16.i16.i64(<vscale x 16 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i16> [[TMP0]]
 //
 vint16m4_t test_vnmsac_vx_i16m4(vint16m4_t vd, int16_t rs1, vint16m4_t vs2, size_t vl) {
@@ -225,7 +225,7 @@ vint16m4_t test_vnmsac_vx_i16m4(vint16m4_t vd, int16_t rs1, vint16m4_t vs2, size
 
 // CHECK-RV64-LABEL: @test_vnmsac_vv_i16m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vnmsac.nxv32i16.nxv32i16.i64(<vscale x 32 x i16> [[VD:%.*]], <vscale x 32 x i16> [[VS1:%.*]], <vscale x 32 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vnmsac.nxv32i16.nxv32i16.i64(<vscale x 32 x i16> [[VD:%.*]], <vscale x 32 x i16> [[VS1:%.*]], <vscale x 32 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 32 x i16> [[TMP0]]
 //
 vint16m8_t test_vnmsac_vv_i16m8(vint16m8_t vd, vint16m8_t vs1, vint16m8_t vs2, size_t vl) {
@@ -234,7 +234,7 @@ vint16m8_t test_vnmsac_vv_i16m8(vint16m8_t vd, vint16m8_t vs1, vint16m8_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vnmsac_vx_i16m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vnmsac.nxv32i16.i16.i64(<vscale x 32 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 32 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vnmsac.nxv32i16.i16.i64(<vscale x 32 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 32 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 32 x i16> [[TMP0]]
 //
 vint16m8_t test_vnmsac_vx_i16m8(vint16m8_t vd, int16_t rs1, vint16m8_t vs2, size_t vl) {
@@ -243,7 +243,7 @@ vint16m8_t test_vnmsac_vx_i16m8(vint16m8_t vd, int16_t rs1, vint16m8_t vs2, size
 
 // CHECK-RV64-LABEL: @test_vnmsac_vv_i32mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vnmsac.nxv1i32.nxv1i32.i64(<vscale x 1 x i32> [[VD:%.*]], <vscale x 1 x i32> [[VS1:%.*]], <vscale x 1 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vnmsac.nxv1i32.nxv1i32.i64(<vscale x 1 x i32> [[VD:%.*]], <vscale x 1 x i32> [[VS1:%.*]], <vscale x 1 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i32> [[TMP0]]
 //
 vint32mf2_t test_vnmsac_vv_i32mf2(vint32mf2_t vd, vint32mf2_t vs1, vint32mf2_t vs2, size_t vl) {
@@ -252,7 +252,7 @@ vint32mf2_t test_vnmsac_vv_i32mf2(vint32mf2_t vd, vint32mf2_t vs1, vint32mf2_t v
 
 // CHECK-RV64-LABEL: @test_vnmsac_vx_i32mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vnmsac.nxv1i32.i32.i64(<vscale x 1 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 1 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vnmsac.nxv1i32.i32.i64(<vscale x 1 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 1 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i32> [[TMP0]]
 //
 vint32mf2_t test_vnmsac_vx_i32mf2(vint32mf2_t vd, int32_t rs1, vint32mf2_t vs2, size_t vl) {
@@ -261,7 +261,7 @@ vint32mf2_t test_vnmsac_vx_i32mf2(vint32mf2_t vd, int32_t rs1, vint32mf2_t vs2,
 
 // CHECK-RV64-LABEL: @test_vnmsac_vv_i32m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vnmsac.nxv2i32.nxv2i32.i64(<vscale x 2 x i32> [[VD:%.*]], <vscale x 2 x i32> [[VS1:%.*]], <vscale x 2 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vnmsac.nxv2i32.nxv2i32.i64(<vscale x 2 x i32> [[VD:%.*]], <vscale x 2 x i32> [[VS1:%.*]], <vscale x 2 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i32> [[TMP0]]
 //
 vint32m1_t test_vnmsac_vv_i32m1(vint32m1_t vd, vint32m1_t vs1, vint32m1_t vs2, size_t vl) {
@@ -270,7 +270,7 @@ vint32m1_t test_vnmsac_vv_i32m1(vint32m1_t vd, vint32m1_t vs1, vint32m1_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vnmsac_vx_i32m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vnmsac.nxv2i32.i32.i64(<vscale x 2 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 2 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vnmsac.nxv2i32.i32.i64(<vscale x 2 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 2 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i32> [[TMP0]]
 //
 vint32m1_t test_vnmsac_vx_i32m1(vint32m1_t vd, int32_t rs1, vint32m1_t vs2, size_t vl) {
@@ -279,7 +279,7 @@ vint32m1_t test_vnmsac_vx_i32m1(vint32m1_t vd, int32_t rs1, vint32m1_t vs2, size
 
 // CHECK-RV64-LABEL: @test_vnmsac_vv_i32m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vnmsac.nxv4i32.nxv4i32.i64(<vscale x 4 x i32> [[VD:%.*]], <vscale x 4 x i32> [[VS1:%.*]], <vscale x 4 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vnmsac.nxv4i32.nxv4i32.i64(<vscale x 4 x i32> [[VD:%.*]], <vscale x 4 x i32> [[VS1:%.*]], <vscale x 4 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
 vint32m2_t test_vnmsac_vv_i32m2(vint32m2_t vd, vint32m2_t vs1, vint32m2_t vs2, size_t vl) {
@@ -288,7 +288,7 @@ vint32m2_t test_vnmsac_vv_i32m2(vint32m2_t vd, vint32m2_t vs1, vint32m2_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vnmsac_vx_i32m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vnmsac.nxv4i32.i32.i64(<vscale x 4 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 4 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vnmsac.nxv4i32.i32.i64(<vscale x 4 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 4 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
 vint32m2_t test_vnmsac_vx_i32m2(vint32m2_t vd, int32_t rs1, vint32m2_t vs2, size_t vl) {
@@ -297,7 +297,7 @@ vint32m2_t test_vnmsac_vx_i32m2(vint32m2_t vd, int32_t rs1, vint32m2_t vs2, size
 
 // CHECK-RV64-LABEL: @test_vnmsac_vv_i32m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vnmsac.nxv8i32.nxv8i32.i64(<vscale x 8 x i32> [[VD:%.*]], <vscale x 8 x i32> [[VS1:%.*]], <vscale x 8 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vnmsac.nxv8i32.nxv8i32.i64(<vscale x 8 x i32> [[VD:%.*]], <vscale x 8 x i32> [[VS1:%.*]], <vscale x 8 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i32> [[TMP0]]
 //
 vint32m4_t test_vnmsac_vv_i32m4(vint32m4_t vd, vint32m4_t vs1, vint32m4_t vs2, size_t vl) {
@@ -306,7 +306,7 @@ vint32m4_t test_vnmsac_vv_i32m4(vint32m4_t vd, vint32m4_t vs1, vint32m4_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vnmsac_vx_i32m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vnmsac.nxv8i32.i32.i64(<vscale x 8 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 8 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vnmsac.nxv8i32.i32.i64(<vscale x 8 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 8 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i32> [[TMP0]]
 //
 vint32m4_t test_vnmsac_vx_i32m4(vint32m4_t vd, int32_t rs1, vint32m4_t vs2, size_t vl) {
@@ -315,7 +315,7 @@ vint32m4_t test_vnmsac_vx_i32m4(vint32m4_t vd, int32_t rs1, vint32m4_t vs2, size
 
 // CHECK-RV64-LABEL: @test_vnmsac_vv_i32m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vnmsac.nxv16i32.nxv16i32.i64(<vscale x 16 x i32> [[VD:%.*]], <vscale x 16 x i32> [[VS1:%.*]], <vscale x 16 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vnmsac.nxv16i32.nxv16i32.i64(<vscale x 16 x i32> [[VD:%.*]], <vscale x 16 x i32> [[VS1:%.*]], <vscale x 16 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i32> [[TMP0]]
 //
 vint32m8_t test_vnmsac_vv_i32m8(vint32m8_t vd, vint32m8_t vs1, vint32m8_t vs2, size_t vl) {
@@ -324,7 +324,7 @@ vint32m8_t test_vnmsac_vv_i32m8(vint32m8_t vd, vint32m8_t vs1, vint32m8_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vnmsac_vx_i32m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vnmsac.nxv16i32.i32.i64(<vscale x 16 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 16 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vnmsac.nxv16i32.i32.i64(<vscale x 16 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 16 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i32> [[TMP0]]
 //
 vint32m8_t test_vnmsac_vx_i32m8(vint32m8_t vd, int32_t rs1, vint32m8_t vs2, size_t vl) {
@@ -333,7 +333,7 @@ vint32m8_t test_vnmsac_vx_i32m8(vint32m8_t vd, int32_t rs1, vint32m8_t vs2, size
 
 // CHECK-RV64-LABEL: @test_vnmsac_vv_i64m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vnmsac.nxv1i64.nxv1i64.i64(<vscale x 1 x i64> [[VD:%.*]], <vscale x 1 x i64> [[VS1:%.*]], <vscale x 1 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vnmsac.nxv1i64.nxv1i64.i64(<vscale x 1 x i64> [[VD:%.*]], <vscale x 1 x i64> [[VS1:%.*]], <vscale x 1 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
 //
 vint64m1_t test_vnmsac_vv_i64m1(vint64m1_t vd, vint64m1_t vs1, vint64m1_t vs2, size_t vl) {
@@ -342,7 +342,7 @@ vint64m1_t test_vnmsac_vv_i64m1(vint64m1_t vd, vint64m1_t vs1, vint64m1_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vnmsac_vx_i64m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vnmsac.nxv1i64.i64.i64(<vscale x 1 x i64> [[VD:%.*]], i64 [[RS1:%.*]], <vscale x 1 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vnmsac.nxv1i64.i64.i64(<vscale x 1 x i64> [[VD:%.*]], i64 [[RS1:%.*]], <vscale x 1 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
 //
 vint64m1_t test_vnmsac_vx_i64m1(vint64m1_t vd, int64_t rs1, vint64m1_t vs2, size_t vl) {
@@ -351,7 +351,7 @@ vint64m1_t test_vnmsac_vx_i64m1(vint64m1_t vd, int64_t rs1, vint64m1_t vs2, size
 
 // CHECK-RV64-LABEL: @test_vnmsac_vv_i64m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vnmsac.nxv2i64.nxv2i64.i64(<vscale x 2 x i64> [[VD:%.*]], <vscale x 2 x i64> [[VS1:%.*]], <vscale x 2 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vnmsac.nxv2i64.nxv2i64.i64(<vscale x 2 x i64> [[VD:%.*]], <vscale x 2 x i64> [[VS1:%.*]], <vscale x 2 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
 vint64m2_t test_vnmsac_vv_i64m2(vint64m2_t vd, vint64m2_t vs1, vint64m2_t vs2, size_t vl) {
@@ -360,7 +360,7 @@ vint64m2_t test_vnmsac_vv_i64m2(vint64m2_t vd, vint64m2_t vs1, vint64m2_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vnmsac_vx_i64m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vnmsac.nxv2i64.i64.i64(<vscale x 2 x i64> [[VD:%.*]], i64 [[RS1:%.*]], <vscale x 2 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vnmsac.nxv2i64.i64.i64(<vscale x 2 x i64> [[VD:%.*]], i64 [[RS1:%.*]], <vscale x 2 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
 vint64m2_t test_vnmsac_vx_i64m2(vint64m2_t vd, int64_t rs1, vint64m2_t vs2, size_t vl) {
@@ -369,7 +369,7 @@ vint64m2_t test_vnmsac_vx_i64m2(vint64m2_t vd, int64_t rs1, vint64m2_t vs2, size
 
 // CHECK-RV64-LABEL: @test_vnmsac_vv_i64m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vnmsac.nxv4i64.nxv4i64.i64(<vscale x 4 x i64> [[VD:%.*]], <vscale x 4 x i64> [[VS1:%.*]], <vscale x 4 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vnmsac.nxv4i64.nxv4i64.i64(<vscale x 4 x i64> [[VD:%.*]], <vscale x 4 x i64> [[VS1:%.*]], <vscale x 4 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
 //
 vint64m4_t test_vnmsac_vv_i64m4(vint64m4_t vd, vint64m4_t vs1, vint64m4_t vs2, size_t vl) {
@@ -378,7 +378,7 @@ vint64m4_t test_vnmsac_vv_i64m4(vint64m4_t vd, vint64m4_t vs1, vint64m4_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vnmsac_vx_i64m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vnmsac.nxv4i64.i64.i64(<vscale x 4 x i64> [[VD:%.*]], i64 [[RS1:%.*]], <vscale x 4 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vnmsac.nxv4i64.i64.i64(<vscale x 4 x i64> [[VD:%.*]], i64 [[RS1:%.*]], <vscale x 4 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
 //
 vint64m4_t test_vnmsac_vx_i64m4(vint64m4_t vd, int64_t rs1, vint64m4_t vs2, size_t vl) {
@@ -387,7 +387,7 @@ vint64m4_t test_vnmsac_vx_i64m4(vint64m4_t vd, int64_t rs1, vint64m4_t vs2, size
 
 // CHECK-RV64-LABEL: @test_vnmsac_vv_i64m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vnmsac.nxv8i64.nxv8i64.i64(<vscale x 8 x i64> [[VD:%.*]], <vscale x 8 x i64> [[VS1:%.*]], <vscale x 8 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vnmsac.nxv8i64.nxv8i64.i64(<vscale x 8 x i64> [[VD:%.*]], <vscale x 8 x i64> [[VS1:%.*]], <vscale x 8 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
 //
 vint64m8_t test_vnmsac_vv_i64m8(vint64m8_t vd, vint64m8_t vs1, vint64m8_t vs2, size_t vl) {
@@ -396,7 +396,7 @@ vint64m8_t test_vnmsac_vv_i64m8(vint64m8_t vd, vint64m8_t vs1, vint64m8_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vnmsac_vx_i64m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vnmsac.nxv8i64.i64.i64(<vscale x 8 x i64> [[VD:%.*]], i64 [[RS1:%.*]], <vscale x 8 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vnmsac.nxv8i64.i64.i64(<vscale x 8 x i64> [[VD:%.*]], i64 [[RS1:%.*]], <vscale x 8 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
 //
 vint64m8_t test_vnmsac_vx_i64m8(vint64m8_t vd, int64_t rs1, vint64m8_t vs2, size_t vl) {
@@ -405,7 +405,7 @@ vint64m8_t test_vnmsac_vx_i64m8(vint64m8_t vd, int64_t rs1, vint64m8_t vs2, size
 
 // CHECK-RV64-LABEL: @test_vnmsac_vv_u8mf8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vnmsac.nxv1i8.nxv1i8.i64(<vscale x 1 x i8> [[VD:%.*]], <vscale x 1 x i8> [[VS1:%.*]], <vscale x 1 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vnmsac.nxv1i8.nxv1i8.i64(<vscale x 1 x i8> [[VD:%.*]], <vscale x 1 x i8> [[VS1:%.*]], <vscale x 1 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i8> [[TMP0]]
 //
 vuint8mf8_t test_vnmsac_vv_u8mf8(vuint8mf8_t vd, vuint8mf8_t vs1, vuint8mf8_t vs2, size_t vl) {
@@ -414,7 +414,7 @@ vuint8mf8_t test_vnmsac_vv_u8mf8(vuint8mf8_t vd, vuint8mf8_t vs1, vuint8mf8_t vs
 
 // CHECK-RV64-LABEL: @test_vnmsac_vx_u8mf8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vnmsac.nxv1i8.i8.i64(<vscale x 1 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 1 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vnmsac.nxv1i8.i8.i64(<vscale x 1 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 1 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i8> [[TMP0]]
 //
 vuint8mf8_t test_vnmsac_vx_u8mf8(vuint8mf8_t vd, uint8_t rs1, vuint8mf8_t vs2, size_t vl) {
@@ -423,7 +423,7 @@ vuint8mf8_t test_vnmsac_vx_u8mf8(vuint8mf8_t vd, uint8_t rs1, vuint8mf8_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vnmsac_vv_u8mf4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vnmsac.nxv2i8.nxv2i8.i64(<vscale x 2 x i8> [[VD:%.*]], <vscale x 2 x i8> [[VS1:%.*]], <vscale x 2 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vnmsac.nxv2i8.nxv2i8.i64(<vscale x 2 x i8> [[VD:%.*]], <vscale x 2 x i8> [[VS1:%.*]], <vscale x 2 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i8> [[TMP0]]
 //
 vuint8mf4_t test_vnmsac_vv_u8mf4(vuint8mf4_t vd, vuint8mf4_t vs1, vuint8mf4_t vs2, size_t vl) {
@@ -432,7 +432,7 @@ vuint8mf4_t test_vnmsac_vv_u8mf4(vuint8mf4_t vd, vuint8mf4_t vs1, vuint8mf4_t vs
 
 // CHECK-RV64-LABEL: @test_vnmsac_vx_u8mf4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vnmsac.nxv2i8.i8.i64(<vscale x 2 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 2 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vnmsac.nxv2i8.i8.i64(<vscale x 2 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 2 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i8> [[TMP0]]
 //
 vuint8mf4_t test_vnmsac_vx_u8mf4(vuint8mf4_t vd, uint8_t rs1, vuint8mf4_t vs2, size_t vl) {
@@ -441,7 +441,7 @@ vuint8mf4_t test_vnmsac_vx_u8mf4(vuint8mf4_t vd, uint8_t rs1, vuint8mf4_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vnmsac_vv_u8mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vnmsac.nxv4i8.nxv4i8.i64(<vscale x 4 x i8> [[VD:%.*]], <vscale x 4 x i8> [[VS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vnmsac.nxv4i8.nxv4i8.i64(<vscale x 4 x i8> [[VD:%.*]], <vscale x 4 x i8> [[VS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i8> [[TMP0]]
 //
 vuint8mf2_t test_vnmsac_vv_u8mf2(vuint8mf2_t vd, vuint8mf2_t vs1, vuint8mf2_t vs2, size_t vl) {
@@ -450,7 +450,7 @@ vuint8mf2_t test_vnmsac_vv_u8mf2(vuint8mf2_t vd, vuint8mf2_t vs1, vuint8mf2_t vs
 
 // CHECK-RV64-LABEL: @test_vnmsac_vx_u8mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vnmsac.nxv4i8.i8.i64(<vscale x 4 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vnmsac.nxv4i8.i8.i64(<vscale x 4 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i8> [[TMP0]]
 //
 vuint8mf2_t test_vnmsac_vx_u8mf2(vuint8mf2_t vd, uint8_t rs1, vuint8mf2_t vs2, size_t vl) {
@@ -459,7 +459,7 @@ vuint8mf2_t test_vnmsac_vx_u8mf2(vuint8mf2_t vd, uint8_t rs1, vuint8mf2_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vnmsac_vv_u8m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vnmsac.nxv8i8.nxv8i8.i64(<vscale x 8 x i8> [[VD:%.*]], <vscale x 8 x i8> [[VS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vnmsac.nxv8i8.nxv8i8.i64(<vscale x 8 x i8> [[VD:%.*]], <vscale x 8 x i8> [[VS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i8> [[TMP0]]
 //
 vuint8m1_t test_vnmsac_vv_u8m1(vuint8m1_t vd, vuint8m1_t vs1, vuint8m1_t vs2, size_t vl) {
@@ -468,7 +468,7 @@ vuint8m1_t test_vnmsac_vv_u8m1(vuint8m1_t vd, vuint8m1_t vs1, vuint8m1_t vs2, si
 
 // CHECK-RV64-LABEL: @test_vnmsac_vx_u8m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vnmsac.nxv8i8.i8.i64(<vscale x 8 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vnmsac.nxv8i8.i8.i64(<vscale x 8 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i8> [[TMP0]]
 //
 vuint8m1_t test_vnmsac_vx_u8m1(vuint8m1_t vd, uint8_t rs1, vuint8m1_t vs2, size_t vl) {
@@ -477,7 +477,7 @@ vuint8m1_t test_vnmsac_vx_u8m1(vuint8m1_t vd, uint8_t rs1, vuint8m1_t vs2, size_
 
 // CHECK-RV64-LABEL: @test_vnmsac_vv_u8m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vnmsac.nxv16i8.nxv16i8.i64(<vscale x 16 x i8> [[VD:%.*]], <vscale x 16 x i8> [[VS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vnmsac.nxv16i8.nxv16i8.i64(<vscale x 16 x i8> [[VD:%.*]], <vscale x 16 x i8> [[VS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
 vuint8m2_t test_vnmsac_vv_u8m2(vuint8m2_t vd, vuint8m2_t vs1, vuint8m2_t vs2, size_t vl) {
@@ -486,7 +486,7 @@ vuint8m2_t test_vnmsac_vv_u8m2(vuint8m2_t vd, vuint8m2_t vs1, vuint8m2_t vs2, si
 
 // CHECK-RV64-LABEL: @test_vnmsac_vx_u8m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vnmsac.nxv16i8.i8.i64(<vscale x 16 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vnmsac.nxv16i8.i8.i64(<vscale x 16 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
 vuint8m2_t test_vnmsac_vx_u8m2(vuint8m2_t vd, uint8_t rs1, vuint8m2_t vs2, size_t vl) {
@@ -495,7 +495,7 @@ vuint8m2_t test_vnmsac_vx_u8m2(vuint8m2_t vd, uint8_t rs1, vuint8m2_t vs2, size_
 
 // CHECK-RV64-LABEL: @test_vnmsac_vv_u8m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i8> @llvm.riscv.vnmsac.nxv32i8.nxv32i8.i64(<vscale x 32 x i8> [[VD:%.*]], <vscale x 32 x i8> [[VS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i8> @llvm.riscv.vnmsac.nxv32i8.nxv32i8.i64(<vscale x 32 x i8> [[VD:%.*]], <vscale x 32 x i8> [[VS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 32 x i8> [[TMP0]]
 //
 vuint8m4_t test_vnmsac_vv_u8m4(vuint8m4_t vd, vuint8m4_t vs1, vuint8m4_t vs2, size_t vl) {
@@ -504,7 +504,7 @@ vuint8m4_t test_vnmsac_vv_u8m4(vuint8m4_t vd, vuint8m4_t vs1, vuint8m4_t vs2, si
 
 // CHECK-RV64-LABEL: @test_vnmsac_vx_u8m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i8> @llvm.riscv.vnmsac.nxv32i8.i8.i64(<vscale x 32 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i8> @llvm.riscv.vnmsac.nxv32i8.i8.i64(<vscale x 32 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 32 x i8> [[TMP0]]
 //
 vuint8m4_t test_vnmsac_vx_u8m4(vuint8m4_t vd, uint8_t rs1, vuint8m4_t vs2, size_t vl) {
@@ -513,7 +513,7 @@ vuint8m4_t test_vnmsac_vx_u8m4(vuint8m4_t vd, uint8_t rs1, vuint8m4_t vs2, size_
 
 // CHECK-RV64-LABEL: @test_vnmsac_vv_u8m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 64 x i8> @llvm.riscv.vnmsac.nxv64i8.nxv64i8.i64(<vscale x 64 x i8> [[VD:%.*]], <vscale x 64 x i8> [[VS1:%.*]], <vscale x 64 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 64 x i8> @llvm.riscv.vnmsac.nxv64i8.nxv64i8.i64(<vscale x 64 x i8> [[VD:%.*]], <vscale x 64 x i8> [[VS1:%.*]], <vscale x 64 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 64 x i8> [[TMP0]]
 //
 vuint8m8_t test_vnmsac_vv_u8m8(vuint8m8_t vd, vuint8m8_t vs1, vuint8m8_t vs2, size_t vl) {
@@ -522,7 +522,7 @@ vuint8m8_t test_vnmsac_vv_u8m8(vuint8m8_t vd, vuint8m8_t vs1, vuint8m8_t vs2, si
 
 // CHECK-RV64-LABEL: @test_vnmsac_vx_u8m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 64 x i8> @llvm.riscv.vnmsac.nxv64i8.i8.i64(<vscale x 64 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 64 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 64 x i8> @llvm.riscv.vnmsac.nxv64i8.i8.i64(<vscale x 64 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 64 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 64 x i8> [[TMP0]]
 //
 vuint8m8_t test_vnmsac_vx_u8m8(vuint8m8_t vd, uint8_t rs1, vuint8m8_t vs2, size_t vl) {
@@ -531,7 +531,7 @@ vuint8m8_t test_vnmsac_vx_u8m8(vuint8m8_t vd, uint8_t rs1, vuint8m8_t vs2, size_
 
 // CHECK-RV64-LABEL: @test_vnmsac_vv_u16mf4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vnmsac.nxv1i16.nxv1i16.i64(<vscale x 1 x i16> [[VD:%.*]], <vscale x 1 x i16> [[VS1:%.*]], <vscale x 1 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vnmsac.nxv1i16.nxv1i16.i64(<vscale x 1 x i16> [[VD:%.*]], <vscale x 1 x i16> [[VS1:%.*]], <vscale x 1 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i16> [[TMP0]]
 //
 vuint16mf4_t test_vnmsac_vv_u16mf4(vuint16mf4_t vd, vuint16mf4_t vs1, vuint16mf4_t vs2, size_t vl) {
@@ -540,7 +540,7 @@ vuint16mf4_t test_vnmsac_vv_u16mf4(vuint16mf4_t vd, vuint16mf4_t vs1, vuint16mf4
 
 // CHECK-RV64-LABEL: @test_vnmsac_vx_u16mf4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vnmsac.nxv1i16.i16.i64(<vscale x 1 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 1 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vnmsac.nxv1i16.i16.i64(<vscale x 1 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 1 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i16> [[TMP0]]
 //
 vuint16mf4_t test_vnmsac_vx_u16mf4(vuint16mf4_t vd, uint16_t rs1, vuint16mf4_t vs2, size_t vl) {
@@ -549,7 +549,7 @@ vuint16mf4_t test_vnmsac_vx_u16mf4(vuint16mf4_t vd, uint16_t rs1, vuint16mf4_t v
 
 // CHECK-RV64-LABEL: @test_vnmsac_vv_u16mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vnmsac.nxv2i16.nxv2i16.i64(<vscale x 2 x i16> [[VD:%.*]], <vscale x 2 x i16> [[VS1:%.*]], <vscale x 2 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vnmsac.nxv2i16.nxv2i16.i64(<vscale x 2 x i16> [[VD:%.*]], <vscale x 2 x i16> [[VS1:%.*]], <vscale x 2 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i16> [[TMP0]]
 //
 vuint16mf2_t test_vnmsac_vv_u16mf2(vuint16mf2_t vd, vuint16mf2_t vs1, vuint16mf2_t vs2, size_t vl) {
@@ -558,7 +558,7 @@ vuint16mf2_t test_vnmsac_vv_u16mf2(vuint16mf2_t vd, vuint16mf2_t vs1, vuint16mf2
 
 // CHECK-RV64-LABEL: @test_vnmsac_vx_u16mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vnmsac.nxv2i16.i16.i64(<vscale x 2 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 2 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vnmsac.nxv2i16.i16.i64(<vscale x 2 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 2 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i16> [[TMP0]]
 //
 vuint16mf2_t test_vnmsac_vx_u16mf2(vuint16mf2_t vd, uint16_t rs1, vuint16mf2_t vs2, size_t vl) {
@@ -567,7 +567,7 @@ vuint16mf2_t test_vnmsac_vx_u16mf2(vuint16mf2_t vd, uint16_t rs1, vuint16mf2_t v
 
 // CHECK-RV64-LABEL: @test_vnmsac_vv_u16m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vnmsac.nxv4i16.nxv4i16.i64(<vscale x 4 x i16> [[VD:%.*]], <vscale x 4 x i16> [[VS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vnmsac.nxv4i16.nxv4i16.i64(<vscale x 4 x i16> [[VD:%.*]], <vscale x 4 x i16> [[VS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i16> [[TMP0]]
 //
 vuint16m1_t test_vnmsac_vv_u16m1(vuint16m1_t vd, vuint16m1_t vs1, vuint16m1_t vs2, size_t vl) {
@@ -576,7 +576,7 @@ vuint16m1_t test_vnmsac_vv_u16m1(vuint16m1_t vd, vuint16m1_t vs1, vuint16m1_t vs
 
 // CHECK-RV64-LABEL: @test_vnmsac_vx_u16m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vnmsac.nxv4i16.i16.i64(<vscale x 4 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vnmsac.nxv4i16.i16.i64(<vscale x 4 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i16> [[TMP0]]
 //
 vuint16m1_t test_vnmsac_vx_u16m1(vuint16m1_t vd, uint16_t rs1, vuint16m1_t vs2, size_t vl) {
@@ -585,7 +585,7 @@ vuint16m1_t test_vnmsac_vx_u16m1(vuint16m1_t vd, uint16_t rs1, vuint16m1_t vs2,
 
 // CHECK-RV64-LABEL: @test_vnmsac_vv_u16m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vnmsac.nxv8i16.nxv8i16.i64(<vscale x 8 x i16> [[VD:%.*]], <vscale x 8 x i16> [[VS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vnmsac.nxv8i16.nxv8i16.i64(<vscale x 8 x i16> [[VD:%.*]], <vscale x 8 x i16> [[VS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
 vuint16m2_t test_vnmsac_vv_u16m2(vuint16m2_t vd, vuint16m2_t vs1, vuint16m2_t vs2, size_t vl) {
@@ -594,7 +594,7 @@ vuint16m2_t test_vnmsac_vv_u16m2(vuint16m2_t vd, vuint16m2_t vs1, vuint16m2_t vs
 
 // CHECK-RV64-LABEL: @test_vnmsac_vx_u16m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vnmsac.nxv8i16.i16.i64(<vscale x 8 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vnmsac.nxv8i16.i16.i64(<vscale x 8 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
 vuint16m2_t test_vnmsac_vx_u16m2(vuint16m2_t vd, uint16_t rs1, vuint16m2_t vs2, size_t vl) {
@@ -603,7 +603,7 @@ vuint16m2_t test_vnmsac_vx_u16m2(vuint16m2_t vd, uint16_t rs1, vuint16m2_t vs2,
 
 // CHECK-RV64-LABEL: @test_vnmsac_vv_u16m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vnmsac.nxv16i16.nxv16i16.i64(<vscale x 16 x i16> [[VD:%.*]], <vscale x 16 x i16> [[VS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vnmsac.nxv16i16.nxv16i16.i64(<vscale x 16 x i16> [[VD:%.*]], <vscale x 16 x i16> [[VS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i16> [[TMP0]]
 //
 vuint16m4_t test_vnmsac_vv_u16m4(vuint16m4_t vd, vuint16m4_t vs1, vuint16m4_t vs2, size_t vl) {
@@ -612,7 +612,7 @@ vuint16m4_t test_vnmsac_vv_u16m4(vuint16m4_t vd, vuint16m4_t vs1, vuint16m4_t vs
 
 // CHECK-RV64-LABEL: @test_vnmsac_vx_u16m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vnmsac.nxv16i16.i16.i64(<vscale x 16 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vnmsac.nxv16i16.i16.i64(<vscale x 16 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i16> [[TMP0]]
 //
 vuint16m4_t test_vnmsac_vx_u16m4(vuint16m4_t vd, uint16_t rs1, vuint16m4_t vs2, size_t vl) {
@@ -621,7 +621,7 @@ vuint16m4_t test_vnmsac_vx_u16m4(vuint16m4_t vd, uint16_t rs1, vuint16m4_t vs2,
 
 // CHECK-RV64-LABEL: @test_vnmsac_vv_u16m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vnmsac.nxv32i16.nxv32i16.i64(<vscale x 32 x i16> [[VD:%.*]], <vscale x 32 x i16> [[VS1:%.*]], <vscale x 32 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vnmsac.nxv32i16.nxv32i16.i64(<vscale x 32 x i16> [[VD:%.*]], <vscale x 32 x i16> [[VS1:%.*]], <vscale x 32 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 32 x i16> [[TMP0]]
 //
 vuint16m8_t test_vnmsac_vv_u16m8(vuint16m8_t vd, vuint16m8_t vs1, vuint16m8_t vs2, size_t vl) {
@@ -630,7 +630,7 @@ vuint16m8_t test_vnmsac_vv_u16m8(vuint16m8_t vd, vuint16m8_t vs1, vuint16m8_t vs
 
 // CHECK-RV64-LABEL: @test_vnmsac_vx_u16m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vnmsac.nxv32i16.i16.i64(<vscale x 32 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 32 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vnmsac.nxv32i16.i16.i64(<vscale x 32 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 32 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 32 x i16> [[TMP0]]
 //
 vuint16m8_t test_vnmsac_vx_u16m8(vuint16m8_t vd, uint16_t rs1, vuint16m8_t vs2, size_t vl) {
@@ -639,7 +639,7 @@ vuint16m8_t test_vnmsac_vx_u16m8(vuint16m8_t vd, uint16_t rs1, vuint16m8_t vs2,
 
 // CHECK-RV64-LABEL: @test_vnmsac_vv_u32mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vnmsac.nxv1i32.nxv1i32.i64(<vscale x 1 x i32> [[VD:%.*]], <vscale x 1 x i32> [[VS1:%.*]], <vscale x 1 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vnmsac.nxv1i32.nxv1i32.i64(<vscale x 1 x i32> [[VD:%.*]], <vscale x 1 x i32> [[VS1:%.*]], <vscale x 1 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i32> [[TMP0]]
 //
 vuint32mf2_t test_vnmsac_vv_u32mf2(vuint32mf2_t vd, vuint32mf2_t vs1, vuint32mf2_t vs2, size_t vl) {
@@ -648,7 +648,7 @@ vuint32mf2_t test_vnmsac_vv_u32mf2(vuint32mf2_t vd, vuint32mf2_t vs1, vuint32mf2
 
 // CHECK-RV64-LABEL: @test_vnmsac_vx_u32mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vnmsac.nxv1i32.i32.i64(<vscale x 1 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 1 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vnmsac.nxv1i32.i32.i64(<vscale x 1 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 1 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i32> [[TMP0]]
 //
 vuint32mf2_t test_vnmsac_vx_u32mf2(vuint32mf2_t vd, uint32_t rs1, vuint32mf2_t vs2, size_t vl) {
@@ -657,7 +657,7 @@ vuint32mf2_t test_vnmsac_vx_u32mf2(vuint32mf2_t vd, uint32_t rs1, vuint32mf2_t v
 
 // CHECK-RV64-LABEL: @test_vnmsac_vv_u32m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vnmsac.nxv2i32.nxv2i32.i64(<vscale x 2 x i32> [[VD:%.*]], <vscale x 2 x i32> [[VS1:%.*]], <vscale x 2 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vnmsac.nxv2i32.nxv2i32.i64(<vscale x 2 x i32> [[VD:%.*]], <vscale x 2 x i32> [[VS1:%.*]], <vscale x 2 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i32> [[TMP0]]
 //
 vuint32m1_t test_vnmsac_vv_u32m1(vuint32m1_t vd, vuint32m1_t vs1, vuint32m1_t vs2, size_t vl) {
@@ -666,7 +666,7 @@ vuint32m1_t test_vnmsac_vv_u32m1(vuint32m1_t vd, vuint32m1_t vs1, vuint32m1_t vs
 
 // CHECK-RV64-LABEL: @test_vnmsac_vx_u32m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vnmsac.nxv2i32.i32.i64(<vscale x 2 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 2 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vnmsac.nxv2i32.i32.i64(<vscale x 2 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 2 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i32> [[TMP0]]
 //
 vuint32m1_t test_vnmsac_vx_u32m1(vuint32m1_t vd, uint32_t rs1, vuint32m1_t vs2, size_t vl) {
@@ -675,7 +675,7 @@ vuint32m1_t test_vnmsac_vx_u32m1(vuint32m1_t vd, uint32_t rs1, vuint32m1_t vs2,
 
 // CHECK-RV64-LABEL: @test_vnmsac_vv_u32m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vnmsac.nxv4i32.nxv4i32.i64(<vscale x 4 x i32> [[VD:%.*]], <vscale x 4 x i32> [[VS1:%.*]], <vscale x 4 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vnmsac.nxv4i32.nxv4i32.i64(<vscale x 4 x i32> [[VD:%.*]], <vscale x 4 x i32> [[VS1:%.*]], <vscale x 4 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
 vuint32m2_t test_vnmsac_vv_u32m2(vuint32m2_t vd, vuint32m2_t vs1, vuint32m2_t vs2, size_t vl) {
@@ -684,7 +684,7 @@ vuint32m2_t test_vnmsac_vv_u32m2(vuint32m2_t vd, vuint32m2_t vs1, vuint32m2_t vs
 
 // CHECK-RV64-LABEL: @test_vnmsac_vx_u32m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vnmsac.nxv4i32.i32.i64(<vscale x 4 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 4 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vnmsac.nxv4i32.i32.i64(<vscale x 4 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 4 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
 vuint32m2_t test_vnmsac_vx_u32m2(vuint32m2_t vd, uint32_t rs1, vuint32m2_t vs2, size_t vl) {
@@ -693,7 +693,7 @@ vuint32m2_t test_vnmsac_vx_u32m2(vuint32m2_t vd, uint32_t rs1, vuint32m2_t vs2,
 
 // CHECK-RV64-LABEL: @test_vnmsac_vv_u32m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vnmsac.nxv8i32.nxv8i32.i64(<vscale x 8 x i32> [[VD:%.*]], <vscale x 8 x i32> [[VS1:%.*]], <vscale x 8 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vnmsac.nxv8i32.nxv8i32.i64(<vscale x 8 x i32> [[VD:%.*]], <vscale x 8 x i32> [[VS1:%.*]], <vscale x 8 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i32> [[TMP0]]
 //
 vuint32m4_t test_vnmsac_vv_u32m4(vuint32m4_t vd, vuint32m4_t vs1, vuint32m4_t vs2, size_t vl) {
@@ -702,7 +702,7 @@ vuint32m4_t test_vnmsac_vv_u32m4(vuint32m4_t vd, vuint32m4_t vs1, vuint32m4_t vs
 
 // CHECK-RV64-LABEL: @test_vnmsac_vx_u32m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vnmsac.nxv8i32.i32.i64(<vscale x 8 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 8 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vnmsac.nxv8i32.i32.i64(<vscale x 8 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 8 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i32> [[TMP0]]
 //
 vuint32m4_t test_vnmsac_vx_u32m4(vuint32m4_t vd, uint32_t rs1, vuint32m4_t vs2, size_t vl) {
@@ -711,7 +711,7 @@ vuint32m4_t test_vnmsac_vx_u32m4(vuint32m4_t vd, uint32_t rs1, vuint32m4_t vs2,
 
 // CHECK-RV64-LABEL: @test_vnmsac_vv_u32m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vnmsac.nxv16i32.nxv16i32.i64(<vscale x 16 x i32> [[VD:%.*]], <vscale x 16 x i32> [[VS1:%.*]], <vscale x 16 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vnmsac.nxv16i32.nxv16i32.i64(<vscale x 16 x i32> [[VD:%.*]], <vscale x 16 x i32> [[VS1:%.*]], <vscale x 16 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i32> [[TMP0]]
 //
 vuint32m8_t test_vnmsac_vv_u32m8(vuint32m8_t vd, vuint32m8_t vs1, vuint32m8_t vs2, size_t vl) {
@@ -720,7 +720,7 @@ vuint32m8_t test_vnmsac_vv_u32m8(vuint32m8_t vd, vuint32m8_t vs1, vuint32m8_t vs
 
 // CHECK-RV64-LABEL: @test_vnmsac_vx_u32m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vnmsac.nxv16i32.i32.i64(<vscale x 16 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 16 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vnmsac.nxv16i32.i32.i64(<vscale x 16 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 16 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i32> [[TMP0]]
 //
 vuint32m8_t test_vnmsac_vx_u32m8(vuint32m8_t vd, uint32_t rs1, vuint32m8_t vs2, size_t vl) {
@@ -729,7 +729,7 @@ vuint32m8_t test_vnmsac_vx_u32m8(vuint32m8_t vd, uint32_t rs1, vuint32m8_t vs2,
 
 // CHECK-RV64-LABEL: @test_vnmsac_vv_u64m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vnmsac.nxv1i64.nxv1i64.i64(<vscale x 1 x i64> [[VD:%.*]], <vscale x 1 x i64> [[VS1:%.*]], <vscale x 1 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vnmsac.nxv1i64.nxv1i64.i64(<vscale x 1 x i64> [[VD:%.*]], <vscale x 1 x i64> [[VS1:%.*]], <vscale x 1 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
 //
 vuint64m1_t test_vnmsac_vv_u64m1(vuint64m1_t vd, vuint64m1_t vs1, vuint64m1_t vs2, size_t vl) {
@@ -738,7 +738,7 @@ vuint64m1_t test_vnmsac_vv_u64m1(vuint64m1_t vd, vuint64m1_t vs1, vuint64m1_t vs
 
 // CHECK-RV64-LABEL: @test_vnmsac_vx_u64m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vnmsac.nxv1i64.i64.i64(<vscale x 1 x i64> [[VD:%.*]], i64 [[RS1:%.*]], <vscale x 1 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vnmsac.nxv1i64.i64.i64(<vscale x 1 x i64> [[VD:%.*]], i64 [[RS1:%.*]], <vscale x 1 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
 //
 vuint64m1_t test_vnmsac_vx_u64m1(vuint64m1_t vd, uint64_t rs1, vuint64m1_t vs2, size_t vl) {
@@ -747,7 +747,7 @@ vuint64m1_t test_vnmsac_vx_u64m1(vuint64m1_t vd, uint64_t rs1, vuint64m1_t vs2,
 
 // CHECK-RV64-LABEL: @test_vnmsac_vv_u64m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vnmsac.nxv2i64.nxv2i64.i64(<vscale x 2 x i64> [[VD:%.*]], <vscale x 2 x i64> [[VS1:%.*]], <vscale x 2 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vnmsac.nxv2i64.nxv2i64.i64(<vscale x 2 x i64> [[VD:%.*]], <vscale x 2 x i64> [[VS1:%.*]], <vscale x 2 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
 vuint64m2_t test_vnmsac_vv_u64m2(vuint64m2_t vd, vuint64m2_t vs1, vuint64m2_t vs2, size_t vl) {
@@ -756,7 +756,7 @@ vuint64m2_t test_vnmsac_vv_u64m2(vuint64m2_t vd, vuint64m2_t vs1, vuint64m2_t vs
 
 // CHECK-RV64-LABEL: @test_vnmsac_vx_u64m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vnmsac.nxv2i64.i64.i64(<vscale x 2 x i64> [[VD:%.*]], i64 [[RS1:%.*]], <vscale x 2 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vnmsac.nxv2i64.i64.i64(<vscale x 2 x i64> [[VD:%.*]], i64 [[RS1:%.*]], <vscale x 2 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
 vuint64m2_t test_vnmsac_vx_u64m2(vuint64m2_t vd, uint64_t rs1, vuint64m2_t vs2, size_t vl) {
@@ -765,7 +765,7 @@ vuint64m2_t test_vnmsac_vx_u64m2(vuint64m2_t vd, uint64_t rs1, vuint64m2_t vs2,
 
 // CHECK-RV64-LABEL: @test_vnmsac_vv_u64m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vnmsac.nxv4i64.nxv4i64.i64(<vscale x 4 x i64> [[VD:%.*]], <vscale x 4 x i64> [[VS1:%.*]], <vscale x 4 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vnmsac.nxv4i64.nxv4i64.i64(<vscale x 4 x i64> [[VD:%.*]], <vscale x 4 x i64> [[VS1:%.*]], <vscale x 4 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
 //
 vuint64m4_t test_vnmsac_vv_u64m4(vuint64m4_t vd, vuint64m4_t vs1, vuint64m4_t vs2, size_t vl) {
@@ -774,7 +774,7 @@ vuint64m4_t test_vnmsac_vv_u64m4(vuint64m4_t vd, vuint64m4_t vs1, vuint64m4_t vs
 
 // CHECK-RV64-LABEL: @test_vnmsac_vx_u64m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vnmsac.nxv4i64.i64.i64(<vscale x 4 x i64> [[VD:%.*]], i64 [[RS1:%.*]], <vscale x 4 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vnmsac.nxv4i64.i64.i64(<vscale x 4 x i64> [[VD:%.*]], i64 [[RS1:%.*]], <vscale x 4 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
 //
 vuint64m4_t test_vnmsac_vx_u64m4(vuint64m4_t vd, uint64_t rs1, vuint64m4_t vs2, size_t vl) {
@@ -783,7 +783,7 @@ vuint64m4_t test_vnmsac_vx_u64m4(vuint64m4_t vd, uint64_t rs1, vuint64m4_t vs2,
 
 // CHECK-RV64-LABEL: @test_vnmsac_vv_u64m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vnmsac.nxv8i64.nxv8i64.i64(<vscale x 8 x i64> [[VD:%.*]], <vscale x 8 x i64> [[VS1:%.*]], <vscale x 8 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vnmsac.nxv8i64.nxv8i64.i64(<vscale x 8 x i64> [[VD:%.*]], <vscale x 8 x i64> [[VS1:%.*]], <vscale x 8 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
 //
 vuint64m8_t test_vnmsac_vv_u64m8(vuint64m8_t vd, vuint64m8_t vs1, vuint64m8_t vs2, size_t vl) {
@@ -792,7 +792,7 @@ vuint64m8_t test_vnmsac_vv_u64m8(vuint64m8_t vd, vuint64m8_t vs1, vuint64m8_t vs
 
 // CHECK-RV64-LABEL: @test_vnmsac_vx_u64m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vnmsac.nxv8i64.i64.i64(<vscale x 8 x i64> [[VD:%.*]], i64 [[RS1:%.*]], <vscale x 8 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vnmsac.nxv8i64.i64.i64(<vscale x 8 x i64> [[VD:%.*]], i64 [[RS1:%.*]], <vscale x 8 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
 //
 vuint64m8_t test_vnmsac_vx_u64m8(vuint64m8_t vd, uint64_t rs1, vuint64m8_t vs2, size_t vl) {

diff  --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vnmsub.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vnmsub.c
index 7488d88c679fb..adaff05453cf2 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vnmsub.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vnmsub.c
@@ -9,7 +9,7 @@
 
 // CHECK-RV64-LABEL: @test_vnmsub_vv_i8mf8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vnmsub.nxv1i8.nxv1i8.i64(<vscale x 1 x i8> [[VD:%.*]], <vscale x 1 x i8> [[VS1:%.*]], <vscale x 1 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vnmsub.nxv1i8.nxv1i8.i64(<vscale x 1 x i8> [[VD:%.*]], <vscale x 1 x i8> [[VS1:%.*]], <vscale x 1 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i8> [[TMP0]]
 //
 vint8mf8_t test_vnmsub_vv_i8mf8(vint8mf8_t vd, vint8mf8_t vs1, vint8mf8_t vs2, size_t vl) {
@@ -18,7 +18,7 @@ vint8mf8_t test_vnmsub_vv_i8mf8(vint8mf8_t vd, vint8mf8_t vs1, vint8mf8_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vnmsub_vx_i8mf8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vnmsub.nxv1i8.i8.i64(<vscale x 1 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 1 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vnmsub.nxv1i8.i8.i64(<vscale x 1 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 1 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i8> [[TMP0]]
 //
 vint8mf8_t test_vnmsub_vx_i8mf8(vint8mf8_t vd, int8_t rs1, vint8mf8_t vs2, size_t vl) {
@@ -27,7 +27,7 @@ vint8mf8_t test_vnmsub_vx_i8mf8(vint8mf8_t vd, int8_t rs1, vint8mf8_t vs2, size_
 
 // CHECK-RV64-LABEL: @test_vnmsub_vv_i8mf4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vnmsub.nxv2i8.nxv2i8.i64(<vscale x 2 x i8> [[VD:%.*]], <vscale x 2 x i8> [[VS1:%.*]], <vscale x 2 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vnmsub.nxv2i8.nxv2i8.i64(<vscale x 2 x i8> [[VD:%.*]], <vscale x 2 x i8> [[VS1:%.*]], <vscale x 2 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i8> [[TMP0]]
 //
 vint8mf4_t test_vnmsub_vv_i8mf4(vint8mf4_t vd, vint8mf4_t vs1, vint8mf4_t vs2, size_t vl) {
@@ -36,7 +36,7 @@ vint8mf4_t test_vnmsub_vv_i8mf4(vint8mf4_t vd, vint8mf4_t vs1, vint8mf4_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vnmsub_vx_i8mf4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vnmsub.nxv2i8.i8.i64(<vscale x 2 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 2 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vnmsub.nxv2i8.i8.i64(<vscale x 2 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 2 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i8> [[TMP0]]
 //
 vint8mf4_t test_vnmsub_vx_i8mf4(vint8mf4_t vd, int8_t rs1, vint8mf4_t vs2, size_t vl) {
@@ -45,7 +45,7 @@ vint8mf4_t test_vnmsub_vx_i8mf4(vint8mf4_t vd, int8_t rs1, vint8mf4_t vs2, size_
 
 // CHECK-RV64-LABEL: @test_vnmsub_vv_i8mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vnmsub.nxv4i8.nxv4i8.i64(<vscale x 4 x i8> [[VD:%.*]], <vscale x 4 x i8> [[VS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vnmsub.nxv4i8.nxv4i8.i64(<vscale x 4 x i8> [[VD:%.*]], <vscale x 4 x i8> [[VS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i8> [[TMP0]]
 //
 vint8mf2_t test_vnmsub_vv_i8mf2(vint8mf2_t vd, vint8mf2_t vs1, vint8mf2_t vs2, size_t vl) {
@@ -54,7 +54,7 @@ vint8mf2_t test_vnmsub_vv_i8mf2(vint8mf2_t vd, vint8mf2_t vs1, vint8mf2_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vnmsub_vx_i8mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vnmsub.nxv4i8.i8.i64(<vscale x 4 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vnmsub.nxv4i8.i8.i64(<vscale x 4 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i8> [[TMP0]]
 //
 vint8mf2_t test_vnmsub_vx_i8mf2(vint8mf2_t vd, int8_t rs1, vint8mf2_t vs2, size_t vl) {
@@ -63,7 +63,7 @@ vint8mf2_t test_vnmsub_vx_i8mf2(vint8mf2_t vd, int8_t rs1, vint8mf2_t vs2, size_
 
 // CHECK-RV64-LABEL: @test_vnmsub_vv_i8m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vnmsub.nxv8i8.nxv8i8.i64(<vscale x 8 x i8> [[VD:%.*]], <vscale x 8 x i8> [[VS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vnmsub.nxv8i8.nxv8i8.i64(<vscale x 8 x i8> [[VD:%.*]], <vscale x 8 x i8> [[VS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i8> [[TMP0]]
 //
 vint8m1_t test_vnmsub_vv_i8m1(vint8m1_t vd, vint8m1_t vs1, vint8m1_t vs2, size_t vl) {
@@ -72,7 +72,7 @@ vint8m1_t test_vnmsub_vv_i8m1(vint8m1_t vd, vint8m1_t vs1, vint8m1_t vs2, size_t
 
 // CHECK-RV64-LABEL: @test_vnmsub_vx_i8m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vnmsub.nxv8i8.i8.i64(<vscale x 8 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vnmsub.nxv8i8.i8.i64(<vscale x 8 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i8> [[TMP0]]
 //
 vint8m1_t test_vnmsub_vx_i8m1(vint8m1_t vd, int8_t rs1, vint8m1_t vs2, size_t vl) {
@@ -81,7 +81,7 @@ vint8m1_t test_vnmsub_vx_i8m1(vint8m1_t vd, int8_t rs1, vint8m1_t vs2, size_t vl
 
 // CHECK-RV64-LABEL: @test_vnmsub_vv_i8m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vnmsub.nxv16i8.nxv16i8.i64(<vscale x 16 x i8> [[VD:%.*]], <vscale x 16 x i8> [[VS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vnmsub.nxv16i8.nxv16i8.i64(<vscale x 16 x i8> [[VD:%.*]], <vscale x 16 x i8> [[VS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
 vint8m2_t test_vnmsub_vv_i8m2(vint8m2_t vd, vint8m2_t vs1, vint8m2_t vs2, size_t vl) {
@@ -90,7 +90,7 @@ vint8m2_t test_vnmsub_vv_i8m2(vint8m2_t vd, vint8m2_t vs1, vint8m2_t vs2, size_t
 
 // CHECK-RV64-LABEL: @test_vnmsub_vx_i8m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vnmsub.nxv16i8.i8.i64(<vscale x 16 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vnmsub.nxv16i8.i8.i64(<vscale x 16 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
 vint8m2_t test_vnmsub_vx_i8m2(vint8m2_t vd, int8_t rs1, vint8m2_t vs2, size_t vl) {
@@ -99,7 +99,7 @@ vint8m2_t test_vnmsub_vx_i8m2(vint8m2_t vd, int8_t rs1, vint8m2_t vs2, size_t vl
 
 // CHECK-RV64-LABEL: @test_vnmsub_vv_i8m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i8> @llvm.riscv.vnmsub.nxv32i8.nxv32i8.i64(<vscale x 32 x i8> [[VD:%.*]], <vscale x 32 x i8> [[VS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i8> @llvm.riscv.vnmsub.nxv32i8.nxv32i8.i64(<vscale x 32 x i8> [[VD:%.*]], <vscale x 32 x i8> [[VS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 32 x i8> [[TMP0]]
 //
 vint8m4_t test_vnmsub_vv_i8m4(vint8m4_t vd, vint8m4_t vs1, vint8m4_t vs2, size_t vl) {
@@ -108,7 +108,7 @@ vint8m4_t test_vnmsub_vv_i8m4(vint8m4_t vd, vint8m4_t vs1, vint8m4_t vs2, size_t
 
 // CHECK-RV64-LABEL: @test_vnmsub_vx_i8m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i8> @llvm.riscv.vnmsub.nxv32i8.i8.i64(<vscale x 32 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i8> @llvm.riscv.vnmsub.nxv32i8.i8.i64(<vscale x 32 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 32 x i8> [[TMP0]]
 //
 vint8m4_t test_vnmsub_vx_i8m4(vint8m4_t vd, int8_t rs1, vint8m4_t vs2, size_t vl) {
@@ -117,7 +117,7 @@ vint8m4_t test_vnmsub_vx_i8m4(vint8m4_t vd, int8_t rs1, vint8m4_t vs2, size_t vl
 
 // CHECK-RV64-LABEL: @test_vnmsub_vv_i8m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 64 x i8> @llvm.riscv.vnmsub.nxv64i8.nxv64i8.i64(<vscale x 64 x i8> [[VD:%.*]], <vscale x 64 x i8> [[VS1:%.*]], <vscale x 64 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 64 x i8> @llvm.riscv.vnmsub.nxv64i8.nxv64i8.i64(<vscale x 64 x i8> [[VD:%.*]], <vscale x 64 x i8> [[VS1:%.*]], <vscale x 64 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 64 x i8> [[TMP0]]
 //
 vint8m8_t test_vnmsub_vv_i8m8(vint8m8_t vd, vint8m8_t vs1, vint8m8_t vs2, size_t vl) {
@@ -126,7 +126,7 @@ vint8m8_t test_vnmsub_vv_i8m8(vint8m8_t vd, vint8m8_t vs1, vint8m8_t vs2, size_t
 
 // CHECK-RV64-LABEL: @test_vnmsub_vx_i8m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 64 x i8> @llvm.riscv.vnmsub.nxv64i8.i8.i64(<vscale x 64 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 64 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 64 x i8> @llvm.riscv.vnmsub.nxv64i8.i8.i64(<vscale x 64 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 64 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 64 x i8> [[TMP0]]
 //
 vint8m8_t test_vnmsub_vx_i8m8(vint8m8_t vd, int8_t rs1, vint8m8_t vs2, size_t vl) {
@@ -135,7 +135,7 @@ vint8m8_t test_vnmsub_vx_i8m8(vint8m8_t vd, int8_t rs1, vint8m8_t vs2, size_t vl
 
 // CHECK-RV64-LABEL: @test_vnmsub_vv_i16mf4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vnmsub.nxv1i16.nxv1i16.i64(<vscale x 1 x i16> [[VD:%.*]], <vscale x 1 x i16> [[VS1:%.*]], <vscale x 1 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vnmsub.nxv1i16.nxv1i16.i64(<vscale x 1 x i16> [[VD:%.*]], <vscale x 1 x i16> [[VS1:%.*]], <vscale x 1 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i16> [[TMP0]]
 //
 vint16mf4_t test_vnmsub_vv_i16mf4(vint16mf4_t vd, vint16mf4_t vs1, vint16mf4_t vs2, size_t vl) {
@@ -144,7 +144,7 @@ vint16mf4_t test_vnmsub_vv_i16mf4(vint16mf4_t vd, vint16mf4_t vs1, vint16mf4_t v
 
 // CHECK-RV64-LABEL: @test_vnmsub_vx_i16mf4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vnmsub.nxv1i16.i16.i64(<vscale x 1 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 1 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vnmsub.nxv1i16.i16.i64(<vscale x 1 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 1 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i16> [[TMP0]]
 //
 vint16mf4_t test_vnmsub_vx_i16mf4(vint16mf4_t vd, int16_t rs1, vint16mf4_t vs2, size_t vl) {
@@ -153,7 +153,7 @@ vint16mf4_t test_vnmsub_vx_i16mf4(vint16mf4_t vd, int16_t rs1, vint16mf4_t vs2,
 
 // CHECK-RV64-LABEL: @test_vnmsub_vv_i16mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vnmsub.nxv2i16.nxv2i16.i64(<vscale x 2 x i16> [[VD:%.*]], <vscale x 2 x i16> [[VS1:%.*]], <vscale x 2 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vnmsub.nxv2i16.nxv2i16.i64(<vscale x 2 x i16> [[VD:%.*]], <vscale x 2 x i16> [[VS1:%.*]], <vscale x 2 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i16> [[TMP0]]
 //
 vint16mf2_t test_vnmsub_vv_i16mf2(vint16mf2_t vd, vint16mf2_t vs1, vint16mf2_t vs2, size_t vl) {
@@ -162,7 +162,7 @@ vint16mf2_t test_vnmsub_vv_i16mf2(vint16mf2_t vd, vint16mf2_t vs1, vint16mf2_t v
 
 // CHECK-RV64-LABEL: @test_vnmsub_vx_i16mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vnmsub.nxv2i16.i16.i64(<vscale x 2 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 2 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vnmsub.nxv2i16.i16.i64(<vscale x 2 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 2 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i16> [[TMP0]]
 //
 vint16mf2_t test_vnmsub_vx_i16mf2(vint16mf2_t vd, int16_t rs1, vint16mf2_t vs2, size_t vl) {
@@ -171,7 +171,7 @@ vint16mf2_t test_vnmsub_vx_i16mf2(vint16mf2_t vd, int16_t rs1, vint16mf2_t vs2,
 
 // CHECK-RV64-LABEL: @test_vnmsub_vv_i16m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vnmsub.nxv4i16.nxv4i16.i64(<vscale x 4 x i16> [[VD:%.*]], <vscale x 4 x i16> [[VS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vnmsub.nxv4i16.nxv4i16.i64(<vscale x 4 x i16> [[VD:%.*]], <vscale x 4 x i16> [[VS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i16> [[TMP0]]
 //
 vint16m1_t test_vnmsub_vv_i16m1(vint16m1_t vd, vint16m1_t vs1, vint16m1_t vs2, size_t vl) {
@@ -180,7 +180,7 @@ vint16m1_t test_vnmsub_vv_i16m1(vint16m1_t vd, vint16m1_t vs1, vint16m1_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vnmsub_vx_i16m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vnmsub.nxv4i16.i16.i64(<vscale x 4 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vnmsub.nxv4i16.i16.i64(<vscale x 4 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i16> [[TMP0]]
 //
 vint16m1_t test_vnmsub_vx_i16m1(vint16m1_t vd, int16_t rs1, vint16m1_t vs2, size_t vl) {
@@ -189,7 +189,7 @@ vint16m1_t test_vnmsub_vx_i16m1(vint16m1_t vd, int16_t rs1, vint16m1_t vs2, size
 
 // CHECK-RV64-LABEL: @test_vnmsub_vv_i16m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vnmsub.nxv8i16.nxv8i16.i64(<vscale x 8 x i16> [[VD:%.*]], <vscale x 8 x i16> [[VS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vnmsub.nxv8i16.nxv8i16.i64(<vscale x 8 x i16> [[VD:%.*]], <vscale x 8 x i16> [[VS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
 vint16m2_t test_vnmsub_vv_i16m2(vint16m2_t vd, vint16m2_t vs1, vint16m2_t vs2, size_t vl) {
@@ -198,7 +198,7 @@ vint16m2_t test_vnmsub_vv_i16m2(vint16m2_t vd, vint16m2_t vs1, vint16m2_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vnmsub_vx_i16m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vnmsub.nxv8i16.i16.i64(<vscale x 8 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vnmsub.nxv8i16.i16.i64(<vscale x 8 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
 vint16m2_t test_vnmsub_vx_i16m2(vint16m2_t vd, int16_t rs1, vint16m2_t vs2, size_t vl) {
@@ -207,7 +207,7 @@ vint16m2_t test_vnmsub_vx_i16m2(vint16m2_t vd, int16_t rs1, vint16m2_t vs2, size
 
 // CHECK-RV64-LABEL: @test_vnmsub_vv_i16m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vnmsub.nxv16i16.nxv16i16.i64(<vscale x 16 x i16> [[VD:%.*]], <vscale x 16 x i16> [[VS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vnmsub.nxv16i16.nxv16i16.i64(<vscale x 16 x i16> [[VD:%.*]], <vscale x 16 x i16> [[VS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i16> [[TMP0]]
 //
 vint16m4_t test_vnmsub_vv_i16m4(vint16m4_t vd, vint16m4_t vs1, vint16m4_t vs2, size_t vl) {
@@ -216,7 +216,7 @@ vint16m4_t test_vnmsub_vv_i16m4(vint16m4_t vd, vint16m4_t vs1, vint16m4_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vnmsub_vx_i16m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vnmsub.nxv16i16.i16.i64(<vscale x 16 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vnmsub.nxv16i16.i16.i64(<vscale x 16 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i16> [[TMP0]]
 //
 vint16m4_t test_vnmsub_vx_i16m4(vint16m4_t vd, int16_t rs1, vint16m4_t vs2, size_t vl) {
@@ -225,7 +225,7 @@ vint16m4_t test_vnmsub_vx_i16m4(vint16m4_t vd, int16_t rs1, vint16m4_t vs2, size
 
 // CHECK-RV64-LABEL: @test_vnmsub_vv_i16m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vnmsub.nxv32i16.nxv32i16.i64(<vscale x 32 x i16> [[VD:%.*]], <vscale x 32 x i16> [[VS1:%.*]], <vscale x 32 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vnmsub.nxv32i16.nxv32i16.i64(<vscale x 32 x i16> [[VD:%.*]], <vscale x 32 x i16> [[VS1:%.*]], <vscale x 32 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 32 x i16> [[TMP0]]
 //
 vint16m8_t test_vnmsub_vv_i16m8(vint16m8_t vd, vint16m8_t vs1, vint16m8_t vs2, size_t vl) {
@@ -234,7 +234,7 @@ vint16m8_t test_vnmsub_vv_i16m8(vint16m8_t vd, vint16m8_t vs1, vint16m8_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vnmsub_vx_i16m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vnmsub.nxv32i16.i16.i64(<vscale x 32 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 32 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vnmsub.nxv32i16.i16.i64(<vscale x 32 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 32 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 32 x i16> [[TMP0]]
 //
 vint16m8_t test_vnmsub_vx_i16m8(vint16m8_t vd, int16_t rs1, vint16m8_t vs2, size_t vl) {
@@ -243,7 +243,7 @@ vint16m8_t test_vnmsub_vx_i16m8(vint16m8_t vd, int16_t rs1, vint16m8_t vs2, size
 
 // CHECK-RV64-LABEL: @test_vnmsub_vv_i32mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vnmsub.nxv1i32.nxv1i32.i64(<vscale x 1 x i32> [[VD:%.*]], <vscale x 1 x i32> [[VS1:%.*]], <vscale x 1 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vnmsub.nxv1i32.nxv1i32.i64(<vscale x 1 x i32> [[VD:%.*]], <vscale x 1 x i32> [[VS1:%.*]], <vscale x 1 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i32> [[TMP0]]
 //
 vint32mf2_t test_vnmsub_vv_i32mf2(vint32mf2_t vd, vint32mf2_t vs1, vint32mf2_t vs2, size_t vl) {
@@ -252,7 +252,7 @@ vint32mf2_t test_vnmsub_vv_i32mf2(vint32mf2_t vd, vint32mf2_t vs1, vint32mf2_t v
 
 // CHECK-RV64-LABEL: @test_vnmsub_vx_i32mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vnmsub.nxv1i32.i32.i64(<vscale x 1 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 1 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vnmsub.nxv1i32.i32.i64(<vscale x 1 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 1 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i32> [[TMP0]]
 //
 vint32mf2_t test_vnmsub_vx_i32mf2(vint32mf2_t vd, int32_t rs1, vint32mf2_t vs2, size_t vl) {
@@ -261,7 +261,7 @@ vint32mf2_t test_vnmsub_vx_i32mf2(vint32mf2_t vd, int32_t rs1, vint32mf2_t vs2,
 
 // CHECK-RV64-LABEL: @test_vnmsub_vv_i32m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vnmsub.nxv2i32.nxv2i32.i64(<vscale x 2 x i32> [[VD:%.*]], <vscale x 2 x i32> [[VS1:%.*]], <vscale x 2 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vnmsub.nxv2i32.nxv2i32.i64(<vscale x 2 x i32> [[VD:%.*]], <vscale x 2 x i32> [[VS1:%.*]], <vscale x 2 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i32> [[TMP0]]
 //
 vint32m1_t test_vnmsub_vv_i32m1(vint32m1_t vd, vint32m1_t vs1, vint32m1_t vs2, size_t vl) {
@@ -270,7 +270,7 @@ vint32m1_t test_vnmsub_vv_i32m1(vint32m1_t vd, vint32m1_t vs1, vint32m1_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vnmsub_vx_i32m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vnmsub.nxv2i32.i32.i64(<vscale x 2 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 2 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vnmsub.nxv2i32.i32.i64(<vscale x 2 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 2 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i32> [[TMP0]]
 //
 vint32m1_t test_vnmsub_vx_i32m1(vint32m1_t vd, int32_t rs1, vint32m1_t vs2, size_t vl) {
@@ -279,7 +279,7 @@ vint32m1_t test_vnmsub_vx_i32m1(vint32m1_t vd, int32_t rs1, vint32m1_t vs2, size
 
 // CHECK-RV64-LABEL: @test_vnmsub_vv_i32m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vnmsub.nxv4i32.nxv4i32.i64(<vscale x 4 x i32> [[VD:%.*]], <vscale x 4 x i32> [[VS1:%.*]], <vscale x 4 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vnmsub.nxv4i32.nxv4i32.i64(<vscale x 4 x i32> [[VD:%.*]], <vscale x 4 x i32> [[VS1:%.*]], <vscale x 4 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
 vint32m2_t test_vnmsub_vv_i32m2(vint32m2_t vd, vint32m2_t vs1, vint32m2_t vs2, size_t vl) {
@@ -288,7 +288,7 @@ vint32m2_t test_vnmsub_vv_i32m2(vint32m2_t vd, vint32m2_t vs1, vint32m2_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vnmsub_vx_i32m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vnmsub.nxv4i32.i32.i64(<vscale x 4 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 4 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vnmsub.nxv4i32.i32.i64(<vscale x 4 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 4 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
 vint32m2_t test_vnmsub_vx_i32m2(vint32m2_t vd, int32_t rs1, vint32m2_t vs2, size_t vl) {
@@ -297,7 +297,7 @@ vint32m2_t test_vnmsub_vx_i32m2(vint32m2_t vd, int32_t rs1, vint32m2_t vs2, size
 
 // CHECK-RV64-LABEL: @test_vnmsub_vv_i32m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vnmsub.nxv8i32.nxv8i32.i64(<vscale x 8 x i32> [[VD:%.*]], <vscale x 8 x i32> [[VS1:%.*]], <vscale x 8 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vnmsub.nxv8i32.nxv8i32.i64(<vscale x 8 x i32> [[VD:%.*]], <vscale x 8 x i32> [[VS1:%.*]], <vscale x 8 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i32> [[TMP0]]
 //
 vint32m4_t test_vnmsub_vv_i32m4(vint32m4_t vd, vint32m4_t vs1, vint32m4_t vs2, size_t vl) {
@@ -306,7 +306,7 @@ vint32m4_t test_vnmsub_vv_i32m4(vint32m4_t vd, vint32m4_t vs1, vint32m4_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vnmsub_vx_i32m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vnmsub.nxv8i32.i32.i64(<vscale x 8 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 8 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vnmsub.nxv8i32.i32.i64(<vscale x 8 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 8 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i32> [[TMP0]]
 //
 vint32m4_t test_vnmsub_vx_i32m4(vint32m4_t vd, int32_t rs1, vint32m4_t vs2, size_t vl) {
@@ -315,7 +315,7 @@ vint32m4_t test_vnmsub_vx_i32m4(vint32m4_t vd, int32_t rs1, vint32m4_t vs2, size
 
 // CHECK-RV64-LABEL: @test_vnmsub_vv_i32m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vnmsub.nxv16i32.nxv16i32.i64(<vscale x 16 x i32> [[VD:%.*]], <vscale x 16 x i32> [[VS1:%.*]], <vscale x 16 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vnmsub.nxv16i32.nxv16i32.i64(<vscale x 16 x i32> [[VD:%.*]], <vscale x 16 x i32> [[VS1:%.*]], <vscale x 16 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i32> [[TMP0]]
 //
 vint32m8_t test_vnmsub_vv_i32m8(vint32m8_t vd, vint32m8_t vs1, vint32m8_t vs2, size_t vl) {
@@ -324,7 +324,7 @@ vint32m8_t test_vnmsub_vv_i32m8(vint32m8_t vd, vint32m8_t vs1, vint32m8_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vnmsub_vx_i32m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vnmsub.nxv16i32.i32.i64(<vscale x 16 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 16 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vnmsub.nxv16i32.i32.i64(<vscale x 16 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 16 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i32> [[TMP0]]
 //
 vint32m8_t test_vnmsub_vx_i32m8(vint32m8_t vd, int32_t rs1, vint32m8_t vs2, size_t vl) {
@@ -333,7 +333,7 @@ vint32m8_t test_vnmsub_vx_i32m8(vint32m8_t vd, int32_t rs1, vint32m8_t vs2, size
 
 // CHECK-RV64-LABEL: @test_vnmsub_vv_i64m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vnmsub.nxv1i64.nxv1i64.i64(<vscale x 1 x i64> [[VD:%.*]], <vscale x 1 x i64> [[VS1:%.*]], <vscale x 1 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vnmsub.nxv1i64.nxv1i64.i64(<vscale x 1 x i64> [[VD:%.*]], <vscale x 1 x i64> [[VS1:%.*]], <vscale x 1 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
 //
 vint64m1_t test_vnmsub_vv_i64m1(vint64m1_t vd, vint64m1_t vs1, vint64m1_t vs2, size_t vl) {
@@ -342,7 +342,7 @@ vint64m1_t test_vnmsub_vv_i64m1(vint64m1_t vd, vint64m1_t vs1, vint64m1_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vnmsub_vx_i64m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vnmsub.nxv1i64.i64.i64(<vscale x 1 x i64> [[VD:%.*]], i64 [[RS1:%.*]], <vscale x 1 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vnmsub.nxv1i64.i64.i64(<vscale x 1 x i64> [[VD:%.*]], i64 [[RS1:%.*]], <vscale x 1 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
 //
 vint64m1_t test_vnmsub_vx_i64m1(vint64m1_t vd, int64_t rs1, vint64m1_t vs2, size_t vl) {
@@ -351,7 +351,7 @@ vint64m1_t test_vnmsub_vx_i64m1(vint64m1_t vd, int64_t rs1, vint64m1_t vs2, size
 
 // CHECK-RV64-LABEL: @test_vnmsub_vv_i64m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vnmsub.nxv2i64.nxv2i64.i64(<vscale x 2 x i64> [[VD:%.*]], <vscale x 2 x i64> [[VS1:%.*]], <vscale x 2 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vnmsub.nxv2i64.nxv2i64.i64(<vscale x 2 x i64> [[VD:%.*]], <vscale x 2 x i64> [[VS1:%.*]], <vscale x 2 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
 vint64m2_t test_vnmsub_vv_i64m2(vint64m2_t vd, vint64m2_t vs1, vint64m2_t vs2, size_t vl) {
@@ -360,7 +360,7 @@ vint64m2_t test_vnmsub_vv_i64m2(vint64m2_t vd, vint64m2_t vs1, vint64m2_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vnmsub_vx_i64m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vnmsub.nxv2i64.i64.i64(<vscale x 2 x i64> [[VD:%.*]], i64 [[RS1:%.*]], <vscale x 2 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vnmsub.nxv2i64.i64.i64(<vscale x 2 x i64> [[VD:%.*]], i64 [[RS1:%.*]], <vscale x 2 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
 vint64m2_t test_vnmsub_vx_i64m2(vint64m2_t vd, int64_t rs1, vint64m2_t vs2, size_t vl) {
@@ -369,7 +369,7 @@ vint64m2_t test_vnmsub_vx_i64m2(vint64m2_t vd, int64_t rs1, vint64m2_t vs2, size
 
 // CHECK-RV64-LABEL: @test_vnmsub_vv_i64m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vnmsub.nxv4i64.nxv4i64.i64(<vscale x 4 x i64> [[VD:%.*]], <vscale x 4 x i64> [[VS1:%.*]], <vscale x 4 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vnmsub.nxv4i64.nxv4i64.i64(<vscale x 4 x i64> [[VD:%.*]], <vscale x 4 x i64> [[VS1:%.*]], <vscale x 4 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
 //
 vint64m4_t test_vnmsub_vv_i64m4(vint64m4_t vd, vint64m4_t vs1, vint64m4_t vs2, size_t vl) {
@@ -378,7 +378,7 @@ vint64m4_t test_vnmsub_vv_i64m4(vint64m4_t vd, vint64m4_t vs1, vint64m4_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vnmsub_vx_i64m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vnmsub.nxv4i64.i64.i64(<vscale x 4 x i64> [[VD:%.*]], i64 [[RS1:%.*]], <vscale x 4 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vnmsub.nxv4i64.i64.i64(<vscale x 4 x i64> [[VD:%.*]], i64 [[RS1:%.*]], <vscale x 4 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
 //
 vint64m4_t test_vnmsub_vx_i64m4(vint64m4_t vd, int64_t rs1, vint64m4_t vs2, size_t vl) {
@@ -387,7 +387,7 @@ vint64m4_t test_vnmsub_vx_i64m4(vint64m4_t vd, int64_t rs1, vint64m4_t vs2, size
 
 // CHECK-RV64-LABEL: @test_vnmsub_vv_i64m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vnmsub.nxv8i64.nxv8i64.i64(<vscale x 8 x i64> [[VD:%.*]], <vscale x 8 x i64> [[VS1:%.*]], <vscale x 8 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vnmsub.nxv8i64.nxv8i64.i64(<vscale x 8 x i64> [[VD:%.*]], <vscale x 8 x i64> [[VS1:%.*]], <vscale x 8 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
 //
 vint64m8_t test_vnmsub_vv_i64m8(vint64m8_t vd, vint64m8_t vs1, vint64m8_t vs2, size_t vl) {
@@ -396,7 +396,7 @@ vint64m8_t test_vnmsub_vv_i64m8(vint64m8_t vd, vint64m8_t vs1, vint64m8_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vnmsub_vx_i64m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vnmsub.nxv8i64.i64.i64(<vscale x 8 x i64> [[VD:%.*]], i64 [[RS1:%.*]], <vscale x 8 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vnmsub.nxv8i64.i64.i64(<vscale x 8 x i64> [[VD:%.*]], i64 [[RS1:%.*]], <vscale x 8 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
 //
 vint64m8_t test_vnmsub_vx_i64m8(vint64m8_t vd, int64_t rs1, vint64m8_t vs2, size_t vl) {
@@ -405,7 +405,7 @@ vint64m8_t test_vnmsub_vx_i64m8(vint64m8_t vd, int64_t rs1, vint64m8_t vs2, size
 
 // CHECK-RV64-LABEL: @test_vnmsub_vv_u8mf8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vnmsub.nxv1i8.nxv1i8.i64(<vscale x 1 x i8> [[VD:%.*]], <vscale x 1 x i8> [[VS1:%.*]], <vscale x 1 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vnmsub.nxv1i8.nxv1i8.i64(<vscale x 1 x i8> [[VD:%.*]], <vscale x 1 x i8> [[VS1:%.*]], <vscale x 1 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i8> [[TMP0]]
 //
 vuint8mf8_t test_vnmsub_vv_u8mf8(vuint8mf8_t vd, vuint8mf8_t vs1, vuint8mf8_t vs2, size_t vl) {
@@ -414,7 +414,7 @@ vuint8mf8_t test_vnmsub_vv_u8mf8(vuint8mf8_t vd, vuint8mf8_t vs1, vuint8mf8_t vs
 
 // CHECK-RV64-LABEL: @test_vnmsub_vx_u8mf8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vnmsub.nxv1i8.i8.i64(<vscale x 1 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 1 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vnmsub.nxv1i8.i8.i64(<vscale x 1 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 1 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i8> [[TMP0]]
 //
 vuint8mf8_t test_vnmsub_vx_u8mf8(vuint8mf8_t vd, uint8_t rs1, vuint8mf8_t vs2, size_t vl) {
@@ -423,7 +423,7 @@ vuint8mf8_t test_vnmsub_vx_u8mf8(vuint8mf8_t vd, uint8_t rs1, vuint8mf8_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vnmsub_vv_u8mf4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vnmsub.nxv2i8.nxv2i8.i64(<vscale x 2 x i8> [[VD:%.*]], <vscale x 2 x i8> [[VS1:%.*]], <vscale x 2 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vnmsub.nxv2i8.nxv2i8.i64(<vscale x 2 x i8> [[VD:%.*]], <vscale x 2 x i8> [[VS1:%.*]], <vscale x 2 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i8> [[TMP0]]
 //
 vuint8mf4_t test_vnmsub_vv_u8mf4(vuint8mf4_t vd, vuint8mf4_t vs1, vuint8mf4_t vs2, size_t vl) {
@@ -432,7 +432,7 @@ vuint8mf4_t test_vnmsub_vv_u8mf4(vuint8mf4_t vd, vuint8mf4_t vs1, vuint8mf4_t vs
 
 // CHECK-RV64-LABEL: @test_vnmsub_vx_u8mf4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vnmsub.nxv2i8.i8.i64(<vscale x 2 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 2 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vnmsub.nxv2i8.i8.i64(<vscale x 2 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 2 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i8> [[TMP0]]
 //
 vuint8mf4_t test_vnmsub_vx_u8mf4(vuint8mf4_t vd, uint8_t rs1, vuint8mf4_t vs2, size_t vl) {
@@ -441,7 +441,7 @@ vuint8mf4_t test_vnmsub_vx_u8mf4(vuint8mf4_t vd, uint8_t rs1, vuint8mf4_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vnmsub_vv_u8mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vnmsub.nxv4i8.nxv4i8.i64(<vscale x 4 x i8> [[VD:%.*]], <vscale x 4 x i8> [[VS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vnmsub.nxv4i8.nxv4i8.i64(<vscale x 4 x i8> [[VD:%.*]], <vscale x 4 x i8> [[VS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i8> [[TMP0]]
 //
 vuint8mf2_t test_vnmsub_vv_u8mf2(vuint8mf2_t vd, vuint8mf2_t vs1, vuint8mf2_t vs2, size_t vl) {
@@ -450,7 +450,7 @@ vuint8mf2_t test_vnmsub_vv_u8mf2(vuint8mf2_t vd, vuint8mf2_t vs1, vuint8mf2_t vs
 
 // CHECK-RV64-LABEL: @test_vnmsub_vx_u8mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vnmsub.nxv4i8.i8.i64(<vscale x 4 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vnmsub.nxv4i8.i8.i64(<vscale x 4 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i8> [[TMP0]]
 //
 vuint8mf2_t test_vnmsub_vx_u8mf2(vuint8mf2_t vd, uint8_t rs1, vuint8mf2_t vs2, size_t vl) {
@@ -459,7 +459,7 @@ vuint8mf2_t test_vnmsub_vx_u8mf2(vuint8mf2_t vd, uint8_t rs1, vuint8mf2_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vnmsub_vv_u8m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vnmsub.nxv8i8.nxv8i8.i64(<vscale x 8 x i8> [[VD:%.*]], <vscale x 8 x i8> [[VS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vnmsub.nxv8i8.nxv8i8.i64(<vscale x 8 x i8> [[VD:%.*]], <vscale x 8 x i8> [[VS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i8> [[TMP0]]
 //
 vuint8m1_t test_vnmsub_vv_u8m1(vuint8m1_t vd, vuint8m1_t vs1, vuint8m1_t vs2, size_t vl) {
@@ -468,7 +468,7 @@ vuint8m1_t test_vnmsub_vv_u8m1(vuint8m1_t vd, vuint8m1_t vs1, vuint8m1_t vs2, si
 
 // CHECK-RV64-LABEL: @test_vnmsub_vx_u8m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vnmsub.nxv8i8.i8.i64(<vscale x 8 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vnmsub.nxv8i8.i8.i64(<vscale x 8 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i8> [[TMP0]]
 //
 vuint8m1_t test_vnmsub_vx_u8m1(vuint8m1_t vd, uint8_t rs1, vuint8m1_t vs2, size_t vl) {
@@ -477,7 +477,7 @@ vuint8m1_t test_vnmsub_vx_u8m1(vuint8m1_t vd, uint8_t rs1, vuint8m1_t vs2, size_
 
 // CHECK-RV64-LABEL: @test_vnmsub_vv_u8m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vnmsub.nxv16i8.nxv16i8.i64(<vscale x 16 x i8> [[VD:%.*]], <vscale x 16 x i8> [[VS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vnmsub.nxv16i8.nxv16i8.i64(<vscale x 16 x i8> [[VD:%.*]], <vscale x 16 x i8> [[VS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
 vuint8m2_t test_vnmsub_vv_u8m2(vuint8m2_t vd, vuint8m2_t vs1, vuint8m2_t vs2, size_t vl) {
@@ -486,7 +486,7 @@ vuint8m2_t test_vnmsub_vv_u8m2(vuint8m2_t vd, vuint8m2_t vs1, vuint8m2_t vs2, si
 
 // CHECK-RV64-LABEL: @test_vnmsub_vx_u8m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vnmsub.nxv16i8.i8.i64(<vscale x 16 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vnmsub.nxv16i8.i8.i64(<vscale x 16 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
 vuint8m2_t test_vnmsub_vx_u8m2(vuint8m2_t vd, uint8_t rs1, vuint8m2_t vs2, size_t vl) {
@@ -495,7 +495,7 @@ vuint8m2_t test_vnmsub_vx_u8m2(vuint8m2_t vd, uint8_t rs1, vuint8m2_t vs2, size_
 
 // CHECK-RV64-LABEL: @test_vnmsub_vv_u8m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i8> @llvm.riscv.vnmsub.nxv32i8.nxv32i8.i64(<vscale x 32 x i8> [[VD:%.*]], <vscale x 32 x i8> [[VS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i8> @llvm.riscv.vnmsub.nxv32i8.nxv32i8.i64(<vscale x 32 x i8> [[VD:%.*]], <vscale x 32 x i8> [[VS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 32 x i8> [[TMP0]]
 //
 vuint8m4_t test_vnmsub_vv_u8m4(vuint8m4_t vd, vuint8m4_t vs1, vuint8m4_t vs2, size_t vl) {
@@ -504,7 +504,7 @@ vuint8m4_t test_vnmsub_vv_u8m4(vuint8m4_t vd, vuint8m4_t vs1, vuint8m4_t vs2, si
 
 // CHECK-RV64-LABEL: @test_vnmsub_vx_u8m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i8> @llvm.riscv.vnmsub.nxv32i8.i8.i64(<vscale x 32 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i8> @llvm.riscv.vnmsub.nxv32i8.i8.i64(<vscale x 32 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 32 x i8> [[TMP0]]
 //
 vuint8m4_t test_vnmsub_vx_u8m4(vuint8m4_t vd, uint8_t rs1, vuint8m4_t vs2, size_t vl) {
@@ -513,7 +513,7 @@ vuint8m4_t test_vnmsub_vx_u8m4(vuint8m4_t vd, uint8_t rs1, vuint8m4_t vs2, size_
 
 // CHECK-RV64-LABEL: @test_vnmsub_vv_u8m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 64 x i8> @llvm.riscv.vnmsub.nxv64i8.nxv64i8.i64(<vscale x 64 x i8> [[VD:%.*]], <vscale x 64 x i8> [[VS1:%.*]], <vscale x 64 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 64 x i8> @llvm.riscv.vnmsub.nxv64i8.nxv64i8.i64(<vscale x 64 x i8> [[VD:%.*]], <vscale x 64 x i8> [[VS1:%.*]], <vscale x 64 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 64 x i8> [[TMP0]]
 //
 vuint8m8_t test_vnmsub_vv_u8m8(vuint8m8_t vd, vuint8m8_t vs1, vuint8m8_t vs2, size_t vl) {
@@ -522,7 +522,7 @@ vuint8m8_t test_vnmsub_vv_u8m8(vuint8m8_t vd, vuint8m8_t vs1, vuint8m8_t vs2, si
 
 // CHECK-RV64-LABEL: @test_vnmsub_vx_u8m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 64 x i8> @llvm.riscv.vnmsub.nxv64i8.i8.i64(<vscale x 64 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 64 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 64 x i8> @llvm.riscv.vnmsub.nxv64i8.i8.i64(<vscale x 64 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 64 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 64 x i8> [[TMP0]]
 //
 vuint8m8_t test_vnmsub_vx_u8m8(vuint8m8_t vd, uint8_t rs1, vuint8m8_t vs2, size_t vl) {
@@ -531,7 +531,7 @@ vuint8m8_t test_vnmsub_vx_u8m8(vuint8m8_t vd, uint8_t rs1, vuint8m8_t vs2, size_
 
 // CHECK-RV64-LABEL: @test_vnmsub_vv_u16mf4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vnmsub.nxv1i16.nxv1i16.i64(<vscale x 1 x i16> [[VD:%.*]], <vscale x 1 x i16> [[VS1:%.*]], <vscale x 1 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vnmsub.nxv1i16.nxv1i16.i64(<vscale x 1 x i16> [[VD:%.*]], <vscale x 1 x i16> [[VS1:%.*]], <vscale x 1 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i16> [[TMP0]]
 //
 vuint16mf4_t test_vnmsub_vv_u16mf4(vuint16mf4_t vd, vuint16mf4_t vs1, vuint16mf4_t vs2, size_t vl) {
@@ -540,7 +540,7 @@ vuint16mf4_t test_vnmsub_vv_u16mf4(vuint16mf4_t vd, vuint16mf4_t vs1, vuint16mf4
 
 // CHECK-RV64-LABEL: @test_vnmsub_vx_u16mf4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vnmsub.nxv1i16.i16.i64(<vscale x 1 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 1 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vnmsub.nxv1i16.i16.i64(<vscale x 1 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 1 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i16> [[TMP0]]
 //
 vuint16mf4_t test_vnmsub_vx_u16mf4(vuint16mf4_t vd, uint16_t rs1, vuint16mf4_t vs2, size_t vl) {
@@ -549,7 +549,7 @@ vuint16mf4_t test_vnmsub_vx_u16mf4(vuint16mf4_t vd, uint16_t rs1, vuint16mf4_t v
 
 // CHECK-RV64-LABEL: @test_vnmsub_vv_u16mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vnmsub.nxv2i16.nxv2i16.i64(<vscale x 2 x i16> [[VD:%.*]], <vscale x 2 x i16> [[VS1:%.*]], <vscale x 2 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vnmsub.nxv2i16.nxv2i16.i64(<vscale x 2 x i16> [[VD:%.*]], <vscale x 2 x i16> [[VS1:%.*]], <vscale x 2 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i16> [[TMP0]]
 //
 vuint16mf2_t test_vnmsub_vv_u16mf2(vuint16mf2_t vd, vuint16mf2_t vs1, vuint16mf2_t vs2, size_t vl) {
@@ -558,7 +558,7 @@ vuint16mf2_t test_vnmsub_vv_u16mf2(vuint16mf2_t vd, vuint16mf2_t vs1, vuint16mf2
 
 // CHECK-RV64-LABEL: @test_vnmsub_vx_u16mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vnmsub.nxv2i16.i16.i64(<vscale x 2 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 2 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vnmsub.nxv2i16.i16.i64(<vscale x 2 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 2 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i16> [[TMP0]]
 //
 vuint16mf2_t test_vnmsub_vx_u16mf2(vuint16mf2_t vd, uint16_t rs1, vuint16mf2_t vs2, size_t vl) {
@@ -567,7 +567,7 @@ vuint16mf2_t test_vnmsub_vx_u16mf2(vuint16mf2_t vd, uint16_t rs1, vuint16mf2_t v
 
 // CHECK-RV64-LABEL: @test_vnmsub_vv_u16m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vnmsub.nxv4i16.nxv4i16.i64(<vscale x 4 x i16> [[VD:%.*]], <vscale x 4 x i16> [[VS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vnmsub.nxv4i16.nxv4i16.i64(<vscale x 4 x i16> [[VD:%.*]], <vscale x 4 x i16> [[VS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i16> [[TMP0]]
 //
 vuint16m1_t test_vnmsub_vv_u16m1(vuint16m1_t vd, vuint16m1_t vs1, vuint16m1_t vs2, size_t vl) {
@@ -576,7 +576,7 @@ vuint16m1_t test_vnmsub_vv_u16m1(vuint16m1_t vd, vuint16m1_t vs1, vuint16m1_t vs
 
 // CHECK-RV64-LABEL: @test_vnmsub_vx_u16m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vnmsub.nxv4i16.i16.i64(<vscale x 4 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vnmsub.nxv4i16.i16.i64(<vscale x 4 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i16> [[TMP0]]
 //
 vuint16m1_t test_vnmsub_vx_u16m1(vuint16m1_t vd, uint16_t rs1, vuint16m1_t vs2, size_t vl) {
@@ -585,7 +585,7 @@ vuint16m1_t test_vnmsub_vx_u16m1(vuint16m1_t vd, uint16_t rs1, vuint16m1_t vs2,
 
 // CHECK-RV64-LABEL: @test_vnmsub_vv_u16m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vnmsub.nxv8i16.nxv8i16.i64(<vscale x 8 x i16> [[VD:%.*]], <vscale x 8 x i16> [[VS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vnmsub.nxv8i16.nxv8i16.i64(<vscale x 8 x i16> [[VD:%.*]], <vscale x 8 x i16> [[VS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
 vuint16m2_t test_vnmsub_vv_u16m2(vuint16m2_t vd, vuint16m2_t vs1, vuint16m2_t vs2, size_t vl) {
@@ -594,7 +594,7 @@ vuint16m2_t test_vnmsub_vv_u16m2(vuint16m2_t vd, vuint16m2_t vs1, vuint16m2_t vs
 
 // CHECK-RV64-LABEL: @test_vnmsub_vx_u16m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vnmsub.nxv8i16.i16.i64(<vscale x 8 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vnmsub.nxv8i16.i16.i64(<vscale x 8 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
 vuint16m2_t test_vnmsub_vx_u16m2(vuint16m2_t vd, uint16_t rs1, vuint16m2_t vs2, size_t vl) {
@@ -603,7 +603,7 @@ vuint16m2_t test_vnmsub_vx_u16m2(vuint16m2_t vd, uint16_t rs1, vuint16m2_t vs2,
 
 // CHECK-RV64-LABEL: @test_vnmsub_vv_u16m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vnmsub.nxv16i16.nxv16i16.i64(<vscale x 16 x i16> [[VD:%.*]], <vscale x 16 x i16> [[VS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vnmsub.nxv16i16.nxv16i16.i64(<vscale x 16 x i16> [[VD:%.*]], <vscale x 16 x i16> [[VS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i16> [[TMP0]]
 //
 vuint16m4_t test_vnmsub_vv_u16m4(vuint16m4_t vd, vuint16m4_t vs1, vuint16m4_t vs2, size_t vl) {
@@ -612,7 +612,7 @@ vuint16m4_t test_vnmsub_vv_u16m4(vuint16m4_t vd, vuint16m4_t vs1, vuint16m4_t vs
 
 // CHECK-RV64-LABEL: @test_vnmsub_vx_u16m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vnmsub.nxv16i16.i16.i64(<vscale x 16 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vnmsub.nxv16i16.i16.i64(<vscale x 16 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i16> [[TMP0]]
 //
 vuint16m4_t test_vnmsub_vx_u16m4(vuint16m4_t vd, uint16_t rs1, vuint16m4_t vs2, size_t vl) {
@@ -621,7 +621,7 @@ vuint16m4_t test_vnmsub_vx_u16m4(vuint16m4_t vd, uint16_t rs1, vuint16m4_t vs2,
 
 // CHECK-RV64-LABEL: @test_vnmsub_vv_u16m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vnmsub.nxv32i16.nxv32i16.i64(<vscale x 32 x i16> [[VD:%.*]], <vscale x 32 x i16> [[VS1:%.*]], <vscale x 32 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vnmsub.nxv32i16.nxv32i16.i64(<vscale x 32 x i16> [[VD:%.*]], <vscale x 32 x i16> [[VS1:%.*]], <vscale x 32 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 32 x i16> [[TMP0]]
 //
 vuint16m8_t test_vnmsub_vv_u16m8(vuint16m8_t vd, vuint16m8_t vs1, vuint16m8_t vs2, size_t vl) {
@@ -630,7 +630,7 @@ vuint16m8_t test_vnmsub_vv_u16m8(vuint16m8_t vd, vuint16m8_t vs1, vuint16m8_t vs
 
 // CHECK-RV64-LABEL: @test_vnmsub_vx_u16m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vnmsub.nxv32i16.i16.i64(<vscale x 32 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 32 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vnmsub.nxv32i16.i16.i64(<vscale x 32 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 32 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 32 x i16> [[TMP0]]
 //
 vuint16m8_t test_vnmsub_vx_u16m8(vuint16m8_t vd, uint16_t rs1, vuint16m8_t vs2, size_t vl) {
@@ -639,7 +639,7 @@ vuint16m8_t test_vnmsub_vx_u16m8(vuint16m8_t vd, uint16_t rs1, vuint16m8_t vs2,
 
 // CHECK-RV64-LABEL: @test_vnmsub_vv_u32mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vnmsub.nxv1i32.nxv1i32.i64(<vscale x 1 x i32> [[VD:%.*]], <vscale x 1 x i32> [[VS1:%.*]], <vscale x 1 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vnmsub.nxv1i32.nxv1i32.i64(<vscale x 1 x i32> [[VD:%.*]], <vscale x 1 x i32> [[VS1:%.*]], <vscale x 1 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i32> [[TMP0]]
 //
 vuint32mf2_t test_vnmsub_vv_u32mf2(vuint32mf2_t vd, vuint32mf2_t vs1, vuint32mf2_t vs2, size_t vl) {
@@ -648,7 +648,7 @@ vuint32mf2_t test_vnmsub_vv_u32mf2(vuint32mf2_t vd, vuint32mf2_t vs1, vuint32mf2
 
 // CHECK-RV64-LABEL: @test_vnmsub_vx_u32mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vnmsub.nxv1i32.i32.i64(<vscale x 1 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 1 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vnmsub.nxv1i32.i32.i64(<vscale x 1 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 1 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i32> [[TMP0]]
 //
 vuint32mf2_t test_vnmsub_vx_u32mf2(vuint32mf2_t vd, uint32_t rs1, vuint32mf2_t vs2, size_t vl) {
@@ -657,7 +657,7 @@ vuint32mf2_t test_vnmsub_vx_u32mf2(vuint32mf2_t vd, uint32_t rs1, vuint32mf2_t v
 
 // CHECK-RV64-LABEL: @test_vnmsub_vv_u32m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vnmsub.nxv2i32.nxv2i32.i64(<vscale x 2 x i32> [[VD:%.*]], <vscale x 2 x i32> [[VS1:%.*]], <vscale x 2 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vnmsub.nxv2i32.nxv2i32.i64(<vscale x 2 x i32> [[VD:%.*]], <vscale x 2 x i32> [[VS1:%.*]], <vscale x 2 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i32> [[TMP0]]
 //
 vuint32m1_t test_vnmsub_vv_u32m1(vuint32m1_t vd, vuint32m1_t vs1, vuint32m1_t vs2, size_t vl) {
@@ -666,7 +666,7 @@ vuint32m1_t test_vnmsub_vv_u32m1(vuint32m1_t vd, vuint32m1_t vs1, vuint32m1_t vs
 
 // CHECK-RV64-LABEL: @test_vnmsub_vx_u32m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vnmsub.nxv2i32.i32.i64(<vscale x 2 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 2 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vnmsub.nxv2i32.i32.i64(<vscale x 2 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 2 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i32> [[TMP0]]
 //
 vuint32m1_t test_vnmsub_vx_u32m1(vuint32m1_t vd, uint32_t rs1, vuint32m1_t vs2, size_t vl) {
@@ -675,7 +675,7 @@ vuint32m1_t test_vnmsub_vx_u32m1(vuint32m1_t vd, uint32_t rs1, vuint32m1_t vs2,
 
 // CHECK-RV64-LABEL: @test_vnmsub_vv_u32m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vnmsub.nxv4i32.nxv4i32.i64(<vscale x 4 x i32> [[VD:%.*]], <vscale x 4 x i32> [[VS1:%.*]], <vscale x 4 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vnmsub.nxv4i32.nxv4i32.i64(<vscale x 4 x i32> [[VD:%.*]], <vscale x 4 x i32> [[VS1:%.*]], <vscale x 4 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
 vuint32m2_t test_vnmsub_vv_u32m2(vuint32m2_t vd, vuint32m2_t vs1, vuint32m2_t vs2, size_t vl) {
@@ -684,7 +684,7 @@ vuint32m2_t test_vnmsub_vv_u32m2(vuint32m2_t vd, vuint32m2_t vs1, vuint32m2_t vs
 
 // CHECK-RV64-LABEL: @test_vnmsub_vx_u32m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vnmsub.nxv4i32.i32.i64(<vscale x 4 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 4 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vnmsub.nxv4i32.i32.i64(<vscale x 4 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 4 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
 vuint32m2_t test_vnmsub_vx_u32m2(vuint32m2_t vd, uint32_t rs1, vuint32m2_t vs2, size_t vl) {
@@ -693,7 +693,7 @@ vuint32m2_t test_vnmsub_vx_u32m2(vuint32m2_t vd, uint32_t rs1, vuint32m2_t vs2,
 
 // CHECK-RV64-LABEL: @test_vnmsub_vv_u32m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vnmsub.nxv8i32.nxv8i32.i64(<vscale x 8 x i32> [[VD:%.*]], <vscale x 8 x i32> [[VS1:%.*]], <vscale x 8 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vnmsub.nxv8i32.nxv8i32.i64(<vscale x 8 x i32> [[VD:%.*]], <vscale x 8 x i32> [[VS1:%.*]], <vscale x 8 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i32> [[TMP0]]
 //
 vuint32m4_t test_vnmsub_vv_u32m4(vuint32m4_t vd, vuint32m4_t vs1, vuint32m4_t vs2, size_t vl) {
@@ -702,7 +702,7 @@ vuint32m4_t test_vnmsub_vv_u32m4(vuint32m4_t vd, vuint32m4_t vs1, vuint32m4_t vs
 
 // CHECK-RV64-LABEL: @test_vnmsub_vx_u32m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vnmsub.nxv8i32.i32.i64(<vscale x 8 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 8 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vnmsub.nxv8i32.i32.i64(<vscale x 8 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 8 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i32> [[TMP0]]
 //
 vuint32m4_t test_vnmsub_vx_u32m4(vuint32m4_t vd, uint32_t rs1, vuint32m4_t vs2, size_t vl) {
@@ -711,7 +711,7 @@ vuint32m4_t test_vnmsub_vx_u32m4(vuint32m4_t vd, uint32_t rs1, vuint32m4_t vs2,
 
 // CHECK-RV64-LABEL: @test_vnmsub_vv_u32m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vnmsub.nxv16i32.nxv16i32.i64(<vscale x 16 x i32> [[VD:%.*]], <vscale x 16 x i32> [[VS1:%.*]], <vscale x 16 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vnmsub.nxv16i32.nxv16i32.i64(<vscale x 16 x i32> [[VD:%.*]], <vscale x 16 x i32> [[VS1:%.*]], <vscale x 16 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i32> [[TMP0]]
 //
 vuint32m8_t test_vnmsub_vv_u32m8(vuint32m8_t vd, vuint32m8_t vs1, vuint32m8_t vs2, size_t vl) {
@@ -720,7 +720,7 @@ vuint32m8_t test_vnmsub_vv_u32m8(vuint32m8_t vd, vuint32m8_t vs1, vuint32m8_t vs
 
 // CHECK-RV64-LABEL: @test_vnmsub_vx_u32m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vnmsub.nxv16i32.i32.i64(<vscale x 16 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 16 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vnmsub.nxv16i32.i32.i64(<vscale x 16 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 16 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i32> [[TMP0]]
 //
 vuint32m8_t test_vnmsub_vx_u32m8(vuint32m8_t vd, uint32_t rs1, vuint32m8_t vs2, size_t vl) {
@@ -729,7 +729,7 @@ vuint32m8_t test_vnmsub_vx_u32m8(vuint32m8_t vd, uint32_t rs1, vuint32m8_t vs2,
 
 // CHECK-RV64-LABEL: @test_vnmsub_vv_u64m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vnmsub.nxv1i64.nxv1i64.i64(<vscale x 1 x i64> [[VD:%.*]], <vscale x 1 x i64> [[VS1:%.*]], <vscale x 1 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vnmsub.nxv1i64.nxv1i64.i64(<vscale x 1 x i64> [[VD:%.*]], <vscale x 1 x i64> [[VS1:%.*]], <vscale x 1 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
 //
 vuint64m1_t test_vnmsub_vv_u64m1(vuint64m1_t vd, vuint64m1_t vs1, vuint64m1_t vs2, size_t vl) {
@@ -738,7 +738,7 @@ vuint64m1_t test_vnmsub_vv_u64m1(vuint64m1_t vd, vuint64m1_t vs1, vuint64m1_t vs
 
 // CHECK-RV64-LABEL: @test_vnmsub_vx_u64m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vnmsub.nxv1i64.i64.i64(<vscale x 1 x i64> [[VD:%.*]], i64 [[RS1:%.*]], <vscale x 1 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vnmsub.nxv1i64.i64.i64(<vscale x 1 x i64> [[VD:%.*]], i64 [[RS1:%.*]], <vscale x 1 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
 //
 vuint64m1_t test_vnmsub_vx_u64m1(vuint64m1_t vd, uint64_t rs1, vuint64m1_t vs2, size_t vl) {
@@ -747,7 +747,7 @@ vuint64m1_t test_vnmsub_vx_u64m1(vuint64m1_t vd, uint64_t rs1, vuint64m1_t vs2,
 
 // CHECK-RV64-LABEL: @test_vnmsub_vv_u64m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vnmsub.nxv2i64.nxv2i64.i64(<vscale x 2 x i64> [[VD:%.*]], <vscale x 2 x i64> [[VS1:%.*]], <vscale x 2 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vnmsub.nxv2i64.nxv2i64.i64(<vscale x 2 x i64> [[VD:%.*]], <vscale x 2 x i64> [[VS1:%.*]], <vscale x 2 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
 vuint64m2_t test_vnmsub_vv_u64m2(vuint64m2_t vd, vuint64m2_t vs1, vuint64m2_t vs2, size_t vl) {
@@ -756,7 +756,7 @@ vuint64m2_t test_vnmsub_vv_u64m2(vuint64m2_t vd, vuint64m2_t vs1, vuint64m2_t vs
 
 // CHECK-RV64-LABEL: @test_vnmsub_vx_u64m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vnmsub.nxv2i64.i64.i64(<vscale x 2 x i64> [[VD:%.*]], i64 [[RS1:%.*]], <vscale x 2 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vnmsub.nxv2i64.i64.i64(<vscale x 2 x i64> [[VD:%.*]], i64 [[RS1:%.*]], <vscale x 2 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
 vuint64m2_t test_vnmsub_vx_u64m2(vuint64m2_t vd, uint64_t rs1, vuint64m2_t vs2, size_t vl) {
@@ -765,7 +765,7 @@ vuint64m2_t test_vnmsub_vx_u64m2(vuint64m2_t vd, uint64_t rs1, vuint64m2_t vs2,
 
 // CHECK-RV64-LABEL: @test_vnmsub_vv_u64m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vnmsub.nxv4i64.nxv4i64.i64(<vscale x 4 x i64> [[VD:%.*]], <vscale x 4 x i64> [[VS1:%.*]], <vscale x 4 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vnmsub.nxv4i64.nxv4i64.i64(<vscale x 4 x i64> [[VD:%.*]], <vscale x 4 x i64> [[VS1:%.*]], <vscale x 4 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
 //
 vuint64m4_t test_vnmsub_vv_u64m4(vuint64m4_t vd, vuint64m4_t vs1, vuint64m4_t vs2, size_t vl) {
@@ -774,7 +774,7 @@ vuint64m4_t test_vnmsub_vv_u64m4(vuint64m4_t vd, vuint64m4_t vs1, vuint64m4_t vs
 
 // CHECK-RV64-LABEL: @test_vnmsub_vx_u64m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vnmsub.nxv4i64.i64.i64(<vscale x 4 x i64> [[VD:%.*]], i64 [[RS1:%.*]], <vscale x 4 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vnmsub.nxv4i64.i64.i64(<vscale x 4 x i64> [[VD:%.*]], i64 [[RS1:%.*]], <vscale x 4 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
 //
 vuint64m4_t test_vnmsub_vx_u64m4(vuint64m4_t vd, uint64_t rs1, vuint64m4_t vs2, size_t vl) {
@@ -783,7 +783,7 @@ vuint64m4_t test_vnmsub_vx_u64m4(vuint64m4_t vd, uint64_t rs1, vuint64m4_t vs2,
 
 // CHECK-RV64-LABEL: @test_vnmsub_vv_u64m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vnmsub.nxv8i64.nxv8i64.i64(<vscale x 8 x i64> [[VD:%.*]], <vscale x 8 x i64> [[VS1:%.*]], <vscale x 8 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vnmsub.nxv8i64.nxv8i64.i64(<vscale x 8 x i64> [[VD:%.*]], <vscale x 8 x i64> [[VS1:%.*]], <vscale x 8 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
 //
 vuint64m8_t test_vnmsub_vv_u64m8(vuint64m8_t vd, vuint64m8_t vs1, vuint64m8_t vs2, size_t vl) {
@@ -792,7 +792,7 @@ vuint64m8_t test_vnmsub_vv_u64m8(vuint64m8_t vd, vuint64m8_t vs1, vuint64m8_t vs
 
 // CHECK-RV64-LABEL: @test_vnmsub_vx_u64m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vnmsub.nxv8i64.i64.i64(<vscale x 8 x i64> [[VD:%.*]], i64 [[RS1:%.*]], <vscale x 8 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vnmsub.nxv8i64.i64.i64(<vscale x 8 x i64> [[VD:%.*]], i64 [[RS1:%.*]], <vscale x 8 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
 //
 vuint64m8_t test_vnmsub_vx_u64m8(vuint64m8_t vd, uint64_t rs1, vuint64m8_t vs2, size_t vl) {

diff  --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vwmacc.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vwmacc.c
index 6fcec72fdb769..741d11a1cdc55 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vwmacc.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vwmacc.c
@@ -9,7 +9,7 @@
 
 // CHECK-RV64-LABEL: @test_vwmacc_vv_i16mf4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vwmacc.nxv1i16.nxv1i8.nxv1i8.i64(<vscale x 1 x i16> [[VD:%.*]], <vscale x 1 x i8> [[VS1:%.*]], <vscale x 1 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vwmacc.nxv1i16.nxv1i8.nxv1i8.i64(<vscale x 1 x i16> [[VD:%.*]], <vscale x 1 x i8> [[VS1:%.*]], <vscale x 1 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i16> [[TMP0]]
 //
 vint16mf4_t test_vwmacc_vv_i16mf4(vint16mf4_t vd, vint8mf8_t vs1, vint8mf8_t vs2, size_t vl) {
@@ -18,7 +18,7 @@ vint16mf4_t test_vwmacc_vv_i16mf4(vint16mf4_t vd, vint8mf8_t vs1, vint8mf8_t vs2
 
 // CHECK-RV64-LABEL: @test_vwmacc_vx_i16mf4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vwmacc.nxv1i16.i8.nxv1i8.i64(<vscale x 1 x i16> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 1 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vwmacc.nxv1i16.i8.nxv1i8.i64(<vscale x 1 x i16> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 1 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i16> [[TMP0]]
 //
 vint16mf4_t test_vwmacc_vx_i16mf4(vint16mf4_t vd, int8_t rs1, vint8mf8_t vs2, size_t vl) {
@@ -27,7 +27,7 @@ vint16mf4_t test_vwmacc_vx_i16mf4(vint16mf4_t vd, int8_t rs1, vint8mf8_t vs2, si
 
 // CHECK-RV64-LABEL: @test_vwmacc_vv_i16mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vwmacc.nxv2i16.nxv2i8.nxv2i8.i64(<vscale x 2 x i16> [[VD:%.*]], <vscale x 2 x i8> [[VS1:%.*]], <vscale x 2 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vwmacc.nxv2i16.nxv2i8.nxv2i8.i64(<vscale x 2 x i16> [[VD:%.*]], <vscale x 2 x i8> [[VS1:%.*]], <vscale x 2 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i16> [[TMP0]]
 //
 vint16mf2_t test_vwmacc_vv_i16mf2(vint16mf2_t vd, vint8mf4_t vs1, vint8mf4_t vs2, size_t vl) {
@@ -36,7 +36,7 @@ vint16mf2_t test_vwmacc_vv_i16mf2(vint16mf2_t vd, vint8mf4_t vs1, vint8mf4_t vs2
 
 // CHECK-RV64-LABEL: @test_vwmacc_vx_i16mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vwmacc.nxv2i16.i8.nxv2i8.i64(<vscale x 2 x i16> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 2 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vwmacc.nxv2i16.i8.nxv2i8.i64(<vscale x 2 x i16> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 2 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i16> [[TMP0]]
 //
 vint16mf2_t test_vwmacc_vx_i16mf2(vint16mf2_t vd, int8_t rs1, vint8mf4_t vs2, size_t vl) {
@@ -45,7 +45,7 @@ vint16mf2_t test_vwmacc_vx_i16mf2(vint16mf2_t vd, int8_t rs1, vint8mf4_t vs2, si
 
 // CHECK-RV64-LABEL: @test_vwmacc_vv_i16m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vwmacc.nxv4i16.nxv4i8.nxv4i8.i64(<vscale x 4 x i16> [[VD:%.*]], <vscale x 4 x i8> [[VS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vwmacc.nxv4i16.nxv4i8.nxv4i8.i64(<vscale x 4 x i16> [[VD:%.*]], <vscale x 4 x i8> [[VS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i16> [[TMP0]]
 //
 vint16m1_t test_vwmacc_vv_i16m1(vint16m1_t vd, vint8mf2_t vs1, vint8mf2_t vs2, size_t vl) {
@@ -54,7 +54,7 @@ vint16m1_t test_vwmacc_vv_i16m1(vint16m1_t vd, vint8mf2_t vs1, vint8mf2_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vwmacc_vx_i16m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vwmacc.nxv4i16.i8.nxv4i8.i64(<vscale x 4 x i16> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vwmacc.nxv4i16.i8.nxv4i8.i64(<vscale x 4 x i16> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i16> [[TMP0]]
 //
 vint16m1_t test_vwmacc_vx_i16m1(vint16m1_t vd, int8_t rs1, vint8mf2_t vs2, size_t vl) {
@@ -63,7 +63,7 @@ vint16m1_t test_vwmacc_vx_i16m1(vint16m1_t vd, int8_t rs1, vint8mf2_t vs2, size_
 
 // CHECK-RV64-LABEL: @test_vwmacc_vv_i16m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vwmacc.nxv8i16.nxv8i8.nxv8i8.i64(<vscale x 8 x i16> [[VD:%.*]], <vscale x 8 x i8> [[VS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vwmacc.nxv8i16.nxv8i8.nxv8i8.i64(<vscale x 8 x i16> [[VD:%.*]], <vscale x 8 x i8> [[VS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
 vint16m2_t test_vwmacc_vv_i16m2(vint16m2_t vd, vint8m1_t vs1, vint8m1_t vs2, size_t vl) {
@@ -72,7 +72,7 @@ vint16m2_t test_vwmacc_vv_i16m2(vint16m2_t vd, vint8m1_t vs1, vint8m1_t vs2, siz
 
 // CHECK-RV64-LABEL: @test_vwmacc_vx_i16m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vwmacc.nxv8i16.i8.nxv8i8.i64(<vscale x 8 x i16> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vwmacc.nxv8i16.i8.nxv8i8.i64(<vscale x 8 x i16> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
 vint16m2_t test_vwmacc_vx_i16m2(vint16m2_t vd, int8_t rs1, vint8m1_t vs2, size_t vl) {
@@ -81,7 +81,7 @@ vint16m2_t test_vwmacc_vx_i16m2(vint16m2_t vd, int8_t rs1, vint8m1_t vs2, size_t
 
 // CHECK-RV64-LABEL: @test_vwmacc_vv_i16m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vwmacc.nxv16i16.nxv16i8.nxv16i8.i64(<vscale x 16 x i16> [[VD:%.*]], <vscale x 16 x i8> [[VS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vwmacc.nxv16i16.nxv16i8.nxv16i8.i64(<vscale x 16 x i16> [[VD:%.*]], <vscale x 16 x i8> [[VS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i16> [[TMP0]]
 //
 vint16m4_t test_vwmacc_vv_i16m4(vint16m4_t vd, vint8m2_t vs1, vint8m2_t vs2, size_t vl) {
@@ -90,7 +90,7 @@ vint16m4_t test_vwmacc_vv_i16m4(vint16m4_t vd, vint8m2_t vs1, vint8m2_t vs2, siz
 
 // CHECK-RV64-LABEL: @test_vwmacc_vx_i16m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vwmacc.nxv16i16.i8.nxv16i8.i64(<vscale x 16 x i16> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vwmacc.nxv16i16.i8.nxv16i8.i64(<vscale x 16 x i16> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i16> [[TMP0]]
 //
 vint16m4_t test_vwmacc_vx_i16m4(vint16m4_t vd, int8_t rs1, vint8m2_t vs2, size_t vl) {
@@ -99,7 +99,7 @@ vint16m4_t test_vwmacc_vx_i16m4(vint16m4_t vd, int8_t rs1, vint8m2_t vs2, size_t
 
 // CHECK-RV64-LABEL: @test_vwmacc_vv_i16m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vwmacc.nxv32i16.nxv32i8.nxv32i8.i64(<vscale x 32 x i16> [[VD:%.*]], <vscale x 32 x i8> [[VS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vwmacc.nxv32i16.nxv32i8.nxv32i8.i64(<vscale x 32 x i16> [[VD:%.*]], <vscale x 32 x i8> [[VS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 32 x i16> [[TMP0]]
 //
 vint16m8_t test_vwmacc_vv_i16m8(vint16m8_t vd, vint8m4_t vs1, vint8m4_t vs2, size_t vl) {
@@ -108,7 +108,7 @@ vint16m8_t test_vwmacc_vv_i16m8(vint16m8_t vd, vint8m4_t vs1, vint8m4_t vs2, siz
 
 // CHECK-RV64-LABEL: @test_vwmacc_vx_i16m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vwmacc.nxv32i16.i8.nxv32i8.i64(<vscale x 32 x i16> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vwmacc.nxv32i16.i8.nxv32i8.i64(<vscale x 32 x i16> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 32 x i16> [[TMP0]]
 //
 vint16m8_t test_vwmacc_vx_i16m8(vint16m8_t vd, int8_t rs1, vint8m4_t vs2, size_t vl) {
@@ -117,7 +117,7 @@ vint16m8_t test_vwmacc_vx_i16m8(vint16m8_t vd, int8_t rs1, vint8m4_t vs2, size_t
 
 // CHECK-RV64-LABEL: @test_vwmacc_vv_i32mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vwmacc.nxv1i32.nxv1i16.nxv1i16.i64(<vscale x 1 x i32> [[VD:%.*]], <vscale x 1 x i16> [[VS1:%.*]], <vscale x 1 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vwmacc.nxv1i32.nxv1i16.nxv1i16.i64(<vscale x 1 x i32> [[VD:%.*]], <vscale x 1 x i16> [[VS1:%.*]], <vscale x 1 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i32> [[TMP0]]
 //
 vint32mf2_t test_vwmacc_vv_i32mf2(vint32mf2_t vd, vint16mf4_t vs1, vint16mf4_t vs2, size_t vl) {
@@ -126,7 +126,7 @@ vint32mf2_t test_vwmacc_vv_i32mf2(vint32mf2_t vd, vint16mf4_t vs1, vint16mf4_t v
 
 // CHECK-RV64-LABEL: @test_vwmacc_vx_i32mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vwmacc.nxv1i32.i16.nxv1i16.i64(<vscale x 1 x i32> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 1 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vwmacc.nxv1i32.i16.nxv1i16.i64(<vscale x 1 x i32> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 1 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i32> [[TMP0]]
 //
 vint32mf2_t test_vwmacc_vx_i32mf2(vint32mf2_t vd, int16_t rs1, vint16mf4_t vs2, size_t vl) {
@@ -135,7 +135,7 @@ vint32mf2_t test_vwmacc_vx_i32mf2(vint32mf2_t vd, int16_t rs1, vint16mf4_t vs2,
 
 // CHECK-RV64-LABEL: @test_vwmacc_vv_i32m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vwmacc.nxv2i32.nxv2i16.nxv2i16.i64(<vscale x 2 x i32> [[VD:%.*]], <vscale x 2 x i16> [[VS1:%.*]], <vscale x 2 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vwmacc.nxv2i32.nxv2i16.nxv2i16.i64(<vscale x 2 x i32> [[VD:%.*]], <vscale x 2 x i16> [[VS1:%.*]], <vscale x 2 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i32> [[TMP0]]
 //
 vint32m1_t test_vwmacc_vv_i32m1(vint32m1_t vd, vint16mf2_t vs1, vint16mf2_t vs2, size_t vl) {
@@ -144,7 +144,7 @@ vint32m1_t test_vwmacc_vv_i32m1(vint32m1_t vd, vint16mf2_t vs1, vint16mf2_t vs2,
 
 // CHECK-RV64-LABEL: @test_vwmacc_vx_i32m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vwmacc.nxv2i32.i16.nxv2i16.i64(<vscale x 2 x i32> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 2 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vwmacc.nxv2i32.i16.nxv2i16.i64(<vscale x 2 x i32> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 2 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i32> [[TMP0]]
 //
 vint32m1_t test_vwmacc_vx_i32m1(vint32m1_t vd, int16_t rs1, vint16mf2_t vs2, size_t vl) {
@@ -153,7 +153,7 @@ vint32m1_t test_vwmacc_vx_i32m1(vint32m1_t vd, int16_t rs1, vint16mf2_t vs2, siz
 
 // CHECK-RV64-LABEL: @test_vwmacc_vv_i32m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vwmacc.nxv4i32.nxv4i16.nxv4i16.i64(<vscale x 4 x i32> [[VD:%.*]], <vscale x 4 x i16> [[VS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vwmacc.nxv4i32.nxv4i16.nxv4i16.i64(<vscale x 4 x i32> [[VD:%.*]], <vscale x 4 x i16> [[VS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
 vint32m2_t test_vwmacc_vv_i32m2(vint32m2_t vd, vint16m1_t vs1, vint16m1_t vs2, size_t vl) {
@@ -162,7 +162,7 @@ vint32m2_t test_vwmacc_vv_i32m2(vint32m2_t vd, vint16m1_t vs1, vint16m1_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vwmacc_vx_i32m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vwmacc.nxv4i32.i16.nxv4i16.i64(<vscale x 4 x i32> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vwmacc.nxv4i32.i16.nxv4i16.i64(<vscale x 4 x i32> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
 vint32m2_t test_vwmacc_vx_i32m2(vint32m2_t vd, int16_t rs1, vint16m1_t vs2, size_t vl) {
@@ -171,7 +171,7 @@ vint32m2_t test_vwmacc_vx_i32m2(vint32m2_t vd, int16_t rs1, vint16m1_t vs2, size
 
 // CHECK-RV64-LABEL: @test_vwmacc_vv_i32m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vwmacc.nxv8i32.nxv8i16.nxv8i16.i64(<vscale x 8 x i32> [[VD:%.*]], <vscale x 8 x i16> [[VS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vwmacc.nxv8i32.nxv8i16.nxv8i16.i64(<vscale x 8 x i32> [[VD:%.*]], <vscale x 8 x i16> [[VS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i32> [[TMP0]]
 //
 vint32m4_t test_vwmacc_vv_i32m4(vint32m4_t vd, vint16m2_t vs1, vint16m2_t vs2, size_t vl) {
@@ -180,7 +180,7 @@ vint32m4_t test_vwmacc_vv_i32m4(vint32m4_t vd, vint16m2_t vs1, vint16m2_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vwmacc_vx_i32m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vwmacc.nxv8i32.i16.nxv8i16.i64(<vscale x 8 x i32> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vwmacc.nxv8i32.i16.nxv8i16.i64(<vscale x 8 x i32> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i32> [[TMP0]]
 //
 vint32m4_t test_vwmacc_vx_i32m4(vint32m4_t vd, int16_t rs1, vint16m2_t vs2, size_t vl) {
@@ -189,7 +189,7 @@ vint32m4_t test_vwmacc_vx_i32m4(vint32m4_t vd, int16_t rs1, vint16m2_t vs2, size
 
 // CHECK-RV64-LABEL: @test_vwmacc_vv_i32m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vwmacc.nxv16i32.nxv16i16.nxv16i16.i64(<vscale x 16 x i32> [[VD:%.*]], <vscale x 16 x i16> [[VS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vwmacc.nxv16i32.nxv16i16.nxv16i16.i64(<vscale x 16 x i32> [[VD:%.*]], <vscale x 16 x i16> [[VS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i32> [[TMP0]]
 //
 vint32m8_t test_vwmacc_vv_i32m8(vint32m8_t vd, vint16m4_t vs1, vint16m4_t vs2, size_t vl) {
@@ -198,7 +198,7 @@ vint32m8_t test_vwmacc_vv_i32m8(vint32m8_t vd, vint16m4_t vs1, vint16m4_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vwmacc_vx_i32m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vwmacc.nxv16i32.i16.nxv16i16.i64(<vscale x 16 x i32> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vwmacc.nxv16i32.i16.nxv16i16.i64(<vscale x 16 x i32> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i32> [[TMP0]]
 //
 vint32m8_t test_vwmacc_vx_i32m8(vint32m8_t vd, int16_t rs1, vint16m4_t vs2, size_t vl) {
@@ -207,7 +207,7 @@ vint32m8_t test_vwmacc_vx_i32m8(vint32m8_t vd, int16_t rs1, vint16m4_t vs2, size
 
 // CHECK-RV64-LABEL: @test_vwmacc_vv_i64m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vwmacc.nxv1i64.nxv1i32.nxv1i32.i64(<vscale x 1 x i64> [[VD:%.*]], <vscale x 1 x i32> [[VS1:%.*]], <vscale x 1 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vwmacc.nxv1i64.nxv1i32.nxv1i32.i64(<vscale x 1 x i64> [[VD:%.*]], <vscale x 1 x i32> [[VS1:%.*]], <vscale x 1 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
 //
 vint64m1_t test_vwmacc_vv_i64m1(vint64m1_t vd, vint32mf2_t vs1, vint32mf2_t vs2, size_t vl) {
@@ -216,7 +216,7 @@ vint64m1_t test_vwmacc_vv_i64m1(vint64m1_t vd, vint32mf2_t vs1, vint32mf2_t vs2,
 
 // CHECK-RV64-LABEL: @test_vwmacc_vx_i64m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vwmacc.nxv1i64.i32.nxv1i32.i64(<vscale x 1 x i64> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 1 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vwmacc.nxv1i64.i32.nxv1i32.i64(<vscale x 1 x i64> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 1 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
 //
 vint64m1_t test_vwmacc_vx_i64m1(vint64m1_t vd, int32_t rs1, vint32mf2_t vs2, size_t vl) {
@@ -225,7 +225,7 @@ vint64m1_t test_vwmacc_vx_i64m1(vint64m1_t vd, int32_t rs1, vint32mf2_t vs2, siz
 
 // CHECK-RV64-LABEL: @test_vwmacc_vv_i64m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vwmacc.nxv2i64.nxv2i32.nxv2i32.i64(<vscale x 2 x i64> [[VD:%.*]], <vscale x 2 x i32> [[VS1:%.*]], <vscale x 2 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vwmacc.nxv2i64.nxv2i32.nxv2i32.i64(<vscale x 2 x i64> [[VD:%.*]], <vscale x 2 x i32> [[VS1:%.*]], <vscale x 2 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
 vint64m2_t test_vwmacc_vv_i64m2(vint64m2_t vd, vint32m1_t vs1, vint32m1_t vs2, size_t vl) {
@@ -234,7 +234,7 @@ vint64m2_t test_vwmacc_vv_i64m2(vint64m2_t vd, vint32m1_t vs1, vint32m1_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vwmacc_vx_i64m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vwmacc.nxv2i64.i32.nxv2i32.i64(<vscale x 2 x i64> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 2 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vwmacc.nxv2i64.i32.nxv2i32.i64(<vscale x 2 x i64> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 2 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
 vint64m2_t test_vwmacc_vx_i64m2(vint64m2_t vd, int32_t rs1, vint32m1_t vs2, size_t vl) {
@@ -243,7 +243,7 @@ vint64m2_t test_vwmacc_vx_i64m2(vint64m2_t vd, int32_t rs1, vint32m1_t vs2, size
 
 // CHECK-RV64-LABEL: @test_vwmacc_vv_i64m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vwmacc.nxv4i64.nxv4i32.nxv4i32.i64(<vscale x 4 x i64> [[VD:%.*]], <vscale x 4 x i32> [[VS1:%.*]], <vscale x 4 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vwmacc.nxv4i64.nxv4i32.nxv4i32.i64(<vscale x 4 x i64> [[VD:%.*]], <vscale x 4 x i32> [[VS1:%.*]], <vscale x 4 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
 //
 vint64m4_t test_vwmacc_vv_i64m4(vint64m4_t vd, vint32m2_t vs1, vint32m2_t vs2, size_t vl) {
@@ -252,7 +252,7 @@ vint64m4_t test_vwmacc_vv_i64m4(vint64m4_t vd, vint32m2_t vs1, vint32m2_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vwmacc_vx_i64m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vwmacc.nxv4i64.i32.nxv4i32.i64(<vscale x 4 x i64> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 4 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vwmacc.nxv4i64.i32.nxv4i32.i64(<vscale x 4 x i64> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 4 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
 //
 vint64m4_t test_vwmacc_vx_i64m4(vint64m4_t vd, int32_t rs1, vint32m2_t vs2, size_t vl) {
@@ -261,7 +261,7 @@ vint64m4_t test_vwmacc_vx_i64m4(vint64m4_t vd, int32_t rs1, vint32m2_t vs2, size
 
 // CHECK-RV64-LABEL: @test_vwmacc_vv_i64m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vwmacc.nxv8i64.nxv8i32.nxv8i32.i64(<vscale x 8 x i64> [[VD:%.*]], <vscale x 8 x i32> [[VS1:%.*]], <vscale x 8 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vwmacc.nxv8i64.nxv8i32.nxv8i32.i64(<vscale x 8 x i64> [[VD:%.*]], <vscale x 8 x i32> [[VS1:%.*]], <vscale x 8 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
 //
 vint64m8_t test_vwmacc_vv_i64m8(vint64m8_t vd, vint32m4_t vs1, vint32m4_t vs2, size_t vl) {
@@ -270,7 +270,7 @@ vint64m8_t test_vwmacc_vv_i64m8(vint64m8_t vd, vint32m4_t vs1, vint32m4_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vwmacc_vx_i64m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vwmacc.nxv8i64.i32.nxv8i32.i64(<vscale x 8 x i64> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 8 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vwmacc.nxv8i64.i32.nxv8i32.i64(<vscale x 8 x i64> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 8 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
 //
 vint64m8_t test_vwmacc_vx_i64m8(vint64m8_t vd, int32_t rs1, vint32m4_t vs2, size_t vl) {

diff  --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vwmaccsu.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vwmaccsu.c
index 1f6533bd685e4..4edcb9209ad5c 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vwmaccsu.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vwmaccsu.c
@@ -9,7 +9,7 @@
 
 // CHECK-RV64-LABEL: @test_vwmaccsu_vv_i16mf4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vwmaccsu.nxv1i16.nxv1i8.nxv1i8.i64(<vscale x 1 x i16> [[VD:%.*]], <vscale x 1 x i8> [[VS1:%.*]], <vscale x 1 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vwmaccsu.nxv1i16.nxv1i8.nxv1i8.i64(<vscale x 1 x i16> [[VD:%.*]], <vscale x 1 x i8> [[VS1:%.*]], <vscale x 1 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i16> [[TMP0]]
 //
 vint16mf4_t test_vwmaccsu_vv_i16mf4(vint16mf4_t vd, vint8mf8_t vs1, vuint8mf8_t vs2, size_t vl) {
@@ -18,7 +18,7 @@ vint16mf4_t test_vwmaccsu_vv_i16mf4(vint16mf4_t vd, vint8mf8_t vs1, vuint8mf8_t
 
 // CHECK-RV64-LABEL: @test_vwmaccsu_vx_i16mf4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vwmaccsu.nxv1i16.i8.nxv1i8.i64(<vscale x 1 x i16> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 1 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vwmaccsu.nxv1i16.i8.nxv1i8.i64(<vscale x 1 x i16> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 1 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i16> [[TMP0]]
 //
 vint16mf4_t test_vwmaccsu_vx_i16mf4(vint16mf4_t vd, int8_t rs1, vuint8mf8_t vs2, size_t vl) {
@@ -27,7 +27,7 @@ vint16mf4_t test_vwmaccsu_vx_i16mf4(vint16mf4_t vd, int8_t rs1, vuint8mf8_t vs2,
 
 // CHECK-RV64-LABEL: @test_vwmaccsu_vv_i16mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vwmaccsu.nxv2i16.nxv2i8.nxv2i8.i64(<vscale x 2 x i16> [[VD:%.*]], <vscale x 2 x i8> [[VS1:%.*]], <vscale x 2 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vwmaccsu.nxv2i16.nxv2i8.nxv2i8.i64(<vscale x 2 x i16> [[VD:%.*]], <vscale x 2 x i8> [[VS1:%.*]], <vscale x 2 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i16> [[TMP0]]
 //
 vint16mf2_t test_vwmaccsu_vv_i16mf2(vint16mf2_t vd, vint8mf4_t vs1, vuint8mf4_t vs2, size_t vl) {
@@ -36,7 +36,7 @@ vint16mf2_t test_vwmaccsu_vv_i16mf2(vint16mf2_t vd, vint8mf4_t vs1, vuint8mf4_t
 
 // CHECK-RV64-LABEL: @test_vwmaccsu_vx_i16mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vwmaccsu.nxv2i16.i8.nxv2i8.i64(<vscale x 2 x i16> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 2 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vwmaccsu.nxv2i16.i8.nxv2i8.i64(<vscale x 2 x i16> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 2 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i16> [[TMP0]]
 //
 vint16mf2_t test_vwmaccsu_vx_i16mf2(vint16mf2_t vd, int8_t rs1, vuint8mf4_t vs2, size_t vl) {
@@ -45,7 +45,7 @@ vint16mf2_t test_vwmaccsu_vx_i16mf2(vint16mf2_t vd, int8_t rs1, vuint8mf4_t vs2,
 
 // CHECK-RV64-LABEL: @test_vwmaccsu_vv_i16m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vwmaccsu.nxv4i16.nxv4i8.nxv4i8.i64(<vscale x 4 x i16> [[VD:%.*]], <vscale x 4 x i8> [[VS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vwmaccsu.nxv4i16.nxv4i8.nxv4i8.i64(<vscale x 4 x i16> [[VD:%.*]], <vscale x 4 x i8> [[VS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i16> [[TMP0]]
 //
 vint16m1_t test_vwmaccsu_vv_i16m1(vint16m1_t vd, vint8mf2_t vs1, vuint8mf2_t vs2, size_t vl) {
@@ -54,7 +54,7 @@ vint16m1_t test_vwmaccsu_vv_i16m1(vint16m1_t vd, vint8mf2_t vs1, vuint8mf2_t vs2
 
 // CHECK-RV64-LABEL: @test_vwmaccsu_vx_i16m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vwmaccsu.nxv4i16.i8.nxv4i8.i64(<vscale x 4 x i16> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vwmaccsu.nxv4i16.i8.nxv4i8.i64(<vscale x 4 x i16> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i16> [[TMP0]]
 //
 vint16m1_t test_vwmaccsu_vx_i16m1(vint16m1_t vd, int8_t rs1, vuint8mf2_t vs2, size_t vl) {
@@ -63,7 +63,7 @@ vint16m1_t test_vwmaccsu_vx_i16m1(vint16m1_t vd, int8_t rs1, vuint8mf2_t vs2, si
 
 // CHECK-RV64-LABEL: @test_vwmaccsu_vv_i16m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vwmaccsu.nxv8i16.nxv8i8.nxv8i8.i64(<vscale x 8 x i16> [[VD:%.*]], <vscale x 8 x i8> [[VS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vwmaccsu.nxv8i16.nxv8i8.nxv8i8.i64(<vscale x 8 x i16> [[VD:%.*]], <vscale x 8 x i8> [[VS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
 vint16m2_t test_vwmaccsu_vv_i16m2(vint16m2_t vd, vint8m1_t vs1, vuint8m1_t vs2, size_t vl) {
@@ -72,7 +72,7 @@ vint16m2_t test_vwmaccsu_vv_i16m2(vint16m2_t vd, vint8m1_t vs1, vuint8m1_t vs2,
 
 // CHECK-RV64-LABEL: @test_vwmaccsu_vx_i16m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vwmaccsu.nxv8i16.i8.nxv8i8.i64(<vscale x 8 x i16> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vwmaccsu.nxv8i16.i8.nxv8i8.i64(<vscale x 8 x i16> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
 vint16m2_t test_vwmaccsu_vx_i16m2(vint16m2_t vd, int8_t rs1, vuint8m1_t vs2, size_t vl) {
@@ -81,7 +81,7 @@ vint16m2_t test_vwmaccsu_vx_i16m2(vint16m2_t vd, int8_t rs1, vuint8m1_t vs2, siz
 
 // CHECK-RV64-LABEL: @test_vwmaccsu_vv_i16m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vwmaccsu.nxv16i16.nxv16i8.nxv16i8.i64(<vscale x 16 x i16> [[VD:%.*]], <vscale x 16 x i8> [[VS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vwmaccsu.nxv16i16.nxv16i8.nxv16i8.i64(<vscale x 16 x i16> [[VD:%.*]], <vscale x 16 x i8> [[VS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i16> [[TMP0]]
 //
 vint16m4_t test_vwmaccsu_vv_i16m4(vint16m4_t vd, vint8m2_t vs1, vuint8m2_t vs2, size_t vl) {
@@ -90,7 +90,7 @@ vint16m4_t test_vwmaccsu_vv_i16m4(vint16m4_t vd, vint8m2_t vs1, vuint8m2_t vs2,
 
 // CHECK-RV64-LABEL: @test_vwmaccsu_vx_i16m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vwmaccsu.nxv16i16.i8.nxv16i8.i64(<vscale x 16 x i16> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vwmaccsu.nxv16i16.i8.nxv16i8.i64(<vscale x 16 x i16> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i16> [[TMP0]]
 //
 vint16m4_t test_vwmaccsu_vx_i16m4(vint16m4_t vd, int8_t rs1, vuint8m2_t vs2, size_t vl) {
@@ -99,7 +99,7 @@ vint16m4_t test_vwmaccsu_vx_i16m4(vint16m4_t vd, int8_t rs1, vuint8m2_t vs2, siz
 
 // CHECK-RV64-LABEL: @test_vwmaccsu_vv_i16m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vwmaccsu.nxv32i16.nxv32i8.nxv32i8.i64(<vscale x 32 x i16> [[VD:%.*]], <vscale x 32 x i8> [[VS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vwmaccsu.nxv32i16.nxv32i8.nxv32i8.i64(<vscale x 32 x i16> [[VD:%.*]], <vscale x 32 x i8> [[VS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 32 x i16> [[TMP0]]
 //
 vint16m8_t test_vwmaccsu_vv_i16m8(vint16m8_t vd, vint8m4_t vs1, vuint8m4_t vs2, size_t vl) {
@@ -108,7 +108,7 @@ vint16m8_t test_vwmaccsu_vv_i16m8(vint16m8_t vd, vint8m4_t vs1, vuint8m4_t vs2,
 
 // CHECK-RV64-LABEL: @test_vwmaccsu_vx_i16m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vwmaccsu.nxv32i16.i8.nxv32i8.i64(<vscale x 32 x i16> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vwmaccsu.nxv32i16.i8.nxv32i8.i64(<vscale x 32 x i16> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 32 x i16> [[TMP0]]
 //
 vint16m8_t test_vwmaccsu_vx_i16m8(vint16m8_t vd, int8_t rs1, vuint8m4_t vs2, size_t vl) {
@@ -117,7 +117,7 @@ vint16m8_t test_vwmaccsu_vx_i16m8(vint16m8_t vd, int8_t rs1, vuint8m4_t vs2, siz
 
 // CHECK-RV64-LABEL: @test_vwmaccsu_vv_i32mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vwmaccsu.nxv1i32.nxv1i16.nxv1i16.i64(<vscale x 1 x i32> [[VD:%.*]], <vscale x 1 x i16> [[VS1:%.*]], <vscale x 1 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vwmaccsu.nxv1i32.nxv1i16.nxv1i16.i64(<vscale x 1 x i32> [[VD:%.*]], <vscale x 1 x i16> [[VS1:%.*]], <vscale x 1 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i32> [[TMP0]]
 //
 vint32mf2_t test_vwmaccsu_vv_i32mf2(vint32mf2_t vd, vint16mf4_t vs1, vuint16mf4_t vs2, size_t vl) {
@@ -126,7 +126,7 @@ vint32mf2_t test_vwmaccsu_vv_i32mf2(vint32mf2_t vd, vint16mf4_t vs1, vuint16mf4_
 
 // CHECK-RV64-LABEL: @test_vwmaccsu_vx_i32mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vwmaccsu.nxv1i32.i16.nxv1i16.i64(<vscale x 1 x i32> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 1 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vwmaccsu.nxv1i32.i16.nxv1i16.i64(<vscale x 1 x i32> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 1 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i32> [[TMP0]]
 //
 vint32mf2_t test_vwmaccsu_vx_i32mf2(vint32mf2_t vd, int16_t rs1, vuint16mf4_t vs2, size_t vl) {
@@ -135,7 +135,7 @@ vint32mf2_t test_vwmaccsu_vx_i32mf2(vint32mf2_t vd, int16_t rs1, vuint16mf4_t vs
 
 // CHECK-RV64-LABEL: @test_vwmaccsu_vv_i32m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vwmaccsu.nxv2i32.nxv2i16.nxv2i16.i64(<vscale x 2 x i32> [[VD:%.*]], <vscale x 2 x i16> [[VS1:%.*]], <vscale x 2 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vwmaccsu.nxv2i32.nxv2i16.nxv2i16.i64(<vscale x 2 x i32> [[VD:%.*]], <vscale x 2 x i16> [[VS1:%.*]], <vscale x 2 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i32> [[TMP0]]
 //
 vint32m1_t test_vwmaccsu_vv_i32m1(vint32m1_t vd, vint16mf2_t vs1, vuint16mf2_t vs2, size_t vl) {
@@ -144,7 +144,7 @@ vint32m1_t test_vwmaccsu_vv_i32m1(vint32m1_t vd, vint16mf2_t vs1, vuint16mf2_t v
 
 // CHECK-RV64-LABEL: @test_vwmaccsu_vx_i32m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vwmaccsu.nxv2i32.i16.nxv2i16.i64(<vscale x 2 x i32> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 2 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vwmaccsu.nxv2i32.i16.nxv2i16.i64(<vscale x 2 x i32> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 2 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i32> [[TMP0]]
 //
 vint32m1_t test_vwmaccsu_vx_i32m1(vint32m1_t vd, int16_t rs1, vuint16mf2_t vs2, size_t vl) {
@@ -153,7 +153,7 @@ vint32m1_t test_vwmaccsu_vx_i32m1(vint32m1_t vd, int16_t rs1, vuint16mf2_t vs2,
 
 // CHECK-RV64-LABEL: @test_vwmaccsu_vv_i32m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vwmaccsu.nxv4i32.nxv4i16.nxv4i16.i64(<vscale x 4 x i32> [[VD:%.*]], <vscale x 4 x i16> [[VS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vwmaccsu.nxv4i32.nxv4i16.nxv4i16.i64(<vscale x 4 x i32> [[VD:%.*]], <vscale x 4 x i16> [[VS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
 vint32m2_t test_vwmaccsu_vv_i32m2(vint32m2_t vd, vint16m1_t vs1, vuint16m1_t vs2, size_t vl) {
@@ -162,7 +162,7 @@ vint32m2_t test_vwmaccsu_vv_i32m2(vint32m2_t vd, vint16m1_t vs1, vuint16m1_t vs2
 
 // CHECK-RV64-LABEL: @test_vwmaccsu_vx_i32m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vwmaccsu.nxv4i32.i16.nxv4i16.i64(<vscale x 4 x i32> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vwmaccsu.nxv4i32.i16.nxv4i16.i64(<vscale x 4 x i32> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
 vint32m2_t test_vwmaccsu_vx_i32m2(vint32m2_t vd, int16_t rs1, vuint16m1_t vs2, size_t vl) {
@@ -171,7 +171,7 @@ vint32m2_t test_vwmaccsu_vx_i32m2(vint32m2_t vd, int16_t rs1, vuint16m1_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vwmaccsu_vv_i32m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vwmaccsu.nxv8i32.nxv8i16.nxv8i16.i64(<vscale x 8 x i32> [[VD:%.*]], <vscale x 8 x i16> [[VS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vwmaccsu.nxv8i32.nxv8i16.nxv8i16.i64(<vscale x 8 x i32> [[VD:%.*]], <vscale x 8 x i16> [[VS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i32> [[TMP0]]
 //
 vint32m4_t test_vwmaccsu_vv_i32m4(vint32m4_t vd, vint16m2_t vs1, vuint16m2_t vs2, size_t vl) {
@@ -180,7 +180,7 @@ vint32m4_t test_vwmaccsu_vv_i32m4(vint32m4_t vd, vint16m2_t vs1, vuint16m2_t vs2
 
 // CHECK-RV64-LABEL: @test_vwmaccsu_vx_i32m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vwmaccsu.nxv8i32.i16.nxv8i16.i64(<vscale x 8 x i32> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vwmaccsu.nxv8i32.i16.nxv8i16.i64(<vscale x 8 x i32> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i32> [[TMP0]]
 //
 vint32m4_t test_vwmaccsu_vx_i32m4(vint32m4_t vd, int16_t rs1, vuint16m2_t vs2, size_t vl) {
@@ -189,7 +189,7 @@ vint32m4_t test_vwmaccsu_vx_i32m4(vint32m4_t vd, int16_t rs1, vuint16m2_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vwmaccsu_vv_i32m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vwmaccsu.nxv16i32.nxv16i16.nxv16i16.i64(<vscale x 16 x i32> [[VD:%.*]], <vscale x 16 x i16> [[VS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vwmaccsu.nxv16i32.nxv16i16.nxv16i16.i64(<vscale x 16 x i32> [[VD:%.*]], <vscale x 16 x i16> [[VS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i32> [[TMP0]]
 //
 vint32m8_t test_vwmaccsu_vv_i32m8(vint32m8_t vd, vint16m4_t vs1, vuint16m4_t vs2, size_t vl) {
@@ -198,7 +198,7 @@ vint32m8_t test_vwmaccsu_vv_i32m8(vint32m8_t vd, vint16m4_t vs1, vuint16m4_t vs2
 
 // CHECK-RV64-LABEL: @test_vwmaccsu_vx_i32m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vwmaccsu.nxv16i32.i16.nxv16i16.i64(<vscale x 16 x i32> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vwmaccsu.nxv16i32.i16.nxv16i16.i64(<vscale x 16 x i32> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i32> [[TMP0]]
 //
 vint32m8_t test_vwmaccsu_vx_i32m8(vint32m8_t vd, int16_t rs1, vuint16m4_t vs2, size_t vl) {
@@ -207,7 +207,7 @@ vint32m8_t test_vwmaccsu_vx_i32m8(vint32m8_t vd, int16_t rs1, vuint16m4_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vwmaccsu_vv_i64m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vwmaccsu.nxv1i64.nxv1i32.nxv1i32.i64(<vscale x 1 x i64> [[VD:%.*]], <vscale x 1 x i32> [[VS1:%.*]], <vscale x 1 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vwmaccsu.nxv1i64.nxv1i32.nxv1i32.i64(<vscale x 1 x i64> [[VD:%.*]], <vscale x 1 x i32> [[VS1:%.*]], <vscale x 1 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
 //
 vint64m1_t test_vwmaccsu_vv_i64m1(vint64m1_t vd, vint32mf2_t vs1, vuint32mf2_t vs2, size_t vl) {
@@ -216,7 +216,7 @@ vint64m1_t test_vwmaccsu_vv_i64m1(vint64m1_t vd, vint32mf2_t vs1, vuint32mf2_t v
 
 // CHECK-RV64-LABEL: @test_vwmaccsu_vx_i64m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vwmaccsu.nxv1i64.i32.nxv1i32.i64(<vscale x 1 x i64> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 1 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vwmaccsu.nxv1i64.i32.nxv1i32.i64(<vscale x 1 x i64> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 1 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
 //
 vint64m1_t test_vwmaccsu_vx_i64m1(vint64m1_t vd, int32_t rs1, vuint32mf2_t vs2, size_t vl) {
@@ -225,7 +225,7 @@ vint64m1_t test_vwmaccsu_vx_i64m1(vint64m1_t vd, int32_t rs1, vuint32mf2_t vs2,
 
 // CHECK-RV64-LABEL: @test_vwmaccsu_vv_i64m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vwmaccsu.nxv2i64.nxv2i32.nxv2i32.i64(<vscale x 2 x i64> [[VD:%.*]], <vscale x 2 x i32> [[VS1:%.*]], <vscale x 2 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vwmaccsu.nxv2i64.nxv2i32.nxv2i32.i64(<vscale x 2 x i64> [[VD:%.*]], <vscale x 2 x i32> [[VS1:%.*]], <vscale x 2 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
 vint64m2_t test_vwmaccsu_vv_i64m2(vint64m2_t vd, vint32m1_t vs1, vuint32m1_t vs2, size_t vl) {
@@ -234,7 +234,7 @@ vint64m2_t test_vwmaccsu_vv_i64m2(vint64m2_t vd, vint32m1_t vs1, vuint32m1_t vs2
 
 // CHECK-RV64-LABEL: @test_vwmaccsu_vx_i64m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vwmaccsu.nxv2i64.i32.nxv2i32.i64(<vscale x 2 x i64> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 2 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vwmaccsu.nxv2i64.i32.nxv2i32.i64(<vscale x 2 x i64> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 2 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
 vint64m2_t test_vwmaccsu_vx_i64m2(vint64m2_t vd, int32_t rs1, vuint32m1_t vs2, size_t vl) {
@@ -243,7 +243,7 @@ vint64m2_t test_vwmaccsu_vx_i64m2(vint64m2_t vd, int32_t rs1, vuint32m1_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vwmaccsu_vv_i64m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vwmaccsu.nxv4i64.nxv4i32.nxv4i32.i64(<vscale x 4 x i64> [[VD:%.*]], <vscale x 4 x i32> [[VS1:%.*]], <vscale x 4 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vwmaccsu.nxv4i64.nxv4i32.nxv4i32.i64(<vscale x 4 x i64> [[VD:%.*]], <vscale x 4 x i32> [[VS1:%.*]], <vscale x 4 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
 //
 vint64m4_t test_vwmaccsu_vv_i64m4(vint64m4_t vd, vint32m2_t vs1, vuint32m2_t vs2, size_t vl) {
@@ -252,7 +252,7 @@ vint64m4_t test_vwmaccsu_vv_i64m4(vint64m4_t vd, vint32m2_t vs1, vuint32m2_t vs2
 
 // CHECK-RV64-LABEL: @test_vwmaccsu_vx_i64m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vwmaccsu.nxv4i64.i32.nxv4i32.i64(<vscale x 4 x i64> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 4 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vwmaccsu.nxv4i64.i32.nxv4i32.i64(<vscale x 4 x i64> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 4 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
 //
 vint64m4_t test_vwmaccsu_vx_i64m4(vint64m4_t vd, int32_t rs1, vuint32m2_t vs2, size_t vl) {
@@ -261,7 +261,7 @@ vint64m4_t test_vwmaccsu_vx_i64m4(vint64m4_t vd, int32_t rs1, vuint32m2_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vwmaccsu_vv_i64m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vwmaccsu.nxv8i64.nxv8i32.nxv8i32.i64(<vscale x 8 x i64> [[VD:%.*]], <vscale x 8 x i32> [[VS1:%.*]], <vscale x 8 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vwmaccsu.nxv8i64.nxv8i32.nxv8i32.i64(<vscale x 8 x i64> [[VD:%.*]], <vscale x 8 x i32> [[VS1:%.*]], <vscale x 8 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
 //
 vint64m8_t test_vwmaccsu_vv_i64m8(vint64m8_t vd, vint32m4_t vs1, vuint32m4_t vs2, size_t vl) {
@@ -270,7 +270,7 @@ vint64m8_t test_vwmaccsu_vv_i64m8(vint64m8_t vd, vint32m4_t vs1, vuint32m4_t vs2
 
 // CHECK-RV64-LABEL: @test_vwmaccsu_vx_i64m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vwmaccsu.nxv8i64.i32.nxv8i32.i64(<vscale x 8 x i64> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 8 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vwmaccsu.nxv8i64.i32.nxv8i32.i64(<vscale x 8 x i64> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 8 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
 //
 vint64m8_t test_vwmaccsu_vx_i64m8(vint64m8_t vd, int32_t rs1, vuint32m4_t vs2, size_t vl) {

diff  --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vwmaccu.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vwmaccu.c
index c28439413a708..8ac0f646c7eab 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vwmaccu.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vwmaccu.c
@@ -9,7 +9,7 @@
 
 // CHECK-RV64-LABEL: @test_vwmaccu_vv_u16mf4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vwmaccu.nxv1i16.nxv1i8.nxv1i8.i64(<vscale x 1 x i16> [[VD:%.*]], <vscale x 1 x i8> [[VS1:%.*]], <vscale x 1 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vwmaccu.nxv1i16.nxv1i8.nxv1i8.i64(<vscale x 1 x i16> [[VD:%.*]], <vscale x 1 x i8> [[VS1:%.*]], <vscale x 1 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i16> [[TMP0]]
 //
 vuint16mf4_t test_vwmaccu_vv_u16mf4(vuint16mf4_t vd, vuint8mf8_t vs1, vuint8mf8_t vs2, size_t vl) {
@@ -18,7 +18,7 @@ vuint16mf4_t test_vwmaccu_vv_u16mf4(vuint16mf4_t vd, vuint8mf8_t vs1, vuint8mf8_
 
 // CHECK-RV64-LABEL: @test_vwmaccu_vx_u16mf4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vwmaccu.nxv1i16.i8.nxv1i8.i64(<vscale x 1 x i16> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 1 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vwmaccu.nxv1i16.i8.nxv1i8.i64(<vscale x 1 x i16> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 1 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i16> [[TMP0]]
 //
 vuint16mf4_t test_vwmaccu_vx_u16mf4(vuint16mf4_t vd, uint8_t rs1, vuint8mf8_t vs2, size_t vl) {
@@ -27,7 +27,7 @@ vuint16mf4_t test_vwmaccu_vx_u16mf4(vuint16mf4_t vd, uint8_t rs1, vuint8mf8_t vs
 
 // CHECK-RV64-LABEL: @test_vwmaccu_vv_u16mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vwmaccu.nxv2i16.nxv2i8.nxv2i8.i64(<vscale x 2 x i16> [[VD:%.*]], <vscale x 2 x i8> [[VS1:%.*]], <vscale x 2 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vwmaccu.nxv2i16.nxv2i8.nxv2i8.i64(<vscale x 2 x i16> [[VD:%.*]], <vscale x 2 x i8> [[VS1:%.*]], <vscale x 2 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i16> [[TMP0]]
 //
 vuint16mf2_t test_vwmaccu_vv_u16mf2(vuint16mf2_t vd, vuint8mf4_t vs1, vuint8mf4_t vs2, size_t vl) {
@@ -36,7 +36,7 @@ vuint16mf2_t test_vwmaccu_vv_u16mf2(vuint16mf2_t vd, vuint8mf4_t vs1, vuint8mf4_
 
 // CHECK-RV64-LABEL: @test_vwmaccu_vx_u16mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vwmaccu.nxv2i16.i8.nxv2i8.i64(<vscale x 2 x i16> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 2 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vwmaccu.nxv2i16.i8.nxv2i8.i64(<vscale x 2 x i16> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 2 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i16> [[TMP0]]
 //
 vuint16mf2_t test_vwmaccu_vx_u16mf2(vuint16mf2_t vd, uint8_t rs1, vuint8mf4_t vs2, size_t vl) {
@@ -45,7 +45,7 @@ vuint16mf2_t test_vwmaccu_vx_u16mf2(vuint16mf2_t vd, uint8_t rs1, vuint8mf4_t vs
 
 // CHECK-RV64-LABEL: @test_vwmaccu_vv_u16m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vwmaccu.nxv4i16.nxv4i8.nxv4i8.i64(<vscale x 4 x i16> [[VD:%.*]], <vscale x 4 x i8> [[VS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vwmaccu.nxv4i16.nxv4i8.nxv4i8.i64(<vscale x 4 x i16> [[VD:%.*]], <vscale x 4 x i8> [[VS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i16> [[TMP0]]
 //
 vuint16m1_t test_vwmaccu_vv_u16m1(vuint16m1_t vd, vuint8mf2_t vs1, vuint8mf2_t vs2, size_t vl) {
@@ -54,7 +54,7 @@ vuint16m1_t test_vwmaccu_vv_u16m1(vuint16m1_t vd, vuint8mf2_t vs1, vuint8mf2_t v
 
 // CHECK-RV64-LABEL: @test_vwmaccu_vx_u16m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vwmaccu.nxv4i16.i8.nxv4i8.i64(<vscale x 4 x i16> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vwmaccu.nxv4i16.i8.nxv4i8.i64(<vscale x 4 x i16> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i16> [[TMP0]]
 //
 vuint16m1_t test_vwmaccu_vx_u16m1(vuint16m1_t vd, uint8_t rs1, vuint8mf2_t vs2, size_t vl) {
@@ -63,7 +63,7 @@ vuint16m1_t test_vwmaccu_vx_u16m1(vuint16m1_t vd, uint8_t rs1, vuint8mf2_t vs2,
 
 // CHECK-RV64-LABEL: @test_vwmaccu_vv_u16m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vwmaccu.nxv8i16.nxv8i8.nxv8i8.i64(<vscale x 8 x i16> [[VD:%.*]], <vscale x 8 x i8> [[VS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vwmaccu.nxv8i16.nxv8i8.nxv8i8.i64(<vscale x 8 x i16> [[VD:%.*]], <vscale x 8 x i8> [[VS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
 vuint16m2_t test_vwmaccu_vv_u16m2(vuint16m2_t vd, vuint8m1_t vs1, vuint8m1_t vs2, size_t vl) {
@@ -72,7 +72,7 @@ vuint16m2_t test_vwmaccu_vv_u16m2(vuint16m2_t vd, vuint8m1_t vs1, vuint8m1_t vs2
 
 // CHECK-RV64-LABEL: @test_vwmaccu_vx_u16m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vwmaccu.nxv8i16.i8.nxv8i8.i64(<vscale x 8 x i16> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vwmaccu.nxv8i16.i8.nxv8i8.i64(<vscale x 8 x i16> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
 vuint16m2_t test_vwmaccu_vx_u16m2(vuint16m2_t vd, uint8_t rs1, vuint8m1_t vs2, size_t vl) {
@@ -81,7 +81,7 @@ vuint16m2_t test_vwmaccu_vx_u16m2(vuint16m2_t vd, uint8_t rs1, vuint8m1_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vwmaccu_vv_u16m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vwmaccu.nxv16i16.nxv16i8.nxv16i8.i64(<vscale x 16 x i16> [[VD:%.*]], <vscale x 16 x i8> [[VS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vwmaccu.nxv16i16.nxv16i8.nxv16i8.i64(<vscale x 16 x i16> [[VD:%.*]], <vscale x 16 x i8> [[VS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i16> [[TMP0]]
 //
 vuint16m4_t test_vwmaccu_vv_u16m4(vuint16m4_t vd, vuint8m2_t vs1, vuint8m2_t vs2, size_t vl) {
@@ -90,7 +90,7 @@ vuint16m4_t test_vwmaccu_vv_u16m4(vuint16m4_t vd, vuint8m2_t vs1, vuint8m2_t vs2
 
 // CHECK-RV64-LABEL: @test_vwmaccu_vx_u16m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vwmaccu.nxv16i16.i8.nxv16i8.i64(<vscale x 16 x i16> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vwmaccu.nxv16i16.i8.nxv16i8.i64(<vscale x 16 x i16> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i16> [[TMP0]]
 //
 vuint16m4_t test_vwmaccu_vx_u16m4(vuint16m4_t vd, uint8_t rs1, vuint8m2_t vs2, size_t vl) {
@@ -99,7 +99,7 @@ vuint16m4_t test_vwmaccu_vx_u16m4(vuint16m4_t vd, uint8_t rs1, vuint8m2_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vwmaccu_vv_u16m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vwmaccu.nxv32i16.nxv32i8.nxv32i8.i64(<vscale x 32 x i16> [[VD:%.*]], <vscale x 32 x i8> [[VS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vwmaccu.nxv32i16.nxv32i8.nxv32i8.i64(<vscale x 32 x i16> [[VD:%.*]], <vscale x 32 x i8> [[VS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 32 x i16> [[TMP0]]
 //
 vuint16m8_t test_vwmaccu_vv_u16m8(vuint16m8_t vd, vuint8m4_t vs1, vuint8m4_t vs2, size_t vl) {
@@ -108,7 +108,7 @@ vuint16m8_t test_vwmaccu_vv_u16m8(vuint16m8_t vd, vuint8m4_t vs1, vuint8m4_t vs2
 
 // CHECK-RV64-LABEL: @test_vwmaccu_vx_u16m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vwmaccu.nxv32i16.i8.nxv32i8.i64(<vscale x 32 x i16> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vwmaccu.nxv32i16.i8.nxv32i8.i64(<vscale x 32 x i16> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 32 x i16> [[TMP0]]
 //
 vuint16m8_t test_vwmaccu_vx_u16m8(vuint16m8_t vd, uint8_t rs1, vuint8m4_t vs2, size_t vl) {
@@ -117,7 +117,7 @@ vuint16m8_t test_vwmaccu_vx_u16m8(vuint16m8_t vd, uint8_t rs1, vuint8m4_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vwmaccu_vv_u32mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vwmaccu.nxv1i32.nxv1i16.nxv1i16.i64(<vscale x 1 x i32> [[VD:%.*]], <vscale x 1 x i16> [[VS1:%.*]], <vscale x 1 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vwmaccu.nxv1i32.nxv1i16.nxv1i16.i64(<vscale x 1 x i32> [[VD:%.*]], <vscale x 1 x i16> [[VS1:%.*]], <vscale x 1 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i32> [[TMP0]]
 //
 vuint32mf2_t test_vwmaccu_vv_u32mf2(vuint32mf2_t vd, vuint16mf4_t vs1, vuint16mf4_t vs2, size_t vl) {
@@ -126,7 +126,7 @@ vuint32mf2_t test_vwmaccu_vv_u32mf2(vuint32mf2_t vd, vuint16mf4_t vs1, vuint16mf
 
 // CHECK-RV64-LABEL: @test_vwmaccu_vx_u32mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vwmaccu.nxv1i32.i16.nxv1i16.i64(<vscale x 1 x i32> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 1 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vwmaccu.nxv1i32.i16.nxv1i16.i64(<vscale x 1 x i32> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 1 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i32> [[TMP0]]
 //
 vuint32mf2_t test_vwmaccu_vx_u32mf2(vuint32mf2_t vd, uint16_t rs1, vuint16mf4_t vs2, size_t vl) {
@@ -135,7 +135,7 @@ vuint32mf2_t test_vwmaccu_vx_u32mf2(vuint32mf2_t vd, uint16_t rs1, vuint16mf4_t
 
 // CHECK-RV64-LABEL: @test_vwmaccu_vv_u32m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vwmaccu.nxv2i32.nxv2i16.nxv2i16.i64(<vscale x 2 x i32> [[VD:%.*]], <vscale x 2 x i16> [[VS1:%.*]], <vscale x 2 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vwmaccu.nxv2i32.nxv2i16.nxv2i16.i64(<vscale x 2 x i32> [[VD:%.*]], <vscale x 2 x i16> [[VS1:%.*]], <vscale x 2 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i32> [[TMP0]]
 //
 vuint32m1_t test_vwmaccu_vv_u32m1(vuint32m1_t vd, vuint16mf2_t vs1, vuint16mf2_t vs2, size_t vl) {
@@ -144,7 +144,7 @@ vuint32m1_t test_vwmaccu_vv_u32m1(vuint32m1_t vd, vuint16mf2_t vs1, vuint16mf2_t
 
 // CHECK-RV64-LABEL: @test_vwmaccu_vx_u32m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vwmaccu.nxv2i32.i16.nxv2i16.i64(<vscale x 2 x i32> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 2 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vwmaccu.nxv2i32.i16.nxv2i16.i64(<vscale x 2 x i32> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 2 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i32> [[TMP0]]
 //
 vuint32m1_t test_vwmaccu_vx_u32m1(vuint32m1_t vd, uint16_t rs1, vuint16mf2_t vs2, size_t vl) {
@@ -153,7 +153,7 @@ vuint32m1_t test_vwmaccu_vx_u32m1(vuint32m1_t vd, uint16_t rs1, vuint16mf2_t vs2
 
 // CHECK-RV64-LABEL: @test_vwmaccu_vv_u32m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vwmaccu.nxv4i32.nxv4i16.nxv4i16.i64(<vscale x 4 x i32> [[VD:%.*]], <vscale x 4 x i16> [[VS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vwmaccu.nxv4i32.nxv4i16.nxv4i16.i64(<vscale x 4 x i32> [[VD:%.*]], <vscale x 4 x i16> [[VS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
 vuint32m2_t test_vwmaccu_vv_u32m2(vuint32m2_t vd, vuint16m1_t vs1, vuint16m1_t vs2, size_t vl) {
@@ -162,7 +162,7 @@ vuint32m2_t test_vwmaccu_vv_u32m2(vuint32m2_t vd, vuint16m1_t vs1, vuint16m1_t v
 
 // CHECK-RV64-LABEL: @test_vwmaccu_vx_u32m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vwmaccu.nxv4i32.i16.nxv4i16.i64(<vscale x 4 x i32> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vwmaccu.nxv4i32.i16.nxv4i16.i64(<vscale x 4 x i32> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
 vuint32m2_t test_vwmaccu_vx_u32m2(vuint32m2_t vd, uint16_t rs1, vuint16m1_t vs2, size_t vl) {
@@ -171,7 +171,7 @@ vuint32m2_t test_vwmaccu_vx_u32m2(vuint32m2_t vd, uint16_t rs1, vuint16m1_t vs2,
 
 // CHECK-RV64-LABEL: @test_vwmaccu_vv_u32m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vwmaccu.nxv8i32.nxv8i16.nxv8i16.i64(<vscale x 8 x i32> [[VD:%.*]], <vscale x 8 x i16> [[VS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vwmaccu.nxv8i32.nxv8i16.nxv8i16.i64(<vscale x 8 x i32> [[VD:%.*]], <vscale x 8 x i16> [[VS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i32> [[TMP0]]
 //
 vuint32m4_t test_vwmaccu_vv_u32m4(vuint32m4_t vd, vuint16m2_t vs1, vuint16m2_t vs2, size_t vl) {
@@ -180,7 +180,7 @@ vuint32m4_t test_vwmaccu_vv_u32m4(vuint32m4_t vd, vuint16m2_t vs1, vuint16m2_t v
 
 // CHECK-RV64-LABEL: @test_vwmaccu_vx_u32m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vwmaccu.nxv8i32.i16.nxv8i16.i64(<vscale x 8 x i32> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vwmaccu.nxv8i32.i16.nxv8i16.i64(<vscale x 8 x i32> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i32> [[TMP0]]
 //
 vuint32m4_t test_vwmaccu_vx_u32m4(vuint32m4_t vd, uint16_t rs1, vuint16m2_t vs2, size_t vl) {
@@ -189,7 +189,7 @@ vuint32m4_t test_vwmaccu_vx_u32m4(vuint32m4_t vd, uint16_t rs1, vuint16m2_t vs2,
 
 // CHECK-RV64-LABEL: @test_vwmaccu_vv_u32m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vwmaccu.nxv16i32.nxv16i16.nxv16i16.i64(<vscale x 16 x i32> [[VD:%.*]], <vscale x 16 x i16> [[VS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vwmaccu.nxv16i32.nxv16i16.nxv16i16.i64(<vscale x 16 x i32> [[VD:%.*]], <vscale x 16 x i16> [[VS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i32> [[TMP0]]
 //
 vuint32m8_t test_vwmaccu_vv_u32m8(vuint32m8_t vd, vuint16m4_t vs1, vuint16m4_t vs2, size_t vl) {
@@ -198,7 +198,7 @@ vuint32m8_t test_vwmaccu_vv_u32m8(vuint32m8_t vd, vuint16m4_t vs1, vuint16m4_t v
 
 // CHECK-RV64-LABEL: @test_vwmaccu_vx_u32m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vwmaccu.nxv16i32.i16.nxv16i16.i64(<vscale x 16 x i32> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vwmaccu.nxv16i32.i16.nxv16i16.i64(<vscale x 16 x i32> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i32> [[TMP0]]
 //
 vuint32m8_t test_vwmaccu_vx_u32m8(vuint32m8_t vd, uint16_t rs1, vuint16m4_t vs2, size_t vl) {
@@ -207,7 +207,7 @@ vuint32m8_t test_vwmaccu_vx_u32m8(vuint32m8_t vd, uint16_t rs1, vuint16m4_t vs2,
 
 // CHECK-RV64-LABEL: @test_vwmaccu_vv_u64m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vwmaccu.nxv1i64.nxv1i32.nxv1i32.i64(<vscale x 1 x i64> [[VD:%.*]], <vscale x 1 x i32> [[VS1:%.*]], <vscale x 1 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vwmaccu.nxv1i64.nxv1i32.nxv1i32.i64(<vscale x 1 x i64> [[VD:%.*]], <vscale x 1 x i32> [[VS1:%.*]], <vscale x 1 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
 //
 vuint64m1_t test_vwmaccu_vv_u64m1(vuint64m1_t vd, vuint32mf2_t vs1, vuint32mf2_t vs2, size_t vl) {
@@ -216,7 +216,7 @@ vuint64m1_t test_vwmaccu_vv_u64m1(vuint64m1_t vd, vuint32mf2_t vs1, vuint32mf2_t
 
 // CHECK-RV64-LABEL: @test_vwmaccu_vx_u64m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vwmaccu.nxv1i64.i32.nxv1i32.i64(<vscale x 1 x i64> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 1 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vwmaccu.nxv1i64.i32.nxv1i32.i64(<vscale x 1 x i64> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 1 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
 //
 vuint64m1_t test_vwmaccu_vx_u64m1(vuint64m1_t vd, uint32_t rs1, vuint32mf2_t vs2, size_t vl) {
@@ -225,7 +225,7 @@ vuint64m1_t test_vwmaccu_vx_u64m1(vuint64m1_t vd, uint32_t rs1, vuint32mf2_t vs2
 
 // CHECK-RV64-LABEL: @test_vwmaccu_vv_u64m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vwmaccu.nxv2i64.nxv2i32.nxv2i32.i64(<vscale x 2 x i64> [[VD:%.*]], <vscale x 2 x i32> [[VS1:%.*]], <vscale x 2 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vwmaccu.nxv2i64.nxv2i32.nxv2i32.i64(<vscale x 2 x i64> [[VD:%.*]], <vscale x 2 x i32> [[VS1:%.*]], <vscale x 2 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
 vuint64m2_t test_vwmaccu_vv_u64m2(vuint64m2_t vd, vuint32m1_t vs1, vuint32m1_t vs2, size_t vl) {
@@ -234,7 +234,7 @@ vuint64m2_t test_vwmaccu_vv_u64m2(vuint64m2_t vd, vuint32m1_t vs1, vuint32m1_t v
 
 // CHECK-RV64-LABEL: @test_vwmaccu_vx_u64m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vwmaccu.nxv2i64.i32.nxv2i32.i64(<vscale x 2 x i64> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 2 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vwmaccu.nxv2i64.i32.nxv2i32.i64(<vscale x 2 x i64> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 2 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
 vuint64m2_t test_vwmaccu_vx_u64m2(vuint64m2_t vd, uint32_t rs1, vuint32m1_t vs2, size_t vl) {
@@ -243,7 +243,7 @@ vuint64m2_t test_vwmaccu_vx_u64m2(vuint64m2_t vd, uint32_t rs1, vuint32m1_t vs2,
 
 // CHECK-RV64-LABEL: @test_vwmaccu_vv_u64m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vwmaccu.nxv4i64.nxv4i32.nxv4i32.i64(<vscale x 4 x i64> [[VD:%.*]], <vscale x 4 x i32> [[VS1:%.*]], <vscale x 4 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vwmaccu.nxv4i64.nxv4i32.nxv4i32.i64(<vscale x 4 x i64> [[VD:%.*]], <vscale x 4 x i32> [[VS1:%.*]], <vscale x 4 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
 //
 vuint64m4_t test_vwmaccu_vv_u64m4(vuint64m4_t vd, vuint32m2_t vs1, vuint32m2_t vs2, size_t vl) {
@@ -252,7 +252,7 @@ vuint64m4_t test_vwmaccu_vv_u64m4(vuint64m4_t vd, vuint32m2_t vs1, vuint32m2_t v
 
 // CHECK-RV64-LABEL: @test_vwmaccu_vx_u64m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vwmaccu.nxv4i64.i32.nxv4i32.i64(<vscale x 4 x i64> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 4 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vwmaccu.nxv4i64.i32.nxv4i32.i64(<vscale x 4 x i64> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 4 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
 //
 vuint64m4_t test_vwmaccu_vx_u64m4(vuint64m4_t vd, uint32_t rs1, vuint32m2_t vs2, size_t vl) {
@@ -261,7 +261,7 @@ vuint64m4_t test_vwmaccu_vx_u64m4(vuint64m4_t vd, uint32_t rs1, vuint32m2_t vs2,
 
 // CHECK-RV64-LABEL: @test_vwmaccu_vv_u64m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vwmaccu.nxv8i64.nxv8i32.nxv8i32.i64(<vscale x 8 x i64> [[VD:%.*]], <vscale x 8 x i32> [[VS1:%.*]], <vscale x 8 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vwmaccu.nxv8i64.nxv8i32.nxv8i32.i64(<vscale x 8 x i64> [[VD:%.*]], <vscale x 8 x i32> [[VS1:%.*]], <vscale x 8 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
 //
 vuint64m8_t test_vwmaccu_vv_u64m8(vuint64m8_t vd, vuint32m4_t vs1, vuint32m4_t vs2, size_t vl) {
@@ -270,7 +270,7 @@ vuint64m8_t test_vwmaccu_vv_u64m8(vuint64m8_t vd, vuint32m4_t vs1, vuint32m4_t v
 
 // CHECK-RV64-LABEL: @test_vwmaccu_vx_u64m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vwmaccu.nxv8i64.i32.nxv8i32.i64(<vscale x 8 x i64> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 8 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vwmaccu.nxv8i64.i32.nxv8i32.i64(<vscale x 8 x i64> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 8 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
 //
 vuint64m8_t test_vwmaccu_vx_u64m8(vuint64m8_t vd, uint32_t rs1, vuint32m4_t vs2, size_t vl) {

diff  --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vwmaccus.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vwmaccus.c
index 28cc80ba07b55..ce48153787b1d 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vwmaccus.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vwmaccus.c
@@ -9,7 +9,7 @@
 
 // CHECK-RV64-LABEL: @test_vwmaccus_vx_i16mf4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vwmaccus.nxv1i16.i8.nxv1i8.i64(<vscale x 1 x i16> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 1 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vwmaccus.nxv1i16.i8.nxv1i8.i64(<vscale x 1 x i16> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 1 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i16> [[TMP0]]
 //
 vint16mf4_t test_vwmaccus_vx_i16mf4(vint16mf4_t vd, uint8_t rs1, vint8mf8_t vs2, size_t vl) {
@@ -18,7 +18,7 @@ vint16mf4_t test_vwmaccus_vx_i16mf4(vint16mf4_t vd, uint8_t rs1, vint8mf8_t vs2,
 
 // CHECK-RV64-LABEL: @test_vwmaccus_vx_i16mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vwmaccus.nxv2i16.i8.nxv2i8.i64(<vscale x 2 x i16> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 2 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vwmaccus.nxv2i16.i8.nxv2i8.i64(<vscale x 2 x i16> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 2 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i16> [[TMP0]]
 //
 vint16mf2_t test_vwmaccus_vx_i16mf2(vint16mf2_t vd, uint8_t rs1, vint8mf4_t vs2, size_t vl) {
@@ -27,7 +27,7 @@ vint16mf2_t test_vwmaccus_vx_i16mf2(vint16mf2_t vd, uint8_t rs1, vint8mf4_t vs2,
 
 // CHECK-RV64-LABEL: @test_vwmaccus_vx_i16m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vwmaccus.nxv4i16.i8.nxv4i8.i64(<vscale x 4 x i16> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vwmaccus.nxv4i16.i8.nxv4i8.i64(<vscale x 4 x i16> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i16> [[TMP0]]
 //
 vint16m1_t test_vwmaccus_vx_i16m1(vint16m1_t vd, uint8_t rs1, vint8mf2_t vs2, size_t vl) {
@@ -36,7 +36,7 @@ vint16m1_t test_vwmaccus_vx_i16m1(vint16m1_t vd, uint8_t rs1, vint8mf2_t vs2, si
 
 // CHECK-RV64-LABEL: @test_vwmaccus_vx_i16m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vwmaccus.nxv8i16.i8.nxv8i8.i64(<vscale x 8 x i16> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vwmaccus.nxv8i16.i8.nxv8i8.i64(<vscale x 8 x i16> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
 vint16m2_t test_vwmaccus_vx_i16m2(vint16m2_t vd, uint8_t rs1, vint8m1_t vs2, size_t vl) {
@@ -45,7 +45,7 @@ vint16m2_t test_vwmaccus_vx_i16m2(vint16m2_t vd, uint8_t rs1, vint8m1_t vs2, siz
 
 // CHECK-RV64-LABEL: @test_vwmaccus_vx_i16m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vwmaccus.nxv16i16.i8.nxv16i8.i64(<vscale x 16 x i16> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vwmaccus.nxv16i16.i8.nxv16i8.i64(<vscale x 16 x i16> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i16> [[TMP0]]
 //
 vint16m4_t test_vwmaccus_vx_i16m4(vint16m4_t vd, uint8_t rs1, vint8m2_t vs2, size_t vl) {
@@ -54,7 +54,7 @@ vint16m4_t test_vwmaccus_vx_i16m4(vint16m4_t vd, uint8_t rs1, vint8m2_t vs2, siz
 
 // CHECK-RV64-LABEL: @test_vwmaccus_vx_i16m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vwmaccus.nxv32i16.i8.nxv32i8.i64(<vscale x 32 x i16> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vwmaccus.nxv32i16.i8.nxv32i8.i64(<vscale x 32 x i16> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 32 x i16> [[TMP0]]
 //
 vint16m8_t test_vwmaccus_vx_i16m8(vint16m8_t vd, uint8_t rs1, vint8m4_t vs2, size_t vl) {
@@ -63,7 +63,7 @@ vint16m8_t test_vwmaccus_vx_i16m8(vint16m8_t vd, uint8_t rs1, vint8m4_t vs2, siz
 
 // CHECK-RV64-LABEL: @test_vwmaccus_vx_i32mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vwmaccus.nxv1i32.i16.nxv1i16.i64(<vscale x 1 x i32> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 1 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vwmaccus.nxv1i32.i16.nxv1i16.i64(<vscale x 1 x i32> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 1 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i32> [[TMP0]]
 //
 vint32mf2_t test_vwmaccus_vx_i32mf2(vint32mf2_t vd, uint16_t rs1, vint16mf4_t vs2, size_t vl) {
@@ -72,7 +72,7 @@ vint32mf2_t test_vwmaccus_vx_i32mf2(vint32mf2_t vd, uint16_t rs1, vint16mf4_t vs
 
 // CHECK-RV64-LABEL: @test_vwmaccus_vx_i32m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vwmaccus.nxv2i32.i16.nxv2i16.i64(<vscale x 2 x i32> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 2 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vwmaccus.nxv2i32.i16.nxv2i16.i64(<vscale x 2 x i32> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 2 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i32> [[TMP0]]
 //
 vint32m1_t test_vwmaccus_vx_i32m1(vint32m1_t vd, uint16_t rs1, vint16mf2_t vs2, size_t vl) {
@@ -81,7 +81,7 @@ vint32m1_t test_vwmaccus_vx_i32m1(vint32m1_t vd, uint16_t rs1, vint16mf2_t vs2,
 
 // CHECK-RV64-LABEL: @test_vwmaccus_vx_i32m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vwmaccus.nxv4i32.i16.nxv4i16.i64(<vscale x 4 x i32> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vwmaccus.nxv4i32.i16.nxv4i16.i64(<vscale x 4 x i32> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
 vint32m2_t test_vwmaccus_vx_i32m2(vint32m2_t vd, uint16_t rs1, vint16m1_t vs2, size_t vl) {
@@ -90,7 +90,7 @@ vint32m2_t test_vwmaccus_vx_i32m2(vint32m2_t vd, uint16_t rs1, vint16m1_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vwmaccus_vx_i32m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vwmaccus.nxv8i32.i16.nxv8i16.i64(<vscale x 8 x i32> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vwmaccus.nxv8i32.i16.nxv8i16.i64(<vscale x 8 x i32> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i32> [[TMP0]]
 //
 vint32m4_t test_vwmaccus_vx_i32m4(vint32m4_t vd, uint16_t rs1, vint16m2_t vs2, size_t vl) {
@@ -99,7 +99,7 @@ vint32m4_t test_vwmaccus_vx_i32m4(vint32m4_t vd, uint16_t rs1, vint16m2_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vwmaccus_vx_i32m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vwmaccus.nxv16i32.i16.nxv16i16.i64(<vscale x 16 x i32> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vwmaccus.nxv16i32.i16.nxv16i16.i64(<vscale x 16 x i32> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i32> [[TMP0]]
 //
 vint32m8_t test_vwmaccus_vx_i32m8(vint32m8_t vd, uint16_t rs1, vint16m4_t vs2, size_t vl) {
@@ -108,7 +108,7 @@ vint32m8_t test_vwmaccus_vx_i32m8(vint32m8_t vd, uint16_t rs1, vint16m4_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vwmaccus_vx_i64m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vwmaccus.nxv1i64.i32.nxv1i32.i64(<vscale x 1 x i64> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 1 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vwmaccus.nxv1i64.i32.nxv1i32.i64(<vscale x 1 x i64> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 1 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
 //
 vint64m1_t test_vwmaccus_vx_i64m1(vint64m1_t vd, uint32_t rs1, vint32mf2_t vs2, size_t vl) {
@@ -117,7 +117,7 @@ vint64m1_t test_vwmaccus_vx_i64m1(vint64m1_t vd, uint32_t rs1, vint32mf2_t vs2,
 
 // CHECK-RV64-LABEL: @test_vwmaccus_vx_i64m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vwmaccus.nxv2i64.i32.nxv2i32.i64(<vscale x 2 x i64> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 2 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vwmaccus.nxv2i64.i32.nxv2i32.i64(<vscale x 2 x i64> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 2 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
 vint64m2_t test_vwmaccus_vx_i64m2(vint64m2_t vd, uint32_t rs1, vint32m1_t vs2, size_t vl) {
@@ -126,7 +126,7 @@ vint64m2_t test_vwmaccus_vx_i64m2(vint64m2_t vd, uint32_t rs1, vint32m1_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vwmaccus_vx_i64m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vwmaccus.nxv4i64.i32.nxv4i32.i64(<vscale x 4 x i64> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 4 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vwmaccus.nxv4i64.i32.nxv4i32.i64(<vscale x 4 x i64> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 4 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
 //
 vint64m4_t test_vwmaccus_vx_i64m4(vint64m4_t vd, uint32_t rs1, vint32m2_t vs2, size_t vl) {
@@ -135,7 +135,7 @@ vint64m4_t test_vwmaccus_vx_i64m4(vint64m4_t vd, uint32_t rs1, vint32m2_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vwmaccus_vx_i64m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vwmaccus.nxv8i64.i32.nxv8i32.i64(<vscale x 8 x i64> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 8 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vwmaccus.nxv8i64.i32.nxv8i32.i64(<vscale x 8 x i64> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 8 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
 //
 vint64m8_t test_vwmaccus_vx_i64m8(vint64m8_t vd, uint32_t rs1, vint32m4_t vs2, size_t vl) {

diff  --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vfmacc.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vfmacc.c
index 7f7bc8f752380..b29f446c5da78 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vfmacc.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vfmacc.c
@@ -9,7 +9,7 @@
 
 // CHECK-RV64-LABEL: @test_vfmacc_vv_f16mf4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x half> @llvm.riscv.vfmacc.nxv1f16.nxv1f16.i64(<vscale x 1 x half> [[VD:%.*]], <vscale x 1 x half> [[VS1:%.*]], <vscale x 1 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x half> @llvm.riscv.vfmacc.nxv1f16.nxv1f16.i64(<vscale x 1 x half> [[VD:%.*]], <vscale x 1 x half> [[VS1:%.*]], <vscale x 1 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x half> [[TMP0]]
 //
 vfloat16mf4_t test_vfmacc_vv_f16mf4(vfloat16mf4_t vd, vfloat16mf4_t vs1, vfloat16mf4_t vs2, size_t vl) {
@@ -18,7 +18,7 @@ vfloat16mf4_t test_vfmacc_vv_f16mf4(vfloat16mf4_t vd, vfloat16mf4_t vs1, vfloat1
 
 // CHECK-RV64-LABEL: @test_vfmacc_vf_f16mf4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x half> @llvm.riscv.vfmacc.nxv1f16.f16.i64(<vscale x 1 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 1 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x half> @llvm.riscv.vfmacc.nxv1f16.f16.i64(<vscale x 1 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 1 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x half> [[TMP0]]
 //
 vfloat16mf4_t test_vfmacc_vf_f16mf4(vfloat16mf4_t vd, _Float16 rs1, vfloat16mf4_t vs2, size_t vl) {
@@ -27,7 +27,7 @@ vfloat16mf4_t test_vfmacc_vf_f16mf4(vfloat16mf4_t vd, _Float16 rs1, vfloat16mf4_
 
 // CHECK-RV64-LABEL: @test_vfmacc_vv_f16mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x half> @llvm.riscv.vfmacc.nxv2f16.nxv2f16.i64(<vscale x 2 x half> [[VD:%.*]], <vscale x 2 x half> [[VS1:%.*]], <vscale x 2 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x half> @llvm.riscv.vfmacc.nxv2f16.nxv2f16.i64(<vscale x 2 x half> [[VD:%.*]], <vscale x 2 x half> [[VS1:%.*]], <vscale x 2 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x half> [[TMP0]]
 //
 vfloat16mf2_t test_vfmacc_vv_f16mf2(vfloat16mf2_t vd, vfloat16mf2_t vs1, vfloat16mf2_t vs2, size_t vl) {
@@ -36,7 +36,7 @@ vfloat16mf2_t test_vfmacc_vv_f16mf2(vfloat16mf2_t vd, vfloat16mf2_t vs1, vfloat1
 
 // CHECK-RV64-LABEL: @test_vfmacc_vf_f16mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x half> @llvm.riscv.vfmacc.nxv2f16.f16.i64(<vscale x 2 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 2 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x half> @llvm.riscv.vfmacc.nxv2f16.f16.i64(<vscale x 2 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 2 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x half> [[TMP0]]
 //
 vfloat16mf2_t test_vfmacc_vf_f16mf2(vfloat16mf2_t vd, _Float16 rs1, vfloat16mf2_t vs2, size_t vl) {
@@ -45,7 +45,7 @@ vfloat16mf2_t test_vfmacc_vf_f16mf2(vfloat16mf2_t vd, _Float16 rs1, vfloat16mf2_
 
 // CHECK-RV64-LABEL: @test_vfmacc_vv_f16m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x half> @llvm.riscv.vfmacc.nxv4f16.nxv4f16.i64(<vscale x 4 x half> [[VD:%.*]], <vscale x 4 x half> [[VS1:%.*]], <vscale x 4 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x half> @llvm.riscv.vfmacc.nxv4f16.nxv4f16.i64(<vscale x 4 x half> [[VD:%.*]], <vscale x 4 x half> [[VS1:%.*]], <vscale x 4 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x half> [[TMP0]]
 //
 vfloat16m1_t test_vfmacc_vv_f16m1(vfloat16m1_t vd, vfloat16m1_t vs1, vfloat16m1_t vs2, size_t vl) {
@@ -54,7 +54,7 @@ vfloat16m1_t test_vfmacc_vv_f16m1(vfloat16m1_t vd, vfloat16m1_t vs1, vfloat16m1_
 
 // CHECK-RV64-LABEL: @test_vfmacc_vf_f16m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x half> @llvm.riscv.vfmacc.nxv4f16.f16.i64(<vscale x 4 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 4 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x half> @llvm.riscv.vfmacc.nxv4f16.f16.i64(<vscale x 4 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 4 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x half> [[TMP0]]
 //
 vfloat16m1_t test_vfmacc_vf_f16m1(vfloat16m1_t vd, _Float16 rs1, vfloat16m1_t vs2, size_t vl) {
@@ -63,7 +63,7 @@ vfloat16m1_t test_vfmacc_vf_f16m1(vfloat16m1_t vd, _Float16 rs1, vfloat16m1_t vs
 
 // CHECK-RV64-LABEL: @test_vfmacc_vv_f16m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x half> @llvm.riscv.vfmacc.nxv8f16.nxv8f16.i64(<vscale x 8 x half> [[VD:%.*]], <vscale x 8 x half> [[VS1:%.*]], <vscale x 8 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x half> @llvm.riscv.vfmacc.nxv8f16.nxv8f16.i64(<vscale x 8 x half> [[VD:%.*]], <vscale x 8 x half> [[VS1:%.*]], <vscale x 8 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x half> [[TMP0]]
 //
 vfloat16m2_t test_vfmacc_vv_f16m2(vfloat16m2_t vd, vfloat16m2_t vs1, vfloat16m2_t vs2, size_t vl) {
@@ -72,7 +72,7 @@ vfloat16m2_t test_vfmacc_vv_f16m2(vfloat16m2_t vd, vfloat16m2_t vs1, vfloat16m2_
 
 // CHECK-RV64-LABEL: @test_vfmacc_vf_f16m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x half> @llvm.riscv.vfmacc.nxv8f16.f16.i64(<vscale x 8 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 8 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x half> @llvm.riscv.vfmacc.nxv8f16.f16.i64(<vscale x 8 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 8 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x half> [[TMP0]]
 //
 vfloat16m2_t test_vfmacc_vf_f16m2(vfloat16m2_t vd, _Float16 rs1, vfloat16m2_t vs2, size_t vl) {
@@ -81,7 +81,7 @@ vfloat16m2_t test_vfmacc_vf_f16m2(vfloat16m2_t vd, _Float16 rs1, vfloat16m2_t vs
 
 // CHECK-RV64-LABEL: @test_vfmacc_vv_f16m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x half> @llvm.riscv.vfmacc.nxv16f16.nxv16f16.i64(<vscale x 16 x half> [[VD:%.*]], <vscale x 16 x half> [[VS1:%.*]], <vscale x 16 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x half> @llvm.riscv.vfmacc.nxv16f16.nxv16f16.i64(<vscale x 16 x half> [[VD:%.*]], <vscale x 16 x half> [[VS1:%.*]], <vscale x 16 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x half> [[TMP0]]
 //
 vfloat16m4_t test_vfmacc_vv_f16m4(vfloat16m4_t vd, vfloat16m4_t vs1, vfloat16m4_t vs2, size_t vl) {
@@ -90,7 +90,7 @@ vfloat16m4_t test_vfmacc_vv_f16m4(vfloat16m4_t vd, vfloat16m4_t vs1, vfloat16m4_
 
 // CHECK-RV64-LABEL: @test_vfmacc_vf_f16m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x half> @llvm.riscv.vfmacc.nxv16f16.f16.i64(<vscale x 16 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 16 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x half> @llvm.riscv.vfmacc.nxv16f16.f16.i64(<vscale x 16 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 16 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x half> [[TMP0]]
 //
 vfloat16m4_t test_vfmacc_vf_f16m4(vfloat16m4_t vd, _Float16 rs1, vfloat16m4_t vs2, size_t vl) {
@@ -99,7 +99,7 @@ vfloat16m4_t test_vfmacc_vf_f16m4(vfloat16m4_t vd, _Float16 rs1, vfloat16m4_t vs
 
 // CHECK-RV64-LABEL: @test_vfmacc_vv_f16m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x half> @llvm.riscv.vfmacc.nxv32f16.nxv32f16.i64(<vscale x 32 x half> [[VD:%.*]], <vscale x 32 x half> [[VS1:%.*]], <vscale x 32 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x half> @llvm.riscv.vfmacc.nxv32f16.nxv32f16.i64(<vscale x 32 x half> [[VD:%.*]], <vscale x 32 x half> [[VS1:%.*]], <vscale x 32 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 32 x half> [[TMP0]]
 //
 vfloat16m8_t test_vfmacc_vv_f16m8(vfloat16m8_t vd, vfloat16m8_t vs1, vfloat16m8_t vs2, size_t vl) {
@@ -108,7 +108,7 @@ vfloat16m8_t test_vfmacc_vv_f16m8(vfloat16m8_t vd, vfloat16m8_t vs1, vfloat16m8_
 
 // CHECK-RV64-LABEL: @test_vfmacc_vf_f16m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x half> @llvm.riscv.vfmacc.nxv32f16.f16.i64(<vscale x 32 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 32 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x half> @llvm.riscv.vfmacc.nxv32f16.f16.i64(<vscale x 32 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 32 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 32 x half> [[TMP0]]
 //
 vfloat16m8_t test_vfmacc_vf_f16m8(vfloat16m8_t vd, _Float16 rs1, vfloat16m8_t vs2, size_t vl) {
@@ -117,7 +117,7 @@ vfloat16m8_t test_vfmacc_vf_f16m8(vfloat16m8_t vd, _Float16 rs1, vfloat16m8_t vs
 
 // CHECK-RV64-LABEL: @test_vfmacc_vv_f32mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x float> @llvm.riscv.vfmacc.nxv1f32.nxv1f32.i64(<vscale x 1 x float> [[VD:%.*]], <vscale x 1 x float> [[VS1:%.*]], <vscale x 1 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x float> @llvm.riscv.vfmacc.nxv1f32.nxv1f32.i64(<vscale x 1 x float> [[VD:%.*]], <vscale x 1 x float> [[VS1:%.*]], <vscale x 1 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x float> [[TMP0]]
 //
 vfloat32mf2_t test_vfmacc_vv_f32mf2(vfloat32mf2_t vd, vfloat32mf2_t vs1, vfloat32mf2_t vs2, size_t vl) {
@@ -126,7 +126,7 @@ vfloat32mf2_t test_vfmacc_vv_f32mf2(vfloat32mf2_t vd, vfloat32mf2_t vs1, vfloat3
 
 // CHECK-RV64-LABEL: @test_vfmacc_vf_f32mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x float> @llvm.riscv.vfmacc.nxv1f32.f32.i64(<vscale x 1 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 1 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x float> @llvm.riscv.vfmacc.nxv1f32.f32.i64(<vscale x 1 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 1 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x float> [[TMP0]]
 //
 vfloat32mf2_t test_vfmacc_vf_f32mf2(vfloat32mf2_t vd, float rs1, vfloat32mf2_t vs2, size_t vl) {
@@ -135,7 +135,7 @@ vfloat32mf2_t test_vfmacc_vf_f32mf2(vfloat32mf2_t vd, float rs1, vfloat32mf2_t v
 
 // CHECK-RV64-LABEL: @test_vfmacc_vv_f32m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x float> @llvm.riscv.vfmacc.nxv2f32.nxv2f32.i64(<vscale x 2 x float> [[VD:%.*]], <vscale x 2 x float> [[VS1:%.*]], <vscale x 2 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x float> @llvm.riscv.vfmacc.nxv2f32.nxv2f32.i64(<vscale x 2 x float> [[VD:%.*]], <vscale x 2 x float> [[VS1:%.*]], <vscale x 2 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x float> [[TMP0]]
 //
 vfloat32m1_t test_vfmacc_vv_f32m1(vfloat32m1_t vd, vfloat32m1_t vs1, vfloat32m1_t vs2, size_t vl) {
@@ -144,7 +144,7 @@ vfloat32m1_t test_vfmacc_vv_f32m1(vfloat32m1_t vd, vfloat32m1_t vs1, vfloat32m1_
 
 // CHECK-RV64-LABEL: @test_vfmacc_vf_f32m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x float> @llvm.riscv.vfmacc.nxv2f32.f32.i64(<vscale x 2 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 2 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x float> @llvm.riscv.vfmacc.nxv2f32.f32.i64(<vscale x 2 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 2 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x float> [[TMP0]]
 //
 vfloat32m1_t test_vfmacc_vf_f32m1(vfloat32m1_t vd, float rs1, vfloat32m1_t vs2, size_t vl) {
@@ -153,7 +153,7 @@ vfloat32m1_t test_vfmacc_vf_f32m1(vfloat32m1_t vd, float rs1, vfloat32m1_t vs2,
 
 // CHECK-RV64-LABEL: @test_vfmacc_vv_f32m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.riscv.vfmacc.nxv4f32.nxv4f32.i64(<vscale x 4 x float> [[VD:%.*]], <vscale x 4 x float> [[VS1:%.*]], <vscale x 4 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.riscv.vfmacc.nxv4f32.nxv4f32.i64(<vscale x 4 x float> [[VD:%.*]], <vscale x 4 x float> [[VS1:%.*]], <vscale x 4 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
 vfloat32m2_t test_vfmacc_vv_f32m2(vfloat32m2_t vd, vfloat32m2_t vs1, vfloat32m2_t vs2, size_t vl) {
@@ -162,7 +162,7 @@ vfloat32m2_t test_vfmacc_vv_f32m2(vfloat32m2_t vd, vfloat32m2_t vs1, vfloat32m2_
 
 // CHECK-RV64-LABEL: @test_vfmacc_vf_f32m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.riscv.vfmacc.nxv4f32.f32.i64(<vscale x 4 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 4 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.riscv.vfmacc.nxv4f32.f32.i64(<vscale x 4 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 4 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
 vfloat32m2_t test_vfmacc_vf_f32m2(vfloat32m2_t vd, float rs1, vfloat32m2_t vs2, size_t vl) {
@@ -171,7 +171,7 @@ vfloat32m2_t test_vfmacc_vf_f32m2(vfloat32m2_t vd, float rs1, vfloat32m2_t vs2,
 
 // CHECK-RV64-LABEL: @test_vfmacc_vv_f32m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x float> @llvm.riscv.vfmacc.nxv8f32.nxv8f32.i64(<vscale x 8 x float> [[VD:%.*]], <vscale x 8 x float> [[VS1:%.*]], <vscale x 8 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x float> @llvm.riscv.vfmacc.nxv8f32.nxv8f32.i64(<vscale x 8 x float> [[VD:%.*]], <vscale x 8 x float> [[VS1:%.*]], <vscale x 8 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x float> [[TMP0]]
 //
 vfloat32m4_t test_vfmacc_vv_f32m4(vfloat32m4_t vd, vfloat32m4_t vs1, vfloat32m4_t vs2, size_t vl) {
@@ -180,7 +180,7 @@ vfloat32m4_t test_vfmacc_vv_f32m4(vfloat32m4_t vd, vfloat32m4_t vs1, vfloat32m4_
 
 // CHECK-RV64-LABEL: @test_vfmacc_vf_f32m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x float> @llvm.riscv.vfmacc.nxv8f32.f32.i64(<vscale x 8 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 8 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x float> @llvm.riscv.vfmacc.nxv8f32.f32.i64(<vscale x 8 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 8 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x float> [[TMP0]]
 //
 vfloat32m4_t test_vfmacc_vf_f32m4(vfloat32m4_t vd, float rs1, vfloat32m4_t vs2, size_t vl) {
@@ -189,7 +189,7 @@ vfloat32m4_t test_vfmacc_vf_f32m4(vfloat32m4_t vd, float rs1, vfloat32m4_t vs2,
 
 // CHECK-RV64-LABEL: @test_vfmacc_vv_f32m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x float> @llvm.riscv.vfmacc.nxv16f32.nxv16f32.i64(<vscale x 16 x float> [[VD:%.*]], <vscale x 16 x float> [[VS1:%.*]], <vscale x 16 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x float> @llvm.riscv.vfmacc.nxv16f32.nxv16f32.i64(<vscale x 16 x float> [[VD:%.*]], <vscale x 16 x float> [[VS1:%.*]], <vscale x 16 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x float> [[TMP0]]
 //
 vfloat32m8_t test_vfmacc_vv_f32m8(vfloat32m8_t vd, vfloat32m8_t vs1, vfloat32m8_t vs2, size_t vl) {
@@ -198,7 +198,7 @@ vfloat32m8_t test_vfmacc_vv_f32m8(vfloat32m8_t vd, vfloat32m8_t vs1, vfloat32m8_
 
 // CHECK-RV64-LABEL: @test_vfmacc_vf_f32m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x float> @llvm.riscv.vfmacc.nxv16f32.f32.i64(<vscale x 16 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 16 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x float> @llvm.riscv.vfmacc.nxv16f32.f32.i64(<vscale x 16 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 16 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x float> [[TMP0]]
 //
 vfloat32m8_t test_vfmacc_vf_f32m8(vfloat32m8_t vd, float rs1, vfloat32m8_t vs2, size_t vl) {
@@ -207,7 +207,7 @@ vfloat32m8_t test_vfmacc_vf_f32m8(vfloat32m8_t vd, float rs1, vfloat32m8_t vs2,
 
 // CHECK-RV64-LABEL: @test_vfmacc_vv_f64m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x double> @llvm.riscv.vfmacc.nxv1f64.nxv1f64.i64(<vscale x 1 x double> [[VD:%.*]], <vscale x 1 x double> [[VS1:%.*]], <vscale x 1 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x double> @llvm.riscv.vfmacc.nxv1f64.nxv1f64.i64(<vscale x 1 x double> [[VD:%.*]], <vscale x 1 x double> [[VS1:%.*]], <vscale x 1 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x double> [[TMP0]]
 //
 vfloat64m1_t test_vfmacc_vv_f64m1(vfloat64m1_t vd, vfloat64m1_t vs1, vfloat64m1_t vs2, size_t vl) {
@@ -216,7 +216,7 @@ vfloat64m1_t test_vfmacc_vv_f64m1(vfloat64m1_t vd, vfloat64m1_t vs1, vfloat64m1_
 
 // CHECK-RV64-LABEL: @test_vfmacc_vf_f64m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x double> @llvm.riscv.vfmacc.nxv1f64.f64.i64(<vscale x 1 x double> [[VD:%.*]], double [[RS1:%.*]], <vscale x 1 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x double> @llvm.riscv.vfmacc.nxv1f64.f64.i64(<vscale x 1 x double> [[VD:%.*]], double [[RS1:%.*]], <vscale x 1 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x double> [[TMP0]]
 //
 vfloat64m1_t test_vfmacc_vf_f64m1(vfloat64m1_t vd, double rs1, vfloat64m1_t vs2, size_t vl) {
@@ -225,7 +225,7 @@ vfloat64m1_t test_vfmacc_vf_f64m1(vfloat64m1_t vd, double rs1, vfloat64m1_t vs2,
 
 // CHECK-RV64-LABEL: @test_vfmacc_vv_f64m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x double> @llvm.riscv.vfmacc.nxv2f64.nxv2f64.i64(<vscale x 2 x double> [[VD:%.*]], <vscale x 2 x double> [[VS1:%.*]], <vscale x 2 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x double> @llvm.riscv.vfmacc.nxv2f64.nxv2f64.i64(<vscale x 2 x double> [[VD:%.*]], <vscale x 2 x double> [[VS1:%.*]], <vscale x 2 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x double> [[TMP0]]
 //
 vfloat64m2_t test_vfmacc_vv_f64m2(vfloat64m2_t vd, vfloat64m2_t vs1, vfloat64m2_t vs2, size_t vl) {
@@ -234,7 +234,7 @@ vfloat64m2_t test_vfmacc_vv_f64m2(vfloat64m2_t vd, vfloat64m2_t vs1, vfloat64m2_
 
 // CHECK-RV64-LABEL: @test_vfmacc_vf_f64m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x double> @llvm.riscv.vfmacc.nxv2f64.f64.i64(<vscale x 2 x double> [[VD:%.*]], double [[RS1:%.*]], <vscale x 2 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x double> @llvm.riscv.vfmacc.nxv2f64.f64.i64(<vscale x 2 x double> [[VD:%.*]], double [[RS1:%.*]], <vscale x 2 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x double> [[TMP0]]
 //
 vfloat64m2_t test_vfmacc_vf_f64m2(vfloat64m2_t vd, double rs1, vfloat64m2_t vs2, size_t vl) {
@@ -243,7 +243,7 @@ vfloat64m2_t test_vfmacc_vf_f64m2(vfloat64m2_t vd, double rs1, vfloat64m2_t vs2,
 
 // CHECK-RV64-LABEL: @test_vfmacc_vv_f64m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x double> @llvm.riscv.vfmacc.nxv4f64.nxv4f64.i64(<vscale x 4 x double> [[VD:%.*]], <vscale x 4 x double> [[VS1:%.*]], <vscale x 4 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x double> @llvm.riscv.vfmacc.nxv4f64.nxv4f64.i64(<vscale x 4 x double> [[VD:%.*]], <vscale x 4 x double> [[VS1:%.*]], <vscale x 4 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x double> [[TMP0]]
 //
 vfloat64m4_t test_vfmacc_vv_f64m4(vfloat64m4_t vd, vfloat64m4_t vs1, vfloat64m4_t vs2, size_t vl) {
@@ -252,7 +252,7 @@ vfloat64m4_t test_vfmacc_vv_f64m4(vfloat64m4_t vd, vfloat64m4_t vs1, vfloat64m4_
 
 // CHECK-RV64-LABEL: @test_vfmacc_vf_f64m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x double> @llvm.riscv.vfmacc.nxv4f64.f64.i64(<vscale x 4 x double> [[VD:%.*]], double [[RS1:%.*]], <vscale x 4 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x double> @llvm.riscv.vfmacc.nxv4f64.f64.i64(<vscale x 4 x double> [[VD:%.*]], double [[RS1:%.*]], <vscale x 4 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x double> [[TMP0]]
 //
 vfloat64m4_t test_vfmacc_vf_f64m4(vfloat64m4_t vd, double rs1, vfloat64m4_t vs2, size_t vl) {
@@ -261,7 +261,7 @@ vfloat64m4_t test_vfmacc_vf_f64m4(vfloat64m4_t vd, double rs1, vfloat64m4_t vs2,
 
 // CHECK-RV64-LABEL: @test_vfmacc_vv_f64m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x double> @llvm.riscv.vfmacc.nxv8f64.nxv8f64.i64(<vscale x 8 x double> [[VD:%.*]], <vscale x 8 x double> [[VS1:%.*]], <vscale x 8 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x double> @llvm.riscv.vfmacc.nxv8f64.nxv8f64.i64(<vscale x 8 x double> [[VD:%.*]], <vscale x 8 x double> [[VS1:%.*]], <vscale x 8 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x double> [[TMP0]]
 //
 vfloat64m8_t test_vfmacc_vv_f64m8(vfloat64m8_t vd, vfloat64m8_t vs1, vfloat64m8_t vs2, size_t vl) {
@@ -270,7 +270,7 @@ vfloat64m8_t test_vfmacc_vv_f64m8(vfloat64m8_t vd, vfloat64m8_t vs1, vfloat64m8_
 
 // CHECK-RV64-LABEL: @test_vfmacc_vf_f64m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x double> @llvm.riscv.vfmacc.nxv8f64.f64.i64(<vscale x 8 x double> [[VD:%.*]], double [[RS1:%.*]], <vscale x 8 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x double> @llvm.riscv.vfmacc.nxv8f64.f64.i64(<vscale x 8 x double> [[VD:%.*]], double [[RS1:%.*]], <vscale x 8 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x double> [[TMP0]]
 //
 vfloat64m8_t test_vfmacc_vf_f64m8(vfloat64m8_t vd, double rs1, vfloat64m8_t vs2, size_t vl) {

diff  --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vfmadd.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vfmadd.c
index 9e929a2669436..808a84028f6fb 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vfmadd.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vfmadd.c
@@ -9,7 +9,7 @@
 
 // CHECK-RV64-LABEL: @test_vfmadd_vv_f16mf4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x half> @llvm.riscv.vfmadd.nxv1f16.nxv1f16.i64(<vscale x 1 x half> [[VD:%.*]], <vscale x 1 x half> [[VS1:%.*]], <vscale x 1 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x half> @llvm.riscv.vfmadd.nxv1f16.nxv1f16.i64(<vscale x 1 x half> [[VD:%.*]], <vscale x 1 x half> [[VS1:%.*]], <vscale x 1 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x half> [[TMP0]]
 //
 vfloat16mf4_t test_vfmadd_vv_f16mf4(vfloat16mf4_t vd, vfloat16mf4_t vs1, vfloat16mf4_t vs2, size_t vl) {
@@ -18,7 +18,7 @@ vfloat16mf4_t test_vfmadd_vv_f16mf4(vfloat16mf4_t vd, vfloat16mf4_t vs1, vfloat1
 
 // CHECK-RV64-LABEL: @test_vfmadd_vf_f16mf4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x half> @llvm.riscv.vfmadd.nxv1f16.f16.i64(<vscale x 1 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 1 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x half> @llvm.riscv.vfmadd.nxv1f16.f16.i64(<vscale x 1 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 1 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x half> [[TMP0]]
 //
 vfloat16mf4_t test_vfmadd_vf_f16mf4(vfloat16mf4_t vd, _Float16 rs1, vfloat16mf4_t vs2, size_t vl) {
@@ -27,7 +27,7 @@ vfloat16mf4_t test_vfmadd_vf_f16mf4(vfloat16mf4_t vd, _Float16 rs1, vfloat16mf4_
 
 // CHECK-RV64-LABEL: @test_vfmadd_vv_f16mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x half> @llvm.riscv.vfmadd.nxv2f16.nxv2f16.i64(<vscale x 2 x half> [[VD:%.*]], <vscale x 2 x half> [[VS1:%.*]], <vscale x 2 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x half> @llvm.riscv.vfmadd.nxv2f16.nxv2f16.i64(<vscale x 2 x half> [[VD:%.*]], <vscale x 2 x half> [[VS1:%.*]], <vscale x 2 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x half> [[TMP0]]
 //
 vfloat16mf2_t test_vfmadd_vv_f16mf2(vfloat16mf2_t vd, vfloat16mf2_t vs1, vfloat16mf2_t vs2, size_t vl) {
@@ -36,7 +36,7 @@ vfloat16mf2_t test_vfmadd_vv_f16mf2(vfloat16mf2_t vd, vfloat16mf2_t vs1, vfloat1
 
 // CHECK-RV64-LABEL: @test_vfmadd_vf_f16mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x half> @llvm.riscv.vfmadd.nxv2f16.f16.i64(<vscale x 2 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 2 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x half> @llvm.riscv.vfmadd.nxv2f16.f16.i64(<vscale x 2 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 2 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x half> [[TMP0]]
 //
 vfloat16mf2_t test_vfmadd_vf_f16mf2(vfloat16mf2_t vd, _Float16 rs1, vfloat16mf2_t vs2, size_t vl) {
@@ -45,7 +45,7 @@ vfloat16mf2_t test_vfmadd_vf_f16mf2(vfloat16mf2_t vd, _Float16 rs1, vfloat16mf2_
 
 // CHECK-RV64-LABEL: @test_vfmadd_vv_f16m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x half> @llvm.riscv.vfmadd.nxv4f16.nxv4f16.i64(<vscale x 4 x half> [[VD:%.*]], <vscale x 4 x half> [[VS1:%.*]], <vscale x 4 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x half> @llvm.riscv.vfmadd.nxv4f16.nxv4f16.i64(<vscale x 4 x half> [[VD:%.*]], <vscale x 4 x half> [[VS1:%.*]], <vscale x 4 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x half> [[TMP0]]
 //
 vfloat16m1_t test_vfmadd_vv_f16m1(vfloat16m1_t vd, vfloat16m1_t vs1, vfloat16m1_t vs2, size_t vl) {
@@ -54,7 +54,7 @@ vfloat16m1_t test_vfmadd_vv_f16m1(vfloat16m1_t vd, vfloat16m1_t vs1, vfloat16m1_
 
 // CHECK-RV64-LABEL: @test_vfmadd_vf_f16m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x half> @llvm.riscv.vfmadd.nxv4f16.f16.i64(<vscale x 4 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 4 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x half> @llvm.riscv.vfmadd.nxv4f16.f16.i64(<vscale x 4 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 4 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x half> [[TMP0]]
 //
 vfloat16m1_t test_vfmadd_vf_f16m1(vfloat16m1_t vd, _Float16 rs1, vfloat16m1_t vs2, size_t vl) {
@@ -63,7 +63,7 @@ vfloat16m1_t test_vfmadd_vf_f16m1(vfloat16m1_t vd, _Float16 rs1, vfloat16m1_t vs
 
 // CHECK-RV64-LABEL: @test_vfmadd_vv_f16m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x half> @llvm.riscv.vfmadd.nxv8f16.nxv8f16.i64(<vscale x 8 x half> [[VD:%.*]], <vscale x 8 x half> [[VS1:%.*]], <vscale x 8 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x half> @llvm.riscv.vfmadd.nxv8f16.nxv8f16.i64(<vscale x 8 x half> [[VD:%.*]], <vscale x 8 x half> [[VS1:%.*]], <vscale x 8 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x half> [[TMP0]]
 //
 vfloat16m2_t test_vfmadd_vv_f16m2(vfloat16m2_t vd, vfloat16m2_t vs1, vfloat16m2_t vs2, size_t vl) {
@@ -72,7 +72,7 @@ vfloat16m2_t test_vfmadd_vv_f16m2(vfloat16m2_t vd, vfloat16m2_t vs1, vfloat16m2_
 
 // CHECK-RV64-LABEL: @test_vfmadd_vf_f16m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x half> @llvm.riscv.vfmadd.nxv8f16.f16.i64(<vscale x 8 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 8 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x half> @llvm.riscv.vfmadd.nxv8f16.f16.i64(<vscale x 8 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 8 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x half> [[TMP0]]
 //
 vfloat16m2_t test_vfmadd_vf_f16m2(vfloat16m2_t vd, _Float16 rs1, vfloat16m2_t vs2, size_t vl) {
@@ -81,7 +81,7 @@ vfloat16m2_t test_vfmadd_vf_f16m2(vfloat16m2_t vd, _Float16 rs1, vfloat16m2_t vs
 
 // CHECK-RV64-LABEL: @test_vfmadd_vv_f16m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x half> @llvm.riscv.vfmadd.nxv16f16.nxv16f16.i64(<vscale x 16 x half> [[VD:%.*]], <vscale x 16 x half> [[VS1:%.*]], <vscale x 16 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x half> @llvm.riscv.vfmadd.nxv16f16.nxv16f16.i64(<vscale x 16 x half> [[VD:%.*]], <vscale x 16 x half> [[VS1:%.*]], <vscale x 16 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x half> [[TMP0]]
 //
 vfloat16m4_t test_vfmadd_vv_f16m4(vfloat16m4_t vd, vfloat16m4_t vs1, vfloat16m4_t vs2, size_t vl) {
@@ -90,7 +90,7 @@ vfloat16m4_t test_vfmadd_vv_f16m4(vfloat16m4_t vd, vfloat16m4_t vs1, vfloat16m4_
 
 // CHECK-RV64-LABEL: @test_vfmadd_vf_f16m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x half> @llvm.riscv.vfmadd.nxv16f16.f16.i64(<vscale x 16 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 16 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x half> @llvm.riscv.vfmadd.nxv16f16.f16.i64(<vscale x 16 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 16 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x half> [[TMP0]]
 //
 vfloat16m4_t test_vfmadd_vf_f16m4(vfloat16m4_t vd, _Float16 rs1, vfloat16m4_t vs2, size_t vl) {
@@ -99,7 +99,7 @@ vfloat16m4_t test_vfmadd_vf_f16m4(vfloat16m4_t vd, _Float16 rs1, vfloat16m4_t vs
 
 // CHECK-RV64-LABEL: @test_vfmadd_vv_f16m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x half> @llvm.riscv.vfmadd.nxv32f16.nxv32f16.i64(<vscale x 32 x half> [[VD:%.*]], <vscale x 32 x half> [[VS1:%.*]], <vscale x 32 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x half> @llvm.riscv.vfmadd.nxv32f16.nxv32f16.i64(<vscale x 32 x half> [[VD:%.*]], <vscale x 32 x half> [[VS1:%.*]], <vscale x 32 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 32 x half> [[TMP0]]
 //
 vfloat16m8_t test_vfmadd_vv_f16m8(vfloat16m8_t vd, vfloat16m8_t vs1, vfloat16m8_t vs2, size_t vl) {
@@ -108,7 +108,7 @@ vfloat16m8_t test_vfmadd_vv_f16m8(vfloat16m8_t vd, vfloat16m8_t vs1, vfloat16m8_
 
 // CHECK-RV64-LABEL: @test_vfmadd_vf_f16m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x half> @llvm.riscv.vfmadd.nxv32f16.f16.i64(<vscale x 32 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 32 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x half> @llvm.riscv.vfmadd.nxv32f16.f16.i64(<vscale x 32 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 32 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 32 x half> [[TMP0]]
 //
 vfloat16m8_t test_vfmadd_vf_f16m8(vfloat16m8_t vd, _Float16 rs1, vfloat16m8_t vs2, size_t vl) {
@@ -117,7 +117,7 @@ vfloat16m8_t test_vfmadd_vf_f16m8(vfloat16m8_t vd, _Float16 rs1, vfloat16m8_t vs
 
 // CHECK-RV64-LABEL: @test_vfmadd_vv_f32mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x float> @llvm.riscv.vfmadd.nxv1f32.nxv1f32.i64(<vscale x 1 x float> [[VD:%.*]], <vscale x 1 x float> [[VS1:%.*]], <vscale x 1 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x float> @llvm.riscv.vfmadd.nxv1f32.nxv1f32.i64(<vscale x 1 x float> [[VD:%.*]], <vscale x 1 x float> [[VS1:%.*]], <vscale x 1 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x float> [[TMP0]]
 //
 vfloat32mf2_t test_vfmadd_vv_f32mf2(vfloat32mf2_t vd, vfloat32mf2_t vs1, vfloat32mf2_t vs2, size_t vl) {
@@ -126,7 +126,7 @@ vfloat32mf2_t test_vfmadd_vv_f32mf2(vfloat32mf2_t vd, vfloat32mf2_t vs1, vfloat3
 
 // CHECK-RV64-LABEL: @test_vfmadd_vf_f32mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x float> @llvm.riscv.vfmadd.nxv1f32.f32.i64(<vscale x 1 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 1 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x float> @llvm.riscv.vfmadd.nxv1f32.f32.i64(<vscale x 1 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 1 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x float> [[TMP0]]
 //
 vfloat32mf2_t test_vfmadd_vf_f32mf2(vfloat32mf2_t vd, float rs1, vfloat32mf2_t vs2, size_t vl) {
@@ -135,7 +135,7 @@ vfloat32mf2_t test_vfmadd_vf_f32mf2(vfloat32mf2_t vd, float rs1, vfloat32mf2_t v
 
 // CHECK-RV64-LABEL: @test_vfmadd_vv_f32m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x float> @llvm.riscv.vfmadd.nxv2f32.nxv2f32.i64(<vscale x 2 x float> [[VD:%.*]], <vscale x 2 x float> [[VS1:%.*]], <vscale x 2 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x float> @llvm.riscv.vfmadd.nxv2f32.nxv2f32.i64(<vscale x 2 x float> [[VD:%.*]], <vscale x 2 x float> [[VS1:%.*]], <vscale x 2 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x float> [[TMP0]]
 //
 vfloat32m1_t test_vfmadd_vv_f32m1(vfloat32m1_t vd, vfloat32m1_t vs1, vfloat32m1_t vs2, size_t vl) {
@@ -144,7 +144,7 @@ vfloat32m1_t test_vfmadd_vv_f32m1(vfloat32m1_t vd, vfloat32m1_t vs1, vfloat32m1_
 
 // CHECK-RV64-LABEL: @test_vfmadd_vf_f32m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x float> @llvm.riscv.vfmadd.nxv2f32.f32.i64(<vscale x 2 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 2 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x float> @llvm.riscv.vfmadd.nxv2f32.f32.i64(<vscale x 2 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 2 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x float> [[TMP0]]
 //
 vfloat32m1_t test_vfmadd_vf_f32m1(vfloat32m1_t vd, float rs1, vfloat32m1_t vs2, size_t vl) {
@@ -153,7 +153,7 @@ vfloat32m1_t test_vfmadd_vf_f32m1(vfloat32m1_t vd, float rs1, vfloat32m1_t vs2,
 
 // CHECK-RV64-LABEL: @test_vfmadd_vv_f32m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.riscv.vfmadd.nxv4f32.nxv4f32.i64(<vscale x 4 x float> [[VD:%.*]], <vscale x 4 x float> [[VS1:%.*]], <vscale x 4 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.riscv.vfmadd.nxv4f32.nxv4f32.i64(<vscale x 4 x float> [[VD:%.*]], <vscale x 4 x float> [[VS1:%.*]], <vscale x 4 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
 vfloat32m2_t test_vfmadd_vv_f32m2(vfloat32m2_t vd, vfloat32m2_t vs1, vfloat32m2_t vs2, size_t vl) {
@@ -162,7 +162,7 @@ vfloat32m2_t test_vfmadd_vv_f32m2(vfloat32m2_t vd, vfloat32m2_t vs1, vfloat32m2_
 
 // CHECK-RV64-LABEL: @test_vfmadd_vf_f32m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.riscv.vfmadd.nxv4f32.f32.i64(<vscale x 4 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 4 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.riscv.vfmadd.nxv4f32.f32.i64(<vscale x 4 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 4 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
 vfloat32m2_t test_vfmadd_vf_f32m2(vfloat32m2_t vd, float rs1, vfloat32m2_t vs2, size_t vl) {
@@ -171,7 +171,7 @@ vfloat32m2_t test_vfmadd_vf_f32m2(vfloat32m2_t vd, float rs1, vfloat32m2_t vs2,
 
 // CHECK-RV64-LABEL: @test_vfmadd_vv_f32m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x float> @llvm.riscv.vfmadd.nxv8f32.nxv8f32.i64(<vscale x 8 x float> [[VD:%.*]], <vscale x 8 x float> [[VS1:%.*]], <vscale x 8 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x float> @llvm.riscv.vfmadd.nxv8f32.nxv8f32.i64(<vscale x 8 x float> [[VD:%.*]], <vscale x 8 x float> [[VS1:%.*]], <vscale x 8 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x float> [[TMP0]]
 //
 vfloat32m4_t test_vfmadd_vv_f32m4(vfloat32m4_t vd, vfloat32m4_t vs1, vfloat32m4_t vs2, size_t vl) {
@@ -180,7 +180,7 @@ vfloat32m4_t test_vfmadd_vv_f32m4(vfloat32m4_t vd, vfloat32m4_t vs1, vfloat32m4_
 
 // CHECK-RV64-LABEL: @test_vfmadd_vf_f32m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x float> @llvm.riscv.vfmadd.nxv8f32.f32.i64(<vscale x 8 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 8 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x float> @llvm.riscv.vfmadd.nxv8f32.f32.i64(<vscale x 8 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 8 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x float> [[TMP0]]
 //
 vfloat32m4_t test_vfmadd_vf_f32m4(vfloat32m4_t vd, float rs1, vfloat32m4_t vs2, size_t vl) {
@@ -189,7 +189,7 @@ vfloat32m4_t test_vfmadd_vf_f32m4(vfloat32m4_t vd, float rs1, vfloat32m4_t vs2,
 
 // CHECK-RV64-LABEL: @test_vfmadd_vv_f32m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x float> @llvm.riscv.vfmadd.nxv16f32.nxv16f32.i64(<vscale x 16 x float> [[VD:%.*]], <vscale x 16 x float> [[VS1:%.*]], <vscale x 16 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x float> @llvm.riscv.vfmadd.nxv16f32.nxv16f32.i64(<vscale x 16 x float> [[VD:%.*]], <vscale x 16 x float> [[VS1:%.*]], <vscale x 16 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x float> [[TMP0]]
 //
 vfloat32m8_t test_vfmadd_vv_f32m8(vfloat32m8_t vd, vfloat32m8_t vs1, vfloat32m8_t vs2, size_t vl) {
@@ -198,7 +198,7 @@ vfloat32m8_t test_vfmadd_vv_f32m8(vfloat32m8_t vd, vfloat32m8_t vs1, vfloat32m8_
 
 // CHECK-RV64-LABEL: @test_vfmadd_vf_f32m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x float> @llvm.riscv.vfmadd.nxv16f32.f32.i64(<vscale x 16 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 16 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x float> @llvm.riscv.vfmadd.nxv16f32.f32.i64(<vscale x 16 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 16 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x float> [[TMP0]]
 //
 vfloat32m8_t test_vfmadd_vf_f32m8(vfloat32m8_t vd, float rs1, vfloat32m8_t vs2, size_t vl) {
@@ -207,7 +207,7 @@ vfloat32m8_t test_vfmadd_vf_f32m8(vfloat32m8_t vd, float rs1, vfloat32m8_t vs2,
 
 // CHECK-RV64-LABEL: @test_vfmadd_vv_f64m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x double> @llvm.riscv.vfmadd.nxv1f64.nxv1f64.i64(<vscale x 1 x double> [[VD:%.*]], <vscale x 1 x double> [[VS1:%.*]], <vscale x 1 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x double> @llvm.riscv.vfmadd.nxv1f64.nxv1f64.i64(<vscale x 1 x double> [[VD:%.*]], <vscale x 1 x double> [[VS1:%.*]], <vscale x 1 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x double> [[TMP0]]
 //
 vfloat64m1_t test_vfmadd_vv_f64m1(vfloat64m1_t vd, vfloat64m1_t vs1, vfloat64m1_t vs2, size_t vl) {
@@ -216,7 +216,7 @@ vfloat64m1_t test_vfmadd_vv_f64m1(vfloat64m1_t vd, vfloat64m1_t vs1, vfloat64m1_
 
 // CHECK-RV64-LABEL: @test_vfmadd_vf_f64m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x double> @llvm.riscv.vfmadd.nxv1f64.f64.i64(<vscale x 1 x double> [[VD:%.*]], double [[RS1:%.*]], <vscale x 1 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x double> @llvm.riscv.vfmadd.nxv1f64.f64.i64(<vscale x 1 x double> [[VD:%.*]], double [[RS1:%.*]], <vscale x 1 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x double> [[TMP0]]
 //
 vfloat64m1_t test_vfmadd_vf_f64m1(vfloat64m1_t vd, double rs1, vfloat64m1_t vs2, size_t vl) {
@@ -225,7 +225,7 @@ vfloat64m1_t test_vfmadd_vf_f64m1(vfloat64m1_t vd, double rs1, vfloat64m1_t vs2,
 
 // CHECK-RV64-LABEL: @test_vfmadd_vv_f64m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x double> @llvm.riscv.vfmadd.nxv2f64.nxv2f64.i64(<vscale x 2 x double> [[VD:%.*]], <vscale x 2 x double> [[VS1:%.*]], <vscale x 2 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x double> @llvm.riscv.vfmadd.nxv2f64.nxv2f64.i64(<vscale x 2 x double> [[VD:%.*]], <vscale x 2 x double> [[VS1:%.*]], <vscale x 2 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x double> [[TMP0]]
 //
 vfloat64m2_t test_vfmadd_vv_f64m2(vfloat64m2_t vd, vfloat64m2_t vs1, vfloat64m2_t vs2, size_t vl) {
@@ -234,7 +234,7 @@ vfloat64m2_t test_vfmadd_vv_f64m2(vfloat64m2_t vd, vfloat64m2_t vs1, vfloat64m2_
 
 // CHECK-RV64-LABEL: @test_vfmadd_vf_f64m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x double> @llvm.riscv.vfmadd.nxv2f64.f64.i64(<vscale x 2 x double> [[VD:%.*]], double [[RS1:%.*]], <vscale x 2 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x double> @llvm.riscv.vfmadd.nxv2f64.f64.i64(<vscale x 2 x double> [[VD:%.*]], double [[RS1:%.*]], <vscale x 2 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x double> [[TMP0]]
 //
 vfloat64m2_t test_vfmadd_vf_f64m2(vfloat64m2_t vd, double rs1, vfloat64m2_t vs2, size_t vl) {
@@ -243,7 +243,7 @@ vfloat64m2_t test_vfmadd_vf_f64m2(vfloat64m2_t vd, double rs1, vfloat64m2_t vs2,
 
 // CHECK-RV64-LABEL: @test_vfmadd_vv_f64m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x double> @llvm.riscv.vfmadd.nxv4f64.nxv4f64.i64(<vscale x 4 x double> [[VD:%.*]], <vscale x 4 x double> [[VS1:%.*]], <vscale x 4 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x double> @llvm.riscv.vfmadd.nxv4f64.nxv4f64.i64(<vscale x 4 x double> [[VD:%.*]], <vscale x 4 x double> [[VS1:%.*]], <vscale x 4 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x double> [[TMP0]]
 //
 vfloat64m4_t test_vfmadd_vv_f64m4(vfloat64m4_t vd, vfloat64m4_t vs1, vfloat64m4_t vs2, size_t vl) {
@@ -252,7 +252,7 @@ vfloat64m4_t test_vfmadd_vv_f64m4(vfloat64m4_t vd, vfloat64m4_t vs1, vfloat64m4_
 
 // CHECK-RV64-LABEL: @test_vfmadd_vf_f64m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x double> @llvm.riscv.vfmadd.nxv4f64.f64.i64(<vscale x 4 x double> [[VD:%.*]], double [[RS1:%.*]], <vscale x 4 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x double> @llvm.riscv.vfmadd.nxv4f64.f64.i64(<vscale x 4 x double> [[VD:%.*]], double [[RS1:%.*]], <vscale x 4 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x double> [[TMP0]]
 //
 vfloat64m4_t test_vfmadd_vf_f64m4(vfloat64m4_t vd, double rs1, vfloat64m4_t vs2, size_t vl) {
@@ -261,7 +261,7 @@ vfloat64m4_t test_vfmadd_vf_f64m4(vfloat64m4_t vd, double rs1, vfloat64m4_t vs2,
 
 // CHECK-RV64-LABEL: @test_vfmadd_vv_f64m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x double> @llvm.riscv.vfmadd.nxv8f64.nxv8f64.i64(<vscale x 8 x double> [[VD:%.*]], <vscale x 8 x double> [[VS1:%.*]], <vscale x 8 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x double> @llvm.riscv.vfmadd.nxv8f64.nxv8f64.i64(<vscale x 8 x double> [[VD:%.*]], <vscale x 8 x double> [[VS1:%.*]], <vscale x 8 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x double> [[TMP0]]
 //
 vfloat64m8_t test_vfmadd_vv_f64m8(vfloat64m8_t vd, vfloat64m8_t vs1, vfloat64m8_t vs2, size_t vl) {
@@ -270,7 +270,7 @@ vfloat64m8_t test_vfmadd_vv_f64m8(vfloat64m8_t vd, vfloat64m8_t vs1, vfloat64m8_
 
 // CHECK-RV64-LABEL: @test_vfmadd_vf_f64m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x double> @llvm.riscv.vfmadd.nxv8f64.f64.i64(<vscale x 8 x double> [[VD:%.*]], double [[RS1:%.*]], <vscale x 8 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x double> @llvm.riscv.vfmadd.nxv8f64.f64.i64(<vscale x 8 x double> [[VD:%.*]], double [[RS1:%.*]], <vscale x 8 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x double> [[TMP0]]
 //
 vfloat64m8_t test_vfmadd_vf_f64m8(vfloat64m8_t vd, double rs1, vfloat64m8_t vs2, size_t vl) {

diff  --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vfmsac.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vfmsac.c
index 1bb69cdd03df6..0b854c0be14d8 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vfmsac.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vfmsac.c
@@ -9,7 +9,7 @@
 
 // CHECK-RV64-LABEL: @test_vfmsac_vv_f16mf4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x half> @llvm.riscv.vfmsac.nxv1f16.nxv1f16.i64(<vscale x 1 x half> [[VD:%.*]], <vscale x 1 x half> [[VS1:%.*]], <vscale x 1 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x half> @llvm.riscv.vfmsac.nxv1f16.nxv1f16.i64(<vscale x 1 x half> [[VD:%.*]], <vscale x 1 x half> [[VS1:%.*]], <vscale x 1 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x half> [[TMP0]]
 //
 vfloat16mf4_t test_vfmsac_vv_f16mf4(vfloat16mf4_t vd, vfloat16mf4_t vs1, vfloat16mf4_t vs2, size_t vl) {
@@ -18,7 +18,7 @@ vfloat16mf4_t test_vfmsac_vv_f16mf4(vfloat16mf4_t vd, vfloat16mf4_t vs1, vfloat1
 
 // CHECK-RV64-LABEL: @test_vfmsac_vf_f16mf4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x half> @llvm.riscv.vfmsac.nxv1f16.f16.i64(<vscale x 1 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 1 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x half> @llvm.riscv.vfmsac.nxv1f16.f16.i64(<vscale x 1 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 1 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x half> [[TMP0]]
 //
 vfloat16mf4_t test_vfmsac_vf_f16mf4(vfloat16mf4_t vd, _Float16 rs1, vfloat16mf4_t vs2, size_t vl) {
@@ -27,7 +27,7 @@ vfloat16mf4_t test_vfmsac_vf_f16mf4(vfloat16mf4_t vd, _Float16 rs1, vfloat16mf4_
 
 // CHECK-RV64-LABEL: @test_vfmsac_vv_f16mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x half> @llvm.riscv.vfmsac.nxv2f16.nxv2f16.i64(<vscale x 2 x half> [[VD:%.*]], <vscale x 2 x half> [[VS1:%.*]], <vscale x 2 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x half> @llvm.riscv.vfmsac.nxv2f16.nxv2f16.i64(<vscale x 2 x half> [[VD:%.*]], <vscale x 2 x half> [[VS1:%.*]], <vscale x 2 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x half> [[TMP0]]
 //
 vfloat16mf2_t test_vfmsac_vv_f16mf2(vfloat16mf2_t vd, vfloat16mf2_t vs1, vfloat16mf2_t vs2, size_t vl) {
@@ -36,7 +36,7 @@ vfloat16mf2_t test_vfmsac_vv_f16mf2(vfloat16mf2_t vd, vfloat16mf2_t vs1, vfloat1
 
 // CHECK-RV64-LABEL: @test_vfmsac_vf_f16mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x half> @llvm.riscv.vfmsac.nxv2f16.f16.i64(<vscale x 2 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 2 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x half> @llvm.riscv.vfmsac.nxv2f16.f16.i64(<vscale x 2 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 2 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x half> [[TMP0]]
 //
 vfloat16mf2_t test_vfmsac_vf_f16mf2(vfloat16mf2_t vd, _Float16 rs1, vfloat16mf2_t vs2, size_t vl) {
@@ -45,7 +45,7 @@ vfloat16mf2_t test_vfmsac_vf_f16mf2(vfloat16mf2_t vd, _Float16 rs1, vfloat16mf2_
 
 // CHECK-RV64-LABEL: @test_vfmsac_vv_f16m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x half> @llvm.riscv.vfmsac.nxv4f16.nxv4f16.i64(<vscale x 4 x half> [[VD:%.*]], <vscale x 4 x half> [[VS1:%.*]], <vscale x 4 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x half> @llvm.riscv.vfmsac.nxv4f16.nxv4f16.i64(<vscale x 4 x half> [[VD:%.*]], <vscale x 4 x half> [[VS1:%.*]], <vscale x 4 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x half> [[TMP0]]
 //
 vfloat16m1_t test_vfmsac_vv_f16m1(vfloat16m1_t vd, vfloat16m1_t vs1, vfloat16m1_t vs2, size_t vl) {
@@ -54,7 +54,7 @@ vfloat16m1_t test_vfmsac_vv_f16m1(vfloat16m1_t vd, vfloat16m1_t vs1, vfloat16m1_
 
 // CHECK-RV64-LABEL: @test_vfmsac_vf_f16m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x half> @llvm.riscv.vfmsac.nxv4f16.f16.i64(<vscale x 4 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 4 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x half> @llvm.riscv.vfmsac.nxv4f16.f16.i64(<vscale x 4 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 4 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x half> [[TMP0]]
 //
 vfloat16m1_t test_vfmsac_vf_f16m1(vfloat16m1_t vd, _Float16 rs1, vfloat16m1_t vs2, size_t vl) {
@@ -63,7 +63,7 @@ vfloat16m1_t test_vfmsac_vf_f16m1(vfloat16m1_t vd, _Float16 rs1, vfloat16m1_t vs
 
 // CHECK-RV64-LABEL: @test_vfmsac_vv_f16m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x half> @llvm.riscv.vfmsac.nxv8f16.nxv8f16.i64(<vscale x 8 x half> [[VD:%.*]], <vscale x 8 x half> [[VS1:%.*]], <vscale x 8 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x half> @llvm.riscv.vfmsac.nxv8f16.nxv8f16.i64(<vscale x 8 x half> [[VD:%.*]], <vscale x 8 x half> [[VS1:%.*]], <vscale x 8 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x half> [[TMP0]]
 //
 vfloat16m2_t test_vfmsac_vv_f16m2(vfloat16m2_t vd, vfloat16m2_t vs1, vfloat16m2_t vs2, size_t vl) {
@@ -72,7 +72,7 @@ vfloat16m2_t test_vfmsac_vv_f16m2(vfloat16m2_t vd, vfloat16m2_t vs1, vfloat16m2_
 
 // CHECK-RV64-LABEL: @test_vfmsac_vf_f16m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x half> @llvm.riscv.vfmsac.nxv8f16.f16.i64(<vscale x 8 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 8 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x half> @llvm.riscv.vfmsac.nxv8f16.f16.i64(<vscale x 8 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 8 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x half> [[TMP0]]
 //
 vfloat16m2_t test_vfmsac_vf_f16m2(vfloat16m2_t vd, _Float16 rs1, vfloat16m2_t vs2, size_t vl) {
@@ -81,7 +81,7 @@ vfloat16m2_t test_vfmsac_vf_f16m2(vfloat16m2_t vd, _Float16 rs1, vfloat16m2_t vs
 
 // CHECK-RV64-LABEL: @test_vfmsac_vv_f16m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x half> @llvm.riscv.vfmsac.nxv16f16.nxv16f16.i64(<vscale x 16 x half> [[VD:%.*]], <vscale x 16 x half> [[VS1:%.*]], <vscale x 16 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x half> @llvm.riscv.vfmsac.nxv16f16.nxv16f16.i64(<vscale x 16 x half> [[VD:%.*]], <vscale x 16 x half> [[VS1:%.*]], <vscale x 16 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x half> [[TMP0]]
 //
 vfloat16m4_t test_vfmsac_vv_f16m4(vfloat16m4_t vd, vfloat16m4_t vs1, vfloat16m4_t vs2, size_t vl) {
@@ -90,7 +90,7 @@ vfloat16m4_t test_vfmsac_vv_f16m4(vfloat16m4_t vd, vfloat16m4_t vs1, vfloat16m4_
 
 // CHECK-RV64-LABEL: @test_vfmsac_vf_f16m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x half> @llvm.riscv.vfmsac.nxv16f16.f16.i64(<vscale x 16 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 16 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x half> @llvm.riscv.vfmsac.nxv16f16.f16.i64(<vscale x 16 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 16 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x half> [[TMP0]]
 //
 vfloat16m4_t test_vfmsac_vf_f16m4(vfloat16m4_t vd, _Float16 rs1, vfloat16m4_t vs2, size_t vl) {
@@ -99,7 +99,7 @@ vfloat16m4_t test_vfmsac_vf_f16m4(vfloat16m4_t vd, _Float16 rs1, vfloat16m4_t vs
 
 // CHECK-RV64-LABEL: @test_vfmsac_vv_f16m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x half> @llvm.riscv.vfmsac.nxv32f16.nxv32f16.i64(<vscale x 32 x half> [[VD:%.*]], <vscale x 32 x half> [[VS1:%.*]], <vscale x 32 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x half> @llvm.riscv.vfmsac.nxv32f16.nxv32f16.i64(<vscale x 32 x half> [[VD:%.*]], <vscale x 32 x half> [[VS1:%.*]], <vscale x 32 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 32 x half> [[TMP0]]
 //
 vfloat16m8_t test_vfmsac_vv_f16m8(vfloat16m8_t vd, vfloat16m8_t vs1, vfloat16m8_t vs2, size_t vl) {
@@ -108,7 +108,7 @@ vfloat16m8_t test_vfmsac_vv_f16m8(vfloat16m8_t vd, vfloat16m8_t vs1, vfloat16m8_
 
 // CHECK-RV64-LABEL: @test_vfmsac_vf_f16m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x half> @llvm.riscv.vfmsac.nxv32f16.f16.i64(<vscale x 32 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 32 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x half> @llvm.riscv.vfmsac.nxv32f16.f16.i64(<vscale x 32 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 32 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 32 x half> [[TMP0]]
 //
 vfloat16m8_t test_vfmsac_vf_f16m8(vfloat16m8_t vd, _Float16 rs1, vfloat16m8_t vs2, size_t vl) {
@@ -117,7 +117,7 @@ vfloat16m8_t test_vfmsac_vf_f16m8(vfloat16m8_t vd, _Float16 rs1, vfloat16m8_t vs
 
 // CHECK-RV64-LABEL: @test_vfmsac_vv_f32mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x float> @llvm.riscv.vfmsac.nxv1f32.nxv1f32.i64(<vscale x 1 x float> [[VD:%.*]], <vscale x 1 x float> [[VS1:%.*]], <vscale x 1 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x float> @llvm.riscv.vfmsac.nxv1f32.nxv1f32.i64(<vscale x 1 x float> [[VD:%.*]], <vscale x 1 x float> [[VS1:%.*]], <vscale x 1 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x float> [[TMP0]]
 //
 vfloat32mf2_t test_vfmsac_vv_f32mf2(vfloat32mf2_t vd, vfloat32mf2_t vs1, vfloat32mf2_t vs2, size_t vl) {
@@ -126,7 +126,7 @@ vfloat32mf2_t test_vfmsac_vv_f32mf2(vfloat32mf2_t vd, vfloat32mf2_t vs1, vfloat3
 
 // CHECK-RV64-LABEL: @test_vfmsac_vf_f32mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x float> @llvm.riscv.vfmsac.nxv1f32.f32.i64(<vscale x 1 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 1 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x float> @llvm.riscv.vfmsac.nxv1f32.f32.i64(<vscale x 1 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 1 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x float> [[TMP0]]
 //
 vfloat32mf2_t test_vfmsac_vf_f32mf2(vfloat32mf2_t vd, float rs1, vfloat32mf2_t vs2, size_t vl) {
@@ -135,7 +135,7 @@ vfloat32mf2_t test_vfmsac_vf_f32mf2(vfloat32mf2_t vd, float rs1, vfloat32mf2_t v
 
 // CHECK-RV64-LABEL: @test_vfmsac_vv_f32m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x float> @llvm.riscv.vfmsac.nxv2f32.nxv2f32.i64(<vscale x 2 x float> [[VD:%.*]], <vscale x 2 x float> [[VS1:%.*]], <vscale x 2 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x float> @llvm.riscv.vfmsac.nxv2f32.nxv2f32.i64(<vscale x 2 x float> [[VD:%.*]], <vscale x 2 x float> [[VS1:%.*]], <vscale x 2 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x float> [[TMP0]]
 //
 vfloat32m1_t test_vfmsac_vv_f32m1(vfloat32m1_t vd, vfloat32m1_t vs1, vfloat32m1_t vs2, size_t vl) {
@@ -144,7 +144,7 @@ vfloat32m1_t test_vfmsac_vv_f32m1(vfloat32m1_t vd, vfloat32m1_t vs1, vfloat32m1_
 
 // CHECK-RV64-LABEL: @test_vfmsac_vf_f32m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x float> @llvm.riscv.vfmsac.nxv2f32.f32.i64(<vscale x 2 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 2 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x float> @llvm.riscv.vfmsac.nxv2f32.f32.i64(<vscale x 2 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 2 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x float> [[TMP0]]
 //
 vfloat32m1_t test_vfmsac_vf_f32m1(vfloat32m1_t vd, float rs1, vfloat32m1_t vs2, size_t vl) {
@@ -153,7 +153,7 @@ vfloat32m1_t test_vfmsac_vf_f32m1(vfloat32m1_t vd, float rs1, vfloat32m1_t vs2,
 
 // CHECK-RV64-LABEL: @test_vfmsac_vv_f32m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.riscv.vfmsac.nxv4f32.nxv4f32.i64(<vscale x 4 x float> [[VD:%.*]], <vscale x 4 x float> [[VS1:%.*]], <vscale x 4 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.riscv.vfmsac.nxv4f32.nxv4f32.i64(<vscale x 4 x float> [[VD:%.*]], <vscale x 4 x float> [[VS1:%.*]], <vscale x 4 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
 vfloat32m2_t test_vfmsac_vv_f32m2(vfloat32m2_t vd, vfloat32m2_t vs1, vfloat32m2_t vs2, size_t vl) {
@@ -162,7 +162,7 @@ vfloat32m2_t test_vfmsac_vv_f32m2(vfloat32m2_t vd, vfloat32m2_t vs1, vfloat32m2_
 
 // CHECK-RV64-LABEL: @test_vfmsac_vf_f32m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.riscv.vfmsac.nxv4f32.f32.i64(<vscale x 4 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 4 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.riscv.vfmsac.nxv4f32.f32.i64(<vscale x 4 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 4 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
 vfloat32m2_t test_vfmsac_vf_f32m2(vfloat32m2_t vd, float rs1, vfloat32m2_t vs2, size_t vl) {
@@ -171,7 +171,7 @@ vfloat32m2_t test_vfmsac_vf_f32m2(vfloat32m2_t vd, float rs1, vfloat32m2_t vs2,
 
 // CHECK-RV64-LABEL: @test_vfmsac_vv_f32m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x float> @llvm.riscv.vfmsac.nxv8f32.nxv8f32.i64(<vscale x 8 x float> [[VD:%.*]], <vscale x 8 x float> [[VS1:%.*]], <vscale x 8 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x float> @llvm.riscv.vfmsac.nxv8f32.nxv8f32.i64(<vscale x 8 x float> [[VD:%.*]], <vscale x 8 x float> [[VS1:%.*]], <vscale x 8 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x float> [[TMP0]]
 //
 vfloat32m4_t test_vfmsac_vv_f32m4(vfloat32m4_t vd, vfloat32m4_t vs1, vfloat32m4_t vs2, size_t vl) {
@@ -180,7 +180,7 @@ vfloat32m4_t test_vfmsac_vv_f32m4(vfloat32m4_t vd, vfloat32m4_t vs1, vfloat32m4_
 
 // CHECK-RV64-LABEL: @test_vfmsac_vf_f32m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x float> @llvm.riscv.vfmsac.nxv8f32.f32.i64(<vscale x 8 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 8 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x float> @llvm.riscv.vfmsac.nxv8f32.f32.i64(<vscale x 8 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 8 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x float> [[TMP0]]
 //
 vfloat32m4_t test_vfmsac_vf_f32m4(vfloat32m4_t vd, float rs1, vfloat32m4_t vs2, size_t vl) {
@@ -189,7 +189,7 @@ vfloat32m4_t test_vfmsac_vf_f32m4(vfloat32m4_t vd, float rs1, vfloat32m4_t vs2,
 
 // CHECK-RV64-LABEL: @test_vfmsac_vv_f32m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x float> @llvm.riscv.vfmsac.nxv16f32.nxv16f32.i64(<vscale x 16 x float> [[VD:%.*]], <vscale x 16 x float> [[VS1:%.*]], <vscale x 16 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x float> @llvm.riscv.vfmsac.nxv16f32.nxv16f32.i64(<vscale x 16 x float> [[VD:%.*]], <vscale x 16 x float> [[VS1:%.*]], <vscale x 16 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x float> [[TMP0]]
 //
 vfloat32m8_t test_vfmsac_vv_f32m8(vfloat32m8_t vd, vfloat32m8_t vs1, vfloat32m8_t vs2, size_t vl) {
@@ -198,7 +198,7 @@ vfloat32m8_t test_vfmsac_vv_f32m8(vfloat32m8_t vd, vfloat32m8_t vs1, vfloat32m8_
 
 // CHECK-RV64-LABEL: @test_vfmsac_vf_f32m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x float> @llvm.riscv.vfmsac.nxv16f32.f32.i64(<vscale x 16 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 16 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x float> @llvm.riscv.vfmsac.nxv16f32.f32.i64(<vscale x 16 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 16 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x float> [[TMP0]]
 //
 vfloat32m8_t test_vfmsac_vf_f32m8(vfloat32m8_t vd, float rs1, vfloat32m8_t vs2, size_t vl) {
@@ -207,7 +207,7 @@ vfloat32m8_t test_vfmsac_vf_f32m8(vfloat32m8_t vd, float rs1, vfloat32m8_t vs2,
 
 // CHECK-RV64-LABEL: @test_vfmsac_vv_f64m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x double> @llvm.riscv.vfmsac.nxv1f64.nxv1f64.i64(<vscale x 1 x double> [[VD:%.*]], <vscale x 1 x double> [[VS1:%.*]], <vscale x 1 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x double> @llvm.riscv.vfmsac.nxv1f64.nxv1f64.i64(<vscale x 1 x double> [[VD:%.*]], <vscale x 1 x double> [[VS1:%.*]], <vscale x 1 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x double> [[TMP0]]
 //
 vfloat64m1_t test_vfmsac_vv_f64m1(vfloat64m1_t vd, vfloat64m1_t vs1, vfloat64m1_t vs2, size_t vl) {
@@ -216,7 +216,7 @@ vfloat64m1_t test_vfmsac_vv_f64m1(vfloat64m1_t vd, vfloat64m1_t vs1, vfloat64m1_
 
 // CHECK-RV64-LABEL: @test_vfmsac_vf_f64m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x double> @llvm.riscv.vfmsac.nxv1f64.f64.i64(<vscale x 1 x double> [[VD:%.*]], double [[RS1:%.*]], <vscale x 1 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x double> @llvm.riscv.vfmsac.nxv1f64.f64.i64(<vscale x 1 x double> [[VD:%.*]], double [[RS1:%.*]], <vscale x 1 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x double> [[TMP0]]
 //
 vfloat64m1_t test_vfmsac_vf_f64m1(vfloat64m1_t vd, double rs1, vfloat64m1_t vs2, size_t vl) {
@@ -225,7 +225,7 @@ vfloat64m1_t test_vfmsac_vf_f64m1(vfloat64m1_t vd, double rs1, vfloat64m1_t vs2,
 
 // CHECK-RV64-LABEL: @test_vfmsac_vv_f64m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x double> @llvm.riscv.vfmsac.nxv2f64.nxv2f64.i64(<vscale x 2 x double> [[VD:%.*]], <vscale x 2 x double> [[VS1:%.*]], <vscale x 2 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x double> @llvm.riscv.vfmsac.nxv2f64.nxv2f64.i64(<vscale x 2 x double> [[VD:%.*]], <vscale x 2 x double> [[VS1:%.*]], <vscale x 2 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x double> [[TMP0]]
 //
 vfloat64m2_t test_vfmsac_vv_f64m2(vfloat64m2_t vd, vfloat64m2_t vs1, vfloat64m2_t vs2, size_t vl) {
@@ -234,7 +234,7 @@ vfloat64m2_t test_vfmsac_vv_f64m2(vfloat64m2_t vd, vfloat64m2_t vs1, vfloat64m2_
 
 // CHECK-RV64-LABEL: @test_vfmsac_vf_f64m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x double> @llvm.riscv.vfmsac.nxv2f64.f64.i64(<vscale x 2 x double> [[VD:%.*]], double [[RS1:%.*]], <vscale x 2 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x double> @llvm.riscv.vfmsac.nxv2f64.f64.i64(<vscale x 2 x double> [[VD:%.*]], double [[RS1:%.*]], <vscale x 2 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x double> [[TMP0]]
 //
 vfloat64m2_t test_vfmsac_vf_f64m2(vfloat64m2_t vd, double rs1, vfloat64m2_t vs2, size_t vl) {
@@ -243,7 +243,7 @@ vfloat64m2_t test_vfmsac_vf_f64m2(vfloat64m2_t vd, double rs1, vfloat64m2_t vs2,
 
 // CHECK-RV64-LABEL: @test_vfmsac_vv_f64m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x double> @llvm.riscv.vfmsac.nxv4f64.nxv4f64.i64(<vscale x 4 x double> [[VD:%.*]], <vscale x 4 x double> [[VS1:%.*]], <vscale x 4 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x double> @llvm.riscv.vfmsac.nxv4f64.nxv4f64.i64(<vscale x 4 x double> [[VD:%.*]], <vscale x 4 x double> [[VS1:%.*]], <vscale x 4 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x double> [[TMP0]]
 //
 vfloat64m4_t test_vfmsac_vv_f64m4(vfloat64m4_t vd, vfloat64m4_t vs1, vfloat64m4_t vs2, size_t vl) {
@@ -252,7 +252,7 @@ vfloat64m4_t test_vfmsac_vv_f64m4(vfloat64m4_t vd, vfloat64m4_t vs1, vfloat64m4_
 
 // CHECK-RV64-LABEL: @test_vfmsac_vf_f64m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x double> @llvm.riscv.vfmsac.nxv4f64.f64.i64(<vscale x 4 x double> [[VD:%.*]], double [[RS1:%.*]], <vscale x 4 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x double> @llvm.riscv.vfmsac.nxv4f64.f64.i64(<vscale x 4 x double> [[VD:%.*]], double [[RS1:%.*]], <vscale x 4 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x double> [[TMP0]]
 //
 vfloat64m4_t test_vfmsac_vf_f64m4(vfloat64m4_t vd, double rs1, vfloat64m4_t vs2, size_t vl) {
@@ -261,7 +261,7 @@ vfloat64m4_t test_vfmsac_vf_f64m4(vfloat64m4_t vd, double rs1, vfloat64m4_t vs2,
 
 // CHECK-RV64-LABEL: @test_vfmsac_vv_f64m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x double> @llvm.riscv.vfmsac.nxv8f64.nxv8f64.i64(<vscale x 8 x double> [[VD:%.*]], <vscale x 8 x double> [[VS1:%.*]], <vscale x 8 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x double> @llvm.riscv.vfmsac.nxv8f64.nxv8f64.i64(<vscale x 8 x double> [[VD:%.*]], <vscale x 8 x double> [[VS1:%.*]], <vscale x 8 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x double> [[TMP0]]
 //
 vfloat64m8_t test_vfmsac_vv_f64m8(vfloat64m8_t vd, vfloat64m8_t vs1, vfloat64m8_t vs2, size_t vl) {
@@ -270,7 +270,7 @@ vfloat64m8_t test_vfmsac_vv_f64m8(vfloat64m8_t vd, vfloat64m8_t vs1, vfloat64m8_
 
 // CHECK-RV64-LABEL: @test_vfmsac_vf_f64m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x double> @llvm.riscv.vfmsac.nxv8f64.f64.i64(<vscale x 8 x double> [[VD:%.*]], double [[RS1:%.*]], <vscale x 8 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x double> @llvm.riscv.vfmsac.nxv8f64.f64.i64(<vscale x 8 x double> [[VD:%.*]], double [[RS1:%.*]], <vscale x 8 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x double> [[TMP0]]
 //
 vfloat64m8_t test_vfmsac_vf_f64m8(vfloat64m8_t vd, double rs1, vfloat64m8_t vs2, size_t vl) {

diff  --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vfmsub.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vfmsub.c
index 7ff5e6128c824..1757bee4e41d6 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vfmsub.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vfmsub.c
@@ -9,7 +9,7 @@
 
 // CHECK-RV64-LABEL: @test_vfmsub_vv_f16mf4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x half> @llvm.riscv.vfmsub.nxv1f16.nxv1f16.i64(<vscale x 1 x half> [[VD:%.*]], <vscale x 1 x half> [[VS1:%.*]], <vscale x 1 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x half> @llvm.riscv.vfmsub.nxv1f16.nxv1f16.i64(<vscale x 1 x half> [[VD:%.*]], <vscale x 1 x half> [[VS1:%.*]], <vscale x 1 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x half> [[TMP0]]
 //
 vfloat16mf4_t test_vfmsub_vv_f16mf4(vfloat16mf4_t vd, vfloat16mf4_t vs1, vfloat16mf4_t vs2, size_t vl) {
@@ -18,7 +18,7 @@ vfloat16mf4_t test_vfmsub_vv_f16mf4(vfloat16mf4_t vd, vfloat16mf4_t vs1, vfloat1
 
 // CHECK-RV64-LABEL: @test_vfmsub_vf_f16mf4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x half> @llvm.riscv.vfmsub.nxv1f16.f16.i64(<vscale x 1 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 1 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x half> @llvm.riscv.vfmsub.nxv1f16.f16.i64(<vscale x 1 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 1 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x half> [[TMP0]]
 //
 vfloat16mf4_t test_vfmsub_vf_f16mf4(vfloat16mf4_t vd, _Float16 rs1, vfloat16mf4_t vs2, size_t vl) {
@@ -27,7 +27,7 @@ vfloat16mf4_t test_vfmsub_vf_f16mf4(vfloat16mf4_t vd, _Float16 rs1, vfloat16mf4_
 
 // CHECK-RV64-LABEL: @test_vfmsub_vv_f16mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x half> @llvm.riscv.vfmsub.nxv2f16.nxv2f16.i64(<vscale x 2 x half> [[VD:%.*]], <vscale x 2 x half> [[VS1:%.*]], <vscale x 2 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x half> @llvm.riscv.vfmsub.nxv2f16.nxv2f16.i64(<vscale x 2 x half> [[VD:%.*]], <vscale x 2 x half> [[VS1:%.*]], <vscale x 2 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x half> [[TMP0]]
 //
 vfloat16mf2_t test_vfmsub_vv_f16mf2(vfloat16mf2_t vd, vfloat16mf2_t vs1, vfloat16mf2_t vs2, size_t vl) {
@@ -36,7 +36,7 @@ vfloat16mf2_t test_vfmsub_vv_f16mf2(vfloat16mf2_t vd, vfloat16mf2_t vs1, vfloat1
 
 // CHECK-RV64-LABEL: @test_vfmsub_vf_f16mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x half> @llvm.riscv.vfmsub.nxv2f16.f16.i64(<vscale x 2 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 2 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x half> @llvm.riscv.vfmsub.nxv2f16.f16.i64(<vscale x 2 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 2 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x half> [[TMP0]]
 //
 vfloat16mf2_t test_vfmsub_vf_f16mf2(vfloat16mf2_t vd, _Float16 rs1, vfloat16mf2_t vs2, size_t vl) {
@@ -45,7 +45,7 @@ vfloat16mf2_t test_vfmsub_vf_f16mf2(vfloat16mf2_t vd, _Float16 rs1, vfloat16mf2_
 
 // CHECK-RV64-LABEL: @test_vfmsub_vv_f16m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x half> @llvm.riscv.vfmsub.nxv4f16.nxv4f16.i64(<vscale x 4 x half> [[VD:%.*]], <vscale x 4 x half> [[VS1:%.*]], <vscale x 4 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x half> @llvm.riscv.vfmsub.nxv4f16.nxv4f16.i64(<vscale x 4 x half> [[VD:%.*]], <vscale x 4 x half> [[VS1:%.*]], <vscale x 4 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x half> [[TMP0]]
 //
 vfloat16m1_t test_vfmsub_vv_f16m1(vfloat16m1_t vd, vfloat16m1_t vs1, vfloat16m1_t vs2, size_t vl) {
@@ -54,7 +54,7 @@ vfloat16m1_t test_vfmsub_vv_f16m1(vfloat16m1_t vd, vfloat16m1_t vs1, vfloat16m1_
 
 // CHECK-RV64-LABEL: @test_vfmsub_vf_f16m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x half> @llvm.riscv.vfmsub.nxv4f16.f16.i64(<vscale x 4 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 4 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x half> @llvm.riscv.vfmsub.nxv4f16.f16.i64(<vscale x 4 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 4 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x half> [[TMP0]]
 //
 vfloat16m1_t test_vfmsub_vf_f16m1(vfloat16m1_t vd, _Float16 rs1, vfloat16m1_t vs2, size_t vl) {
@@ -63,7 +63,7 @@ vfloat16m1_t test_vfmsub_vf_f16m1(vfloat16m1_t vd, _Float16 rs1, vfloat16m1_t vs
 
 // CHECK-RV64-LABEL: @test_vfmsub_vv_f16m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x half> @llvm.riscv.vfmsub.nxv8f16.nxv8f16.i64(<vscale x 8 x half> [[VD:%.*]], <vscale x 8 x half> [[VS1:%.*]], <vscale x 8 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x half> @llvm.riscv.vfmsub.nxv8f16.nxv8f16.i64(<vscale x 8 x half> [[VD:%.*]], <vscale x 8 x half> [[VS1:%.*]], <vscale x 8 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x half> [[TMP0]]
 //
 vfloat16m2_t test_vfmsub_vv_f16m2(vfloat16m2_t vd, vfloat16m2_t vs1, vfloat16m2_t vs2, size_t vl) {
@@ -72,7 +72,7 @@ vfloat16m2_t test_vfmsub_vv_f16m2(vfloat16m2_t vd, vfloat16m2_t vs1, vfloat16m2_
 
 // CHECK-RV64-LABEL: @test_vfmsub_vf_f16m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x half> @llvm.riscv.vfmsub.nxv8f16.f16.i64(<vscale x 8 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 8 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x half> @llvm.riscv.vfmsub.nxv8f16.f16.i64(<vscale x 8 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 8 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x half> [[TMP0]]
 //
 vfloat16m2_t test_vfmsub_vf_f16m2(vfloat16m2_t vd, _Float16 rs1, vfloat16m2_t vs2, size_t vl) {
@@ -81,7 +81,7 @@ vfloat16m2_t test_vfmsub_vf_f16m2(vfloat16m2_t vd, _Float16 rs1, vfloat16m2_t vs
 
 // CHECK-RV64-LABEL: @test_vfmsub_vv_f16m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x half> @llvm.riscv.vfmsub.nxv16f16.nxv16f16.i64(<vscale x 16 x half> [[VD:%.*]], <vscale x 16 x half> [[VS1:%.*]], <vscale x 16 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x half> @llvm.riscv.vfmsub.nxv16f16.nxv16f16.i64(<vscale x 16 x half> [[VD:%.*]], <vscale x 16 x half> [[VS1:%.*]], <vscale x 16 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x half> [[TMP0]]
 //
 vfloat16m4_t test_vfmsub_vv_f16m4(vfloat16m4_t vd, vfloat16m4_t vs1, vfloat16m4_t vs2, size_t vl) {
@@ -90,7 +90,7 @@ vfloat16m4_t test_vfmsub_vv_f16m4(vfloat16m4_t vd, vfloat16m4_t vs1, vfloat16m4_
 
 // CHECK-RV64-LABEL: @test_vfmsub_vf_f16m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x half> @llvm.riscv.vfmsub.nxv16f16.f16.i64(<vscale x 16 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 16 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x half> @llvm.riscv.vfmsub.nxv16f16.f16.i64(<vscale x 16 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 16 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x half> [[TMP0]]
 //
 vfloat16m4_t test_vfmsub_vf_f16m4(vfloat16m4_t vd, _Float16 rs1, vfloat16m4_t vs2, size_t vl) {
@@ -99,7 +99,7 @@ vfloat16m4_t test_vfmsub_vf_f16m4(vfloat16m4_t vd, _Float16 rs1, vfloat16m4_t vs
 
 // CHECK-RV64-LABEL: @test_vfmsub_vv_f16m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x half> @llvm.riscv.vfmsub.nxv32f16.nxv32f16.i64(<vscale x 32 x half> [[VD:%.*]], <vscale x 32 x half> [[VS1:%.*]], <vscale x 32 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x half> @llvm.riscv.vfmsub.nxv32f16.nxv32f16.i64(<vscale x 32 x half> [[VD:%.*]], <vscale x 32 x half> [[VS1:%.*]], <vscale x 32 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 32 x half> [[TMP0]]
 //
 vfloat16m8_t test_vfmsub_vv_f16m8(vfloat16m8_t vd, vfloat16m8_t vs1, vfloat16m8_t vs2, size_t vl) {
@@ -108,7 +108,7 @@ vfloat16m8_t test_vfmsub_vv_f16m8(vfloat16m8_t vd, vfloat16m8_t vs1, vfloat16m8_
 
 // CHECK-RV64-LABEL: @test_vfmsub_vf_f16m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x half> @llvm.riscv.vfmsub.nxv32f16.f16.i64(<vscale x 32 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 32 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x half> @llvm.riscv.vfmsub.nxv32f16.f16.i64(<vscale x 32 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 32 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 32 x half> [[TMP0]]
 //
 vfloat16m8_t test_vfmsub_vf_f16m8(vfloat16m8_t vd, _Float16 rs1, vfloat16m8_t vs2, size_t vl) {
@@ -117,7 +117,7 @@ vfloat16m8_t test_vfmsub_vf_f16m8(vfloat16m8_t vd, _Float16 rs1, vfloat16m8_t vs
 
 // CHECK-RV64-LABEL: @test_vfmsub_vv_f32mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x float> @llvm.riscv.vfmsub.nxv1f32.nxv1f32.i64(<vscale x 1 x float> [[VD:%.*]], <vscale x 1 x float> [[VS1:%.*]], <vscale x 1 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x float> @llvm.riscv.vfmsub.nxv1f32.nxv1f32.i64(<vscale x 1 x float> [[VD:%.*]], <vscale x 1 x float> [[VS1:%.*]], <vscale x 1 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x float> [[TMP0]]
 //
 vfloat32mf2_t test_vfmsub_vv_f32mf2(vfloat32mf2_t vd, vfloat32mf2_t vs1, vfloat32mf2_t vs2, size_t vl) {
@@ -126,7 +126,7 @@ vfloat32mf2_t test_vfmsub_vv_f32mf2(vfloat32mf2_t vd, vfloat32mf2_t vs1, vfloat3
 
 // CHECK-RV64-LABEL: @test_vfmsub_vf_f32mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x float> @llvm.riscv.vfmsub.nxv1f32.f32.i64(<vscale x 1 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 1 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x float> @llvm.riscv.vfmsub.nxv1f32.f32.i64(<vscale x 1 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 1 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x float> [[TMP0]]
 //
 vfloat32mf2_t test_vfmsub_vf_f32mf2(vfloat32mf2_t vd, float rs1, vfloat32mf2_t vs2, size_t vl) {
@@ -135,7 +135,7 @@ vfloat32mf2_t test_vfmsub_vf_f32mf2(vfloat32mf2_t vd, float rs1, vfloat32mf2_t v
 
 // CHECK-RV64-LABEL: @test_vfmsub_vv_f32m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x float> @llvm.riscv.vfmsub.nxv2f32.nxv2f32.i64(<vscale x 2 x float> [[VD:%.*]], <vscale x 2 x float> [[VS1:%.*]], <vscale x 2 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x float> @llvm.riscv.vfmsub.nxv2f32.nxv2f32.i64(<vscale x 2 x float> [[VD:%.*]], <vscale x 2 x float> [[VS1:%.*]], <vscale x 2 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x float> [[TMP0]]
 //
 vfloat32m1_t test_vfmsub_vv_f32m1(vfloat32m1_t vd, vfloat32m1_t vs1, vfloat32m1_t vs2, size_t vl) {
@@ -144,7 +144,7 @@ vfloat32m1_t test_vfmsub_vv_f32m1(vfloat32m1_t vd, vfloat32m1_t vs1, vfloat32m1_
 
 // CHECK-RV64-LABEL: @test_vfmsub_vf_f32m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x float> @llvm.riscv.vfmsub.nxv2f32.f32.i64(<vscale x 2 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 2 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x float> @llvm.riscv.vfmsub.nxv2f32.f32.i64(<vscale x 2 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 2 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x float> [[TMP0]]
 //
 vfloat32m1_t test_vfmsub_vf_f32m1(vfloat32m1_t vd, float rs1, vfloat32m1_t vs2, size_t vl) {
@@ -153,7 +153,7 @@ vfloat32m1_t test_vfmsub_vf_f32m1(vfloat32m1_t vd, float rs1, vfloat32m1_t vs2,
 
 // CHECK-RV64-LABEL: @test_vfmsub_vv_f32m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.riscv.vfmsub.nxv4f32.nxv4f32.i64(<vscale x 4 x float> [[VD:%.*]], <vscale x 4 x float> [[VS1:%.*]], <vscale x 4 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.riscv.vfmsub.nxv4f32.nxv4f32.i64(<vscale x 4 x float> [[VD:%.*]], <vscale x 4 x float> [[VS1:%.*]], <vscale x 4 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
 vfloat32m2_t test_vfmsub_vv_f32m2(vfloat32m2_t vd, vfloat32m2_t vs1, vfloat32m2_t vs2, size_t vl) {
@@ -162,7 +162,7 @@ vfloat32m2_t test_vfmsub_vv_f32m2(vfloat32m2_t vd, vfloat32m2_t vs1, vfloat32m2_
 
 // CHECK-RV64-LABEL: @test_vfmsub_vf_f32m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.riscv.vfmsub.nxv4f32.f32.i64(<vscale x 4 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 4 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.riscv.vfmsub.nxv4f32.f32.i64(<vscale x 4 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 4 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
 vfloat32m2_t test_vfmsub_vf_f32m2(vfloat32m2_t vd, float rs1, vfloat32m2_t vs2, size_t vl) {
@@ -171,7 +171,7 @@ vfloat32m2_t test_vfmsub_vf_f32m2(vfloat32m2_t vd, float rs1, vfloat32m2_t vs2,
 
 // CHECK-RV64-LABEL: @test_vfmsub_vv_f32m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x float> @llvm.riscv.vfmsub.nxv8f32.nxv8f32.i64(<vscale x 8 x float> [[VD:%.*]], <vscale x 8 x float> [[VS1:%.*]], <vscale x 8 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x float> @llvm.riscv.vfmsub.nxv8f32.nxv8f32.i64(<vscale x 8 x float> [[VD:%.*]], <vscale x 8 x float> [[VS1:%.*]], <vscale x 8 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x float> [[TMP0]]
 //
 vfloat32m4_t test_vfmsub_vv_f32m4(vfloat32m4_t vd, vfloat32m4_t vs1, vfloat32m4_t vs2, size_t vl) {
@@ -180,7 +180,7 @@ vfloat32m4_t test_vfmsub_vv_f32m4(vfloat32m4_t vd, vfloat32m4_t vs1, vfloat32m4_
 
 // CHECK-RV64-LABEL: @test_vfmsub_vf_f32m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x float> @llvm.riscv.vfmsub.nxv8f32.f32.i64(<vscale x 8 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 8 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x float> @llvm.riscv.vfmsub.nxv8f32.f32.i64(<vscale x 8 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 8 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x float> [[TMP0]]
 //
 vfloat32m4_t test_vfmsub_vf_f32m4(vfloat32m4_t vd, float rs1, vfloat32m4_t vs2, size_t vl) {
@@ -189,7 +189,7 @@ vfloat32m4_t test_vfmsub_vf_f32m4(vfloat32m4_t vd, float rs1, vfloat32m4_t vs2,
 
 // CHECK-RV64-LABEL: @test_vfmsub_vv_f32m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x float> @llvm.riscv.vfmsub.nxv16f32.nxv16f32.i64(<vscale x 16 x float> [[VD:%.*]], <vscale x 16 x float> [[VS1:%.*]], <vscale x 16 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x float> @llvm.riscv.vfmsub.nxv16f32.nxv16f32.i64(<vscale x 16 x float> [[VD:%.*]], <vscale x 16 x float> [[VS1:%.*]], <vscale x 16 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x float> [[TMP0]]
 //
 vfloat32m8_t test_vfmsub_vv_f32m8(vfloat32m8_t vd, vfloat32m8_t vs1, vfloat32m8_t vs2, size_t vl) {
@@ -198,7 +198,7 @@ vfloat32m8_t test_vfmsub_vv_f32m8(vfloat32m8_t vd, vfloat32m8_t vs1, vfloat32m8_
 
 // CHECK-RV64-LABEL: @test_vfmsub_vf_f32m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x float> @llvm.riscv.vfmsub.nxv16f32.f32.i64(<vscale x 16 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 16 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x float> @llvm.riscv.vfmsub.nxv16f32.f32.i64(<vscale x 16 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 16 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x float> [[TMP0]]
 //
 vfloat32m8_t test_vfmsub_vf_f32m8(vfloat32m8_t vd, float rs1, vfloat32m8_t vs2, size_t vl) {
@@ -207,7 +207,7 @@ vfloat32m8_t test_vfmsub_vf_f32m8(vfloat32m8_t vd, float rs1, vfloat32m8_t vs2,
 
 // CHECK-RV64-LABEL: @test_vfmsub_vv_f64m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x double> @llvm.riscv.vfmsub.nxv1f64.nxv1f64.i64(<vscale x 1 x double> [[VD:%.*]], <vscale x 1 x double> [[VS1:%.*]], <vscale x 1 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x double> @llvm.riscv.vfmsub.nxv1f64.nxv1f64.i64(<vscale x 1 x double> [[VD:%.*]], <vscale x 1 x double> [[VS1:%.*]], <vscale x 1 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x double> [[TMP0]]
 //
 vfloat64m1_t test_vfmsub_vv_f64m1(vfloat64m1_t vd, vfloat64m1_t vs1, vfloat64m1_t vs2, size_t vl) {
@@ -216,7 +216,7 @@ vfloat64m1_t test_vfmsub_vv_f64m1(vfloat64m1_t vd, vfloat64m1_t vs1, vfloat64m1_
 
 // CHECK-RV64-LABEL: @test_vfmsub_vf_f64m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x double> @llvm.riscv.vfmsub.nxv1f64.f64.i64(<vscale x 1 x double> [[VD:%.*]], double [[RS1:%.*]], <vscale x 1 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x double> @llvm.riscv.vfmsub.nxv1f64.f64.i64(<vscale x 1 x double> [[VD:%.*]], double [[RS1:%.*]], <vscale x 1 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x double> [[TMP0]]
 //
 vfloat64m1_t test_vfmsub_vf_f64m1(vfloat64m1_t vd, double rs1, vfloat64m1_t vs2, size_t vl) {
@@ -225,7 +225,7 @@ vfloat64m1_t test_vfmsub_vf_f64m1(vfloat64m1_t vd, double rs1, vfloat64m1_t vs2,
 
 // CHECK-RV64-LABEL: @test_vfmsub_vv_f64m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x double> @llvm.riscv.vfmsub.nxv2f64.nxv2f64.i64(<vscale x 2 x double> [[VD:%.*]], <vscale x 2 x double> [[VS1:%.*]], <vscale x 2 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x double> @llvm.riscv.vfmsub.nxv2f64.nxv2f64.i64(<vscale x 2 x double> [[VD:%.*]], <vscale x 2 x double> [[VS1:%.*]], <vscale x 2 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x double> [[TMP0]]
 //
 vfloat64m2_t test_vfmsub_vv_f64m2(vfloat64m2_t vd, vfloat64m2_t vs1, vfloat64m2_t vs2, size_t vl) {
@@ -234,7 +234,7 @@ vfloat64m2_t test_vfmsub_vv_f64m2(vfloat64m2_t vd, vfloat64m2_t vs1, vfloat64m2_
 
 // CHECK-RV64-LABEL: @test_vfmsub_vf_f64m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x double> @llvm.riscv.vfmsub.nxv2f64.f64.i64(<vscale x 2 x double> [[VD:%.*]], double [[RS1:%.*]], <vscale x 2 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x double> @llvm.riscv.vfmsub.nxv2f64.f64.i64(<vscale x 2 x double> [[VD:%.*]], double [[RS1:%.*]], <vscale x 2 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x double> [[TMP0]]
 //
 vfloat64m2_t test_vfmsub_vf_f64m2(vfloat64m2_t vd, double rs1, vfloat64m2_t vs2, size_t vl) {
@@ -243,7 +243,7 @@ vfloat64m2_t test_vfmsub_vf_f64m2(vfloat64m2_t vd, double rs1, vfloat64m2_t vs2,
 
 // CHECK-RV64-LABEL: @test_vfmsub_vv_f64m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x double> @llvm.riscv.vfmsub.nxv4f64.nxv4f64.i64(<vscale x 4 x double> [[VD:%.*]], <vscale x 4 x double> [[VS1:%.*]], <vscale x 4 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x double> @llvm.riscv.vfmsub.nxv4f64.nxv4f64.i64(<vscale x 4 x double> [[VD:%.*]], <vscale x 4 x double> [[VS1:%.*]], <vscale x 4 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x double> [[TMP0]]
 //
 vfloat64m4_t test_vfmsub_vv_f64m4(vfloat64m4_t vd, vfloat64m4_t vs1, vfloat64m4_t vs2, size_t vl) {
@@ -252,7 +252,7 @@ vfloat64m4_t test_vfmsub_vv_f64m4(vfloat64m4_t vd, vfloat64m4_t vs1, vfloat64m4_
 
 // CHECK-RV64-LABEL: @test_vfmsub_vf_f64m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x double> @llvm.riscv.vfmsub.nxv4f64.f64.i64(<vscale x 4 x double> [[VD:%.*]], double [[RS1:%.*]], <vscale x 4 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x double> @llvm.riscv.vfmsub.nxv4f64.f64.i64(<vscale x 4 x double> [[VD:%.*]], double [[RS1:%.*]], <vscale x 4 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x double> [[TMP0]]
 //
 vfloat64m4_t test_vfmsub_vf_f64m4(vfloat64m4_t vd, double rs1, vfloat64m4_t vs2, size_t vl) {
@@ -261,7 +261,7 @@ vfloat64m4_t test_vfmsub_vf_f64m4(vfloat64m4_t vd, double rs1, vfloat64m4_t vs2,
 
 // CHECK-RV64-LABEL: @test_vfmsub_vv_f64m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x double> @llvm.riscv.vfmsub.nxv8f64.nxv8f64.i64(<vscale x 8 x double> [[VD:%.*]], <vscale x 8 x double> [[VS1:%.*]], <vscale x 8 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x double> @llvm.riscv.vfmsub.nxv8f64.nxv8f64.i64(<vscale x 8 x double> [[VD:%.*]], <vscale x 8 x double> [[VS1:%.*]], <vscale x 8 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x double> [[TMP0]]
 //
 vfloat64m8_t test_vfmsub_vv_f64m8(vfloat64m8_t vd, vfloat64m8_t vs1, vfloat64m8_t vs2, size_t vl) {
@@ -270,7 +270,7 @@ vfloat64m8_t test_vfmsub_vv_f64m8(vfloat64m8_t vd, vfloat64m8_t vs1, vfloat64m8_
 
 // CHECK-RV64-LABEL: @test_vfmsub_vf_f64m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x double> @llvm.riscv.vfmsub.nxv8f64.f64.i64(<vscale x 8 x double> [[VD:%.*]], double [[RS1:%.*]], <vscale x 8 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x double> @llvm.riscv.vfmsub.nxv8f64.f64.i64(<vscale x 8 x double> [[VD:%.*]], double [[RS1:%.*]], <vscale x 8 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x double> [[TMP0]]
 //
 vfloat64m8_t test_vfmsub_vf_f64m8(vfloat64m8_t vd, double rs1, vfloat64m8_t vs2, size_t vl) {

diff  --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vfnmacc.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vfnmacc.c
index dbe79169dae09..36506706ef6a4 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vfnmacc.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vfnmacc.c
@@ -9,7 +9,7 @@
 
 // CHECK-RV64-LABEL: @test_vfnmacc_vv_f16mf4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x half> @llvm.riscv.vfnmacc.nxv1f16.nxv1f16.i64(<vscale x 1 x half> [[VD:%.*]], <vscale x 1 x half> [[VS1:%.*]], <vscale x 1 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x half> @llvm.riscv.vfnmacc.nxv1f16.nxv1f16.i64(<vscale x 1 x half> [[VD:%.*]], <vscale x 1 x half> [[VS1:%.*]], <vscale x 1 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x half> [[TMP0]]
 //
 vfloat16mf4_t test_vfnmacc_vv_f16mf4(vfloat16mf4_t vd, vfloat16mf4_t vs1, vfloat16mf4_t vs2, size_t vl) {
@@ -18,7 +18,7 @@ vfloat16mf4_t test_vfnmacc_vv_f16mf4(vfloat16mf4_t vd, vfloat16mf4_t vs1, vfloat
 
 // CHECK-RV64-LABEL: @test_vfnmacc_vf_f16mf4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x half> @llvm.riscv.vfnmacc.nxv1f16.f16.i64(<vscale x 1 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 1 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x half> @llvm.riscv.vfnmacc.nxv1f16.f16.i64(<vscale x 1 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 1 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x half> [[TMP0]]
 //
 vfloat16mf4_t test_vfnmacc_vf_f16mf4(vfloat16mf4_t vd, _Float16 rs1, vfloat16mf4_t vs2, size_t vl) {
@@ -27,7 +27,7 @@ vfloat16mf4_t test_vfnmacc_vf_f16mf4(vfloat16mf4_t vd, _Float16 rs1, vfloat16mf4
 
 // CHECK-RV64-LABEL: @test_vfnmacc_vv_f16mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x half> @llvm.riscv.vfnmacc.nxv2f16.nxv2f16.i64(<vscale x 2 x half> [[VD:%.*]], <vscale x 2 x half> [[VS1:%.*]], <vscale x 2 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x half> @llvm.riscv.vfnmacc.nxv2f16.nxv2f16.i64(<vscale x 2 x half> [[VD:%.*]], <vscale x 2 x half> [[VS1:%.*]], <vscale x 2 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x half> [[TMP0]]
 //
 vfloat16mf2_t test_vfnmacc_vv_f16mf2(vfloat16mf2_t vd, vfloat16mf2_t vs1, vfloat16mf2_t vs2, size_t vl) {
@@ -36,7 +36,7 @@ vfloat16mf2_t test_vfnmacc_vv_f16mf2(vfloat16mf2_t vd, vfloat16mf2_t vs1, vfloat
 
 // CHECK-RV64-LABEL: @test_vfnmacc_vf_f16mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x half> @llvm.riscv.vfnmacc.nxv2f16.f16.i64(<vscale x 2 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 2 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x half> @llvm.riscv.vfnmacc.nxv2f16.f16.i64(<vscale x 2 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 2 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x half> [[TMP0]]
 //
 vfloat16mf2_t test_vfnmacc_vf_f16mf2(vfloat16mf2_t vd, _Float16 rs1, vfloat16mf2_t vs2, size_t vl) {
@@ -45,7 +45,7 @@ vfloat16mf2_t test_vfnmacc_vf_f16mf2(vfloat16mf2_t vd, _Float16 rs1, vfloat16mf2
 
 // CHECK-RV64-LABEL: @test_vfnmacc_vv_f16m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x half> @llvm.riscv.vfnmacc.nxv4f16.nxv4f16.i64(<vscale x 4 x half> [[VD:%.*]], <vscale x 4 x half> [[VS1:%.*]], <vscale x 4 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x half> @llvm.riscv.vfnmacc.nxv4f16.nxv4f16.i64(<vscale x 4 x half> [[VD:%.*]], <vscale x 4 x half> [[VS1:%.*]], <vscale x 4 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x half> [[TMP0]]
 //
 vfloat16m1_t test_vfnmacc_vv_f16m1(vfloat16m1_t vd, vfloat16m1_t vs1, vfloat16m1_t vs2, size_t vl) {
@@ -54,7 +54,7 @@ vfloat16m1_t test_vfnmacc_vv_f16m1(vfloat16m1_t vd, vfloat16m1_t vs1, vfloat16m1
 
 // CHECK-RV64-LABEL: @test_vfnmacc_vf_f16m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x half> @llvm.riscv.vfnmacc.nxv4f16.f16.i64(<vscale x 4 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 4 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x half> @llvm.riscv.vfnmacc.nxv4f16.f16.i64(<vscale x 4 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 4 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x half> [[TMP0]]
 //
 vfloat16m1_t test_vfnmacc_vf_f16m1(vfloat16m1_t vd, _Float16 rs1, vfloat16m1_t vs2, size_t vl) {
@@ -63,7 +63,7 @@ vfloat16m1_t test_vfnmacc_vf_f16m1(vfloat16m1_t vd, _Float16 rs1, vfloat16m1_t v
 
 // CHECK-RV64-LABEL: @test_vfnmacc_vv_f16m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x half> @llvm.riscv.vfnmacc.nxv8f16.nxv8f16.i64(<vscale x 8 x half> [[VD:%.*]], <vscale x 8 x half> [[VS1:%.*]], <vscale x 8 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x half> @llvm.riscv.vfnmacc.nxv8f16.nxv8f16.i64(<vscale x 8 x half> [[VD:%.*]], <vscale x 8 x half> [[VS1:%.*]], <vscale x 8 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x half> [[TMP0]]
 //
 vfloat16m2_t test_vfnmacc_vv_f16m2(vfloat16m2_t vd, vfloat16m2_t vs1, vfloat16m2_t vs2, size_t vl) {
@@ -72,7 +72,7 @@ vfloat16m2_t test_vfnmacc_vv_f16m2(vfloat16m2_t vd, vfloat16m2_t vs1, vfloat16m2
 
 // CHECK-RV64-LABEL: @test_vfnmacc_vf_f16m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x half> @llvm.riscv.vfnmacc.nxv8f16.f16.i64(<vscale x 8 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 8 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x half> @llvm.riscv.vfnmacc.nxv8f16.f16.i64(<vscale x 8 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 8 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x half> [[TMP0]]
 //
 vfloat16m2_t test_vfnmacc_vf_f16m2(vfloat16m2_t vd, _Float16 rs1, vfloat16m2_t vs2, size_t vl) {
@@ -81,7 +81,7 @@ vfloat16m2_t test_vfnmacc_vf_f16m2(vfloat16m2_t vd, _Float16 rs1, vfloat16m2_t v
 
 // CHECK-RV64-LABEL: @test_vfnmacc_vv_f16m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x half> @llvm.riscv.vfnmacc.nxv16f16.nxv16f16.i64(<vscale x 16 x half> [[VD:%.*]], <vscale x 16 x half> [[VS1:%.*]], <vscale x 16 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x half> @llvm.riscv.vfnmacc.nxv16f16.nxv16f16.i64(<vscale x 16 x half> [[VD:%.*]], <vscale x 16 x half> [[VS1:%.*]], <vscale x 16 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x half> [[TMP0]]
 //
 vfloat16m4_t test_vfnmacc_vv_f16m4(vfloat16m4_t vd, vfloat16m4_t vs1, vfloat16m4_t vs2, size_t vl) {
@@ -90,7 +90,7 @@ vfloat16m4_t test_vfnmacc_vv_f16m4(vfloat16m4_t vd, vfloat16m4_t vs1, vfloat16m4
 
 // CHECK-RV64-LABEL: @test_vfnmacc_vf_f16m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x half> @llvm.riscv.vfnmacc.nxv16f16.f16.i64(<vscale x 16 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 16 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x half> @llvm.riscv.vfnmacc.nxv16f16.f16.i64(<vscale x 16 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 16 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x half> [[TMP0]]
 //
 vfloat16m4_t test_vfnmacc_vf_f16m4(vfloat16m4_t vd, _Float16 rs1, vfloat16m4_t vs2, size_t vl) {
@@ -99,7 +99,7 @@ vfloat16m4_t test_vfnmacc_vf_f16m4(vfloat16m4_t vd, _Float16 rs1, vfloat16m4_t v
 
 // CHECK-RV64-LABEL: @test_vfnmacc_vv_f16m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x half> @llvm.riscv.vfnmacc.nxv32f16.nxv32f16.i64(<vscale x 32 x half> [[VD:%.*]], <vscale x 32 x half> [[VS1:%.*]], <vscale x 32 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x half> @llvm.riscv.vfnmacc.nxv32f16.nxv32f16.i64(<vscale x 32 x half> [[VD:%.*]], <vscale x 32 x half> [[VS1:%.*]], <vscale x 32 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 32 x half> [[TMP0]]
 //
 vfloat16m8_t test_vfnmacc_vv_f16m8(vfloat16m8_t vd, vfloat16m8_t vs1, vfloat16m8_t vs2, size_t vl) {
@@ -108,7 +108,7 @@ vfloat16m8_t test_vfnmacc_vv_f16m8(vfloat16m8_t vd, vfloat16m8_t vs1, vfloat16m8
 
 // CHECK-RV64-LABEL: @test_vfnmacc_vf_f16m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x half> @llvm.riscv.vfnmacc.nxv32f16.f16.i64(<vscale x 32 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 32 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x half> @llvm.riscv.vfnmacc.nxv32f16.f16.i64(<vscale x 32 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 32 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 32 x half> [[TMP0]]
 //
 vfloat16m8_t test_vfnmacc_vf_f16m8(vfloat16m8_t vd, _Float16 rs1, vfloat16m8_t vs2, size_t vl) {
@@ -117,7 +117,7 @@ vfloat16m8_t test_vfnmacc_vf_f16m8(vfloat16m8_t vd, _Float16 rs1, vfloat16m8_t v
 
 // CHECK-RV64-LABEL: @test_vfnmacc_vv_f32mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x float> @llvm.riscv.vfnmacc.nxv1f32.nxv1f32.i64(<vscale x 1 x float> [[VD:%.*]], <vscale x 1 x float> [[VS1:%.*]], <vscale x 1 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x float> @llvm.riscv.vfnmacc.nxv1f32.nxv1f32.i64(<vscale x 1 x float> [[VD:%.*]], <vscale x 1 x float> [[VS1:%.*]], <vscale x 1 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x float> [[TMP0]]
 //
 vfloat32mf2_t test_vfnmacc_vv_f32mf2(vfloat32mf2_t vd, vfloat32mf2_t vs1, vfloat32mf2_t vs2, size_t vl) {
@@ -126,7 +126,7 @@ vfloat32mf2_t test_vfnmacc_vv_f32mf2(vfloat32mf2_t vd, vfloat32mf2_t vs1, vfloat
 
 // CHECK-RV64-LABEL: @test_vfnmacc_vf_f32mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x float> @llvm.riscv.vfnmacc.nxv1f32.f32.i64(<vscale x 1 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 1 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x float> @llvm.riscv.vfnmacc.nxv1f32.f32.i64(<vscale x 1 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 1 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x float> [[TMP0]]
 //
 vfloat32mf2_t test_vfnmacc_vf_f32mf2(vfloat32mf2_t vd, float rs1, vfloat32mf2_t vs2, size_t vl) {
@@ -135,7 +135,7 @@ vfloat32mf2_t test_vfnmacc_vf_f32mf2(vfloat32mf2_t vd, float rs1, vfloat32mf2_t
 
 // CHECK-RV64-LABEL: @test_vfnmacc_vv_f32m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x float> @llvm.riscv.vfnmacc.nxv2f32.nxv2f32.i64(<vscale x 2 x float> [[VD:%.*]], <vscale x 2 x float> [[VS1:%.*]], <vscale x 2 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x float> @llvm.riscv.vfnmacc.nxv2f32.nxv2f32.i64(<vscale x 2 x float> [[VD:%.*]], <vscale x 2 x float> [[VS1:%.*]], <vscale x 2 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x float> [[TMP0]]
 //
 vfloat32m1_t test_vfnmacc_vv_f32m1(vfloat32m1_t vd, vfloat32m1_t vs1, vfloat32m1_t vs2, size_t vl) {
@@ -144,7 +144,7 @@ vfloat32m1_t test_vfnmacc_vv_f32m1(vfloat32m1_t vd, vfloat32m1_t vs1, vfloat32m1
 
 // CHECK-RV64-LABEL: @test_vfnmacc_vf_f32m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x float> @llvm.riscv.vfnmacc.nxv2f32.f32.i64(<vscale x 2 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 2 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x float> @llvm.riscv.vfnmacc.nxv2f32.f32.i64(<vscale x 2 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 2 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x float> [[TMP0]]
 //
 vfloat32m1_t test_vfnmacc_vf_f32m1(vfloat32m1_t vd, float rs1, vfloat32m1_t vs2, size_t vl) {
@@ -153,7 +153,7 @@ vfloat32m1_t test_vfnmacc_vf_f32m1(vfloat32m1_t vd, float rs1, vfloat32m1_t vs2,
 
 // CHECK-RV64-LABEL: @test_vfnmacc_vv_f32m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.riscv.vfnmacc.nxv4f32.nxv4f32.i64(<vscale x 4 x float> [[VD:%.*]], <vscale x 4 x float> [[VS1:%.*]], <vscale x 4 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.riscv.vfnmacc.nxv4f32.nxv4f32.i64(<vscale x 4 x float> [[VD:%.*]], <vscale x 4 x float> [[VS1:%.*]], <vscale x 4 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
 vfloat32m2_t test_vfnmacc_vv_f32m2(vfloat32m2_t vd, vfloat32m2_t vs1, vfloat32m2_t vs2, size_t vl) {
@@ -162,7 +162,7 @@ vfloat32m2_t test_vfnmacc_vv_f32m2(vfloat32m2_t vd, vfloat32m2_t vs1, vfloat32m2
 
 // CHECK-RV64-LABEL: @test_vfnmacc_vf_f32m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.riscv.vfnmacc.nxv4f32.f32.i64(<vscale x 4 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 4 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.riscv.vfnmacc.nxv4f32.f32.i64(<vscale x 4 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 4 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
 vfloat32m2_t test_vfnmacc_vf_f32m2(vfloat32m2_t vd, float rs1, vfloat32m2_t vs2, size_t vl) {
@@ -171,7 +171,7 @@ vfloat32m2_t test_vfnmacc_vf_f32m2(vfloat32m2_t vd, float rs1, vfloat32m2_t vs2,
 
 // CHECK-RV64-LABEL: @test_vfnmacc_vv_f32m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x float> @llvm.riscv.vfnmacc.nxv8f32.nxv8f32.i64(<vscale x 8 x float> [[VD:%.*]], <vscale x 8 x float> [[VS1:%.*]], <vscale x 8 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x float> @llvm.riscv.vfnmacc.nxv8f32.nxv8f32.i64(<vscale x 8 x float> [[VD:%.*]], <vscale x 8 x float> [[VS1:%.*]], <vscale x 8 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x float> [[TMP0]]
 //
 vfloat32m4_t test_vfnmacc_vv_f32m4(vfloat32m4_t vd, vfloat32m4_t vs1, vfloat32m4_t vs2, size_t vl) {
@@ -180,7 +180,7 @@ vfloat32m4_t test_vfnmacc_vv_f32m4(vfloat32m4_t vd, vfloat32m4_t vs1, vfloat32m4
 
 // CHECK-RV64-LABEL: @test_vfnmacc_vf_f32m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x float> @llvm.riscv.vfnmacc.nxv8f32.f32.i64(<vscale x 8 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 8 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x float> @llvm.riscv.vfnmacc.nxv8f32.f32.i64(<vscale x 8 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 8 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x float> [[TMP0]]
 //
 vfloat32m4_t test_vfnmacc_vf_f32m4(vfloat32m4_t vd, float rs1, vfloat32m4_t vs2, size_t vl) {
@@ -189,7 +189,7 @@ vfloat32m4_t test_vfnmacc_vf_f32m4(vfloat32m4_t vd, float rs1, vfloat32m4_t vs2,
 
 // CHECK-RV64-LABEL: @test_vfnmacc_vv_f32m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x float> @llvm.riscv.vfnmacc.nxv16f32.nxv16f32.i64(<vscale x 16 x float> [[VD:%.*]], <vscale x 16 x float> [[VS1:%.*]], <vscale x 16 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x float> @llvm.riscv.vfnmacc.nxv16f32.nxv16f32.i64(<vscale x 16 x float> [[VD:%.*]], <vscale x 16 x float> [[VS1:%.*]], <vscale x 16 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x float> [[TMP0]]
 //
 vfloat32m8_t test_vfnmacc_vv_f32m8(vfloat32m8_t vd, vfloat32m8_t vs1, vfloat32m8_t vs2, size_t vl) {
@@ -198,7 +198,7 @@ vfloat32m8_t test_vfnmacc_vv_f32m8(vfloat32m8_t vd, vfloat32m8_t vs1, vfloat32m8
 
 // CHECK-RV64-LABEL: @test_vfnmacc_vf_f32m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x float> @llvm.riscv.vfnmacc.nxv16f32.f32.i64(<vscale x 16 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 16 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x float> @llvm.riscv.vfnmacc.nxv16f32.f32.i64(<vscale x 16 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 16 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x float> [[TMP0]]
 //
 vfloat32m8_t test_vfnmacc_vf_f32m8(vfloat32m8_t vd, float rs1, vfloat32m8_t vs2, size_t vl) {
@@ -207,7 +207,7 @@ vfloat32m8_t test_vfnmacc_vf_f32m8(vfloat32m8_t vd, float rs1, vfloat32m8_t vs2,
 
 // CHECK-RV64-LABEL: @test_vfnmacc_vv_f64m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x double> @llvm.riscv.vfnmacc.nxv1f64.nxv1f64.i64(<vscale x 1 x double> [[VD:%.*]], <vscale x 1 x double> [[VS1:%.*]], <vscale x 1 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x double> @llvm.riscv.vfnmacc.nxv1f64.nxv1f64.i64(<vscale x 1 x double> [[VD:%.*]], <vscale x 1 x double> [[VS1:%.*]], <vscale x 1 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x double> [[TMP0]]
 //
 vfloat64m1_t test_vfnmacc_vv_f64m1(vfloat64m1_t vd, vfloat64m1_t vs1, vfloat64m1_t vs2, size_t vl) {
@@ -216,7 +216,7 @@ vfloat64m1_t test_vfnmacc_vv_f64m1(vfloat64m1_t vd, vfloat64m1_t vs1, vfloat64m1
 
 // CHECK-RV64-LABEL: @test_vfnmacc_vf_f64m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x double> @llvm.riscv.vfnmacc.nxv1f64.f64.i64(<vscale x 1 x double> [[VD:%.*]], double [[RS1:%.*]], <vscale x 1 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x double> @llvm.riscv.vfnmacc.nxv1f64.f64.i64(<vscale x 1 x double> [[VD:%.*]], double [[RS1:%.*]], <vscale x 1 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x double> [[TMP0]]
 //
 vfloat64m1_t test_vfnmacc_vf_f64m1(vfloat64m1_t vd, double rs1, vfloat64m1_t vs2, size_t vl) {
@@ -225,7 +225,7 @@ vfloat64m1_t test_vfnmacc_vf_f64m1(vfloat64m1_t vd, double rs1, vfloat64m1_t vs2
 
 // CHECK-RV64-LABEL: @test_vfnmacc_vv_f64m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x double> @llvm.riscv.vfnmacc.nxv2f64.nxv2f64.i64(<vscale x 2 x double> [[VD:%.*]], <vscale x 2 x double> [[VS1:%.*]], <vscale x 2 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x double> @llvm.riscv.vfnmacc.nxv2f64.nxv2f64.i64(<vscale x 2 x double> [[VD:%.*]], <vscale x 2 x double> [[VS1:%.*]], <vscale x 2 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x double> [[TMP0]]
 //
 vfloat64m2_t test_vfnmacc_vv_f64m2(vfloat64m2_t vd, vfloat64m2_t vs1, vfloat64m2_t vs2, size_t vl) {
@@ -234,7 +234,7 @@ vfloat64m2_t test_vfnmacc_vv_f64m2(vfloat64m2_t vd, vfloat64m2_t vs1, vfloat64m2
 
 // CHECK-RV64-LABEL: @test_vfnmacc_vf_f64m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x double> @llvm.riscv.vfnmacc.nxv2f64.f64.i64(<vscale x 2 x double> [[VD:%.*]], double [[RS1:%.*]], <vscale x 2 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x double> @llvm.riscv.vfnmacc.nxv2f64.f64.i64(<vscale x 2 x double> [[VD:%.*]], double [[RS1:%.*]], <vscale x 2 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x double> [[TMP0]]
 //
 vfloat64m2_t test_vfnmacc_vf_f64m2(vfloat64m2_t vd, double rs1, vfloat64m2_t vs2, size_t vl) {
@@ -243,7 +243,7 @@ vfloat64m2_t test_vfnmacc_vf_f64m2(vfloat64m2_t vd, double rs1, vfloat64m2_t vs2
 
 // CHECK-RV64-LABEL: @test_vfnmacc_vv_f64m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x double> @llvm.riscv.vfnmacc.nxv4f64.nxv4f64.i64(<vscale x 4 x double> [[VD:%.*]], <vscale x 4 x double> [[VS1:%.*]], <vscale x 4 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x double> @llvm.riscv.vfnmacc.nxv4f64.nxv4f64.i64(<vscale x 4 x double> [[VD:%.*]], <vscale x 4 x double> [[VS1:%.*]], <vscale x 4 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x double> [[TMP0]]
 //
 vfloat64m4_t test_vfnmacc_vv_f64m4(vfloat64m4_t vd, vfloat64m4_t vs1, vfloat64m4_t vs2, size_t vl) {
@@ -252,7 +252,7 @@ vfloat64m4_t test_vfnmacc_vv_f64m4(vfloat64m4_t vd, vfloat64m4_t vs1, vfloat64m4
 
 // CHECK-RV64-LABEL: @test_vfnmacc_vf_f64m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x double> @llvm.riscv.vfnmacc.nxv4f64.f64.i64(<vscale x 4 x double> [[VD:%.*]], double [[RS1:%.*]], <vscale x 4 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x double> @llvm.riscv.vfnmacc.nxv4f64.f64.i64(<vscale x 4 x double> [[VD:%.*]], double [[RS1:%.*]], <vscale x 4 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x double> [[TMP0]]
 //
 vfloat64m4_t test_vfnmacc_vf_f64m4(vfloat64m4_t vd, double rs1, vfloat64m4_t vs2, size_t vl) {
@@ -261,7 +261,7 @@ vfloat64m4_t test_vfnmacc_vf_f64m4(vfloat64m4_t vd, double rs1, vfloat64m4_t vs2
 
 // CHECK-RV64-LABEL: @test_vfnmacc_vv_f64m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x double> @llvm.riscv.vfnmacc.nxv8f64.nxv8f64.i64(<vscale x 8 x double> [[VD:%.*]], <vscale x 8 x double> [[VS1:%.*]], <vscale x 8 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x double> @llvm.riscv.vfnmacc.nxv8f64.nxv8f64.i64(<vscale x 8 x double> [[VD:%.*]], <vscale x 8 x double> [[VS1:%.*]], <vscale x 8 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x double> [[TMP0]]
 //
 vfloat64m8_t test_vfnmacc_vv_f64m8(vfloat64m8_t vd, vfloat64m8_t vs1, vfloat64m8_t vs2, size_t vl) {
@@ -270,7 +270,7 @@ vfloat64m8_t test_vfnmacc_vv_f64m8(vfloat64m8_t vd, vfloat64m8_t vs1, vfloat64m8
 
 // CHECK-RV64-LABEL: @test_vfnmacc_vf_f64m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x double> @llvm.riscv.vfnmacc.nxv8f64.f64.i64(<vscale x 8 x double> [[VD:%.*]], double [[RS1:%.*]], <vscale x 8 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x double> @llvm.riscv.vfnmacc.nxv8f64.f64.i64(<vscale x 8 x double> [[VD:%.*]], double [[RS1:%.*]], <vscale x 8 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x double> [[TMP0]]
 //
 vfloat64m8_t test_vfnmacc_vf_f64m8(vfloat64m8_t vd, double rs1, vfloat64m8_t vs2, size_t vl) {

diff  --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vfnmadd.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vfnmadd.c
index 727f5690b987d..6239eafbfeb68 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vfnmadd.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vfnmadd.c
@@ -9,7 +9,7 @@
 
 // CHECK-RV64-LABEL: @test_vfnmadd_vv_f16mf4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x half> @llvm.riscv.vfnmadd.nxv1f16.nxv1f16.i64(<vscale x 1 x half> [[VD:%.*]], <vscale x 1 x half> [[VS1:%.*]], <vscale x 1 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x half> @llvm.riscv.vfnmadd.nxv1f16.nxv1f16.i64(<vscale x 1 x half> [[VD:%.*]], <vscale x 1 x half> [[VS1:%.*]], <vscale x 1 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x half> [[TMP0]]
 //
 vfloat16mf4_t test_vfnmadd_vv_f16mf4(vfloat16mf4_t vd, vfloat16mf4_t vs1, vfloat16mf4_t vs2, size_t vl) {
@@ -18,7 +18,7 @@ vfloat16mf4_t test_vfnmadd_vv_f16mf4(vfloat16mf4_t vd, vfloat16mf4_t vs1, vfloat
 
 // CHECK-RV64-LABEL: @test_vfnmadd_vf_f16mf4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x half> @llvm.riscv.vfnmadd.nxv1f16.f16.i64(<vscale x 1 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 1 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x half> @llvm.riscv.vfnmadd.nxv1f16.f16.i64(<vscale x 1 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 1 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x half> [[TMP0]]
 //
 vfloat16mf4_t test_vfnmadd_vf_f16mf4(vfloat16mf4_t vd, _Float16 rs1, vfloat16mf4_t vs2, size_t vl) {
@@ -27,7 +27,7 @@ vfloat16mf4_t test_vfnmadd_vf_f16mf4(vfloat16mf4_t vd, _Float16 rs1, vfloat16mf4
 
 // CHECK-RV64-LABEL: @test_vfnmadd_vv_f16mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x half> @llvm.riscv.vfnmadd.nxv2f16.nxv2f16.i64(<vscale x 2 x half> [[VD:%.*]], <vscale x 2 x half> [[VS1:%.*]], <vscale x 2 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x half> @llvm.riscv.vfnmadd.nxv2f16.nxv2f16.i64(<vscale x 2 x half> [[VD:%.*]], <vscale x 2 x half> [[VS1:%.*]], <vscale x 2 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x half> [[TMP0]]
 //
 vfloat16mf2_t test_vfnmadd_vv_f16mf2(vfloat16mf2_t vd, vfloat16mf2_t vs1, vfloat16mf2_t vs2, size_t vl) {
@@ -36,7 +36,7 @@ vfloat16mf2_t test_vfnmadd_vv_f16mf2(vfloat16mf2_t vd, vfloat16mf2_t vs1, vfloat
 
 // CHECK-RV64-LABEL: @test_vfnmadd_vf_f16mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x half> @llvm.riscv.vfnmadd.nxv2f16.f16.i64(<vscale x 2 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 2 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x half> @llvm.riscv.vfnmadd.nxv2f16.f16.i64(<vscale x 2 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 2 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x half> [[TMP0]]
 //
 vfloat16mf2_t test_vfnmadd_vf_f16mf2(vfloat16mf2_t vd, _Float16 rs1, vfloat16mf2_t vs2, size_t vl) {
@@ -45,7 +45,7 @@ vfloat16mf2_t test_vfnmadd_vf_f16mf2(vfloat16mf2_t vd, _Float16 rs1, vfloat16mf2
 
 // CHECK-RV64-LABEL: @test_vfnmadd_vv_f16m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x half> @llvm.riscv.vfnmadd.nxv4f16.nxv4f16.i64(<vscale x 4 x half> [[VD:%.*]], <vscale x 4 x half> [[VS1:%.*]], <vscale x 4 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x half> @llvm.riscv.vfnmadd.nxv4f16.nxv4f16.i64(<vscale x 4 x half> [[VD:%.*]], <vscale x 4 x half> [[VS1:%.*]], <vscale x 4 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x half> [[TMP0]]
 //
 vfloat16m1_t test_vfnmadd_vv_f16m1(vfloat16m1_t vd, vfloat16m1_t vs1, vfloat16m1_t vs2, size_t vl) {
@@ -54,7 +54,7 @@ vfloat16m1_t test_vfnmadd_vv_f16m1(vfloat16m1_t vd, vfloat16m1_t vs1, vfloat16m1
 
 // CHECK-RV64-LABEL: @test_vfnmadd_vf_f16m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x half> @llvm.riscv.vfnmadd.nxv4f16.f16.i64(<vscale x 4 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 4 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x half> @llvm.riscv.vfnmadd.nxv4f16.f16.i64(<vscale x 4 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 4 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x half> [[TMP0]]
 //
 vfloat16m1_t test_vfnmadd_vf_f16m1(vfloat16m1_t vd, _Float16 rs1, vfloat16m1_t vs2, size_t vl) {
@@ -63,7 +63,7 @@ vfloat16m1_t test_vfnmadd_vf_f16m1(vfloat16m1_t vd, _Float16 rs1, vfloat16m1_t v
 
 // CHECK-RV64-LABEL: @test_vfnmadd_vv_f16m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x half> @llvm.riscv.vfnmadd.nxv8f16.nxv8f16.i64(<vscale x 8 x half> [[VD:%.*]], <vscale x 8 x half> [[VS1:%.*]], <vscale x 8 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x half> @llvm.riscv.vfnmadd.nxv8f16.nxv8f16.i64(<vscale x 8 x half> [[VD:%.*]], <vscale x 8 x half> [[VS1:%.*]], <vscale x 8 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x half> [[TMP0]]
 //
 vfloat16m2_t test_vfnmadd_vv_f16m2(vfloat16m2_t vd, vfloat16m2_t vs1, vfloat16m2_t vs2, size_t vl) {
@@ -72,7 +72,7 @@ vfloat16m2_t test_vfnmadd_vv_f16m2(vfloat16m2_t vd, vfloat16m2_t vs1, vfloat16m2
 
 // CHECK-RV64-LABEL: @test_vfnmadd_vf_f16m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x half> @llvm.riscv.vfnmadd.nxv8f16.f16.i64(<vscale x 8 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 8 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x half> @llvm.riscv.vfnmadd.nxv8f16.f16.i64(<vscale x 8 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 8 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x half> [[TMP0]]
 //
 vfloat16m2_t test_vfnmadd_vf_f16m2(vfloat16m2_t vd, _Float16 rs1, vfloat16m2_t vs2, size_t vl) {
@@ -81,7 +81,7 @@ vfloat16m2_t test_vfnmadd_vf_f16m2(vfloat16m2_t vd, _Float16 rs1, vfloat16m2_t v
 
 // CHECK-RV64-LABEL: @test_vfnmadd_vv_f16m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x half> @llvm.riscv.vfnmadd.nxv16f16.nxv16f16.i64(<vscale x 16 x half> [[VD:%.*]], <vscale x 16 x half> [[VS1:%.*]], <vscale x 16 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x half> @llvm.riscv.vfnmadd.nxv16f16.nxv16f16.i64(<vscale x 16 x half> [[VD:%.*]], <vscale x 16 x half> [[VS1:%.*]], <vscale x 16 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x half> [[TMP0]]
 //
 vfloat16m4_t test_vfnmadd_vv_f16m4(vfloat16m4_t vd, vfloat16m4_t vs1, vfloat16m4_t vs2, size_t vl) {
@@ -90,7 +90,7 @@ vfloat16m4_t test_vfnmadd_vv_f16m4(vfloat16m4_t vd, vfloat16m4_t vs1, vfloat16m4
 
 // CHECK-RV64-LABEL: @test_vfnmadd_vf_f16m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x half> @llvm.riscv.vfnmadd.nxv16f16.f16.i64(<vscale x 16 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 16 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x half> @llvm.riscv.vfnmadd.nxv16f16.f16.i64(<vscale x 16 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 16 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x half> [[TMP0]]
 //
 vfloat16m4_t test_vfnmadd_vf_f16m4(vfloat16m4_t vd, _Float16 rs1, vfloat16m4_t vs2, size_t vl) {
@@ -99,7 +99,7 @@ vfloat16m4_t test_vfnmadd_vf_f16m4(vfloat16m4_t vd, _Float16 rs1, vfloat16m4_t v
 
 // CHECK-RV64-LABEL: @test_vfnmadd_vv_f16m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x half> @llvm.riscv.vfnmadd.nxv32f16.nxv32f16.i64(<vscale x 32 x half> [[VD:%.*]], <vscale x 32 x half> [[VS1:%.*]], <vscale x 32 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x half> @llvm.riscv.vfnmadd.nxv32f16.nxv32f16.i64(<vscale x 32 x half> [[VD:%.*]], <vscale x 32 x half> [[VS1:%.*]], <vscale x 32 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 32 x half> [[TMP0]]
 //
 vfloat16m8_t test_vfnmadd_vv_f16m8(vfloat16m8_t vd, vfloat16m8_t vs1, vfloat16m8_t vs2, size_t vl) {
@@ -108,7 +108,7 @@ vfloat16m8_t test_vfnmadd_vv_f16m8(vfloat16m8_t vd, vfloat16m8_t vs1, vfloat16m8
 
 // CHECK-RV64-LABEL: @test_vfnmadd_vf_f16m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x half> @llvm.riscv.vfnmadd.nxv32f16.f16.i64(<vscale x 32 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 32 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x half> @llvm.riscv.vfnmadd.nxv32f16.f16.i64(<vscale x 32 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 32 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 32 x half> [[TMP0]]
 //
 vfloat16m8_t test_vfnmadd_vf_f16m8(vfloat16m8_t vd, _Float16 rs1, vfloat16m8_t vs2, size_t vl) {
@@ -117,7 +117,7 @@ vfloat16m8_t test_vfnmadd_vf_f16m8(vfloat16m8_t vd, _Float16 rs1, vfloat16m8_t v
 
 // CHECK-RV64-LABEL: @test_vfnmadd_vv_f32mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x float> @llvm.riscv.vfnmadd.nxv1f32.nxv1f32.i64(<vscale x 1 x float> [[VD:%.*]], <vscale x 1 x float> [[VS1:%.*]], <vscale x 1 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x float> @llvm.riscv.vfnmadd.nxv1f32.nxv1f32.i64(<vscale x 1 x float> [[VD:%.*]], <vscale x 1 x float> [[VS1:%.*]], <vscale x 1 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x float> [[TMP0]]
 //
 vfloat32mf2_t test_vfnmadd_vv_f32mf2(vfloat32mf2_t vd, vfloat32mf2_t vs1, vfloat32mf2_t vs2, size_t vl) {
@@ -126,7 +126,7 @@ vfloat32mf2_t test_vfnmadd_vv_f32mf2(vfloat32mf2_t vd, vfloat32mf2_t vs1, vfloat
 
 // CHECK-RV64-LABEL: @test_vfnmadd_vf_f32mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x float> @llvm.riscv.vfnmadd.nxv1f32.f32.i64(<vscale x 1 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 1 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x float> @llvm.riscv.vfnmadd.nxv1f32.f32.i64(<vscale x 1 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 1 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x float> [[TMP0]]
 //
 vfloat32mf2_t test_vfnmadd_vf_f32mf2(vfloat32mf2_t vd, float rs1, vfloat32mf2_t vs2, size_t vl) {
@@ -135,7 +135,7 @@ vfloat32mf2_t test_vfnmadd_vf_f32mf2(vfloat32mf2_t vd, float rs1, vfloat32mf2_t
 
 // CHECK-RV64-LABEL: @test_vfnmadd_vv_f32m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x float> @llvm.riscv.vfnmadd.nxv2f32.nxv2f32.i64(<vscale x 2 x float> [[VD:%.*]], <vscale x 2 x float> [[VS1:%.*]], <vscale x 2 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x float> @llvm.riscv.vfnmadd.nxv2f32.nxv2f32.i64(<vscale x 2 x float> [[VD:%.*]], <vscale x 2 x float> [[VS1:%.*]], <vscale x 2 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x float> [[TMP0]]
 //
 vfloat32m1_t test_vfnmadd_vv_f32m1(vfloat32m1_t vd, vfloat32m1_t vs1, vfloat32m1_t vs2, size_t vl) {
@@ -144,7 +144,7 @@ vfloat32m1_t test_vfnmadd_vv_f32m1(vfloat32m1_t vd, vfloat32m1_t vs1, vfloat32m1
 
 // CHECK-RV64-LABEL: @test_vfnmadd_vf_f32m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x float> @llvm.riscv.vfnmadd.nxv2f32.f32.i64(<vscale x 2 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 2 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x float> @llvm.riscv.vfnmadd.nxv2f32.f32.i64(<vscale x 2 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 2 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x float> [[TMP0]]
 //
 vfloat32m1_t test_vfnmadd_vf_f32m1(vfloat32m1_t vd, float rs1, vfloat32m1_t vs2, size_t vl) {
@@ -153,7 +153,7 @@ vfloat32m1_t test_vfnmadd_vf_f32m1(vfloat32m1_t vd, float rs1, vfloat32m1_t vs2,
 
 // CHECK-RV64-LABEL: @test_vfnmadd_vv_f32m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.riscv.vfnmadd.nxv4f32.nxv4f32.i64(<vscale x 4 x float> [[VD:%.*]], <vscale x 4 x float> [[VS1:%.*]], <vscale x 4 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.riscv.vfnmadd.nxv4f32.nxv4f32.i64(<vscale x 4 x float> [[VD:%.*]], <vscale x 4 x float> [[VS1:%.*]], <vscale x 4 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
 vfloat32m2_t test_vfnmadd_vv_f32m2(vfloat32m2_t vd, vfloat32m2_t vs1, vfloat32m2_t vs2, size_t vl) {
@@ -162,7 +162,7 @@ vfloat32m2_t test_vfnmadd_vv_f32m2(vfloat32m2_t vd, vfloat32m2_t vs1, vfloat32m2
 
 // CHECK-RV64-LABEL: @test_vfnmadd_vf_f32m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.riscv.vfnmadd.nxv4f32.f32.i64(<vscale x 4 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 4 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.riscv.vfnmadd.nxv4f32.f32.i64(<vscale x 4 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 4 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
 vfloat32m2_t test_vfnmadd_vf_f32m2(vfloat32m2_t vd, float rs1, vfloat32m2_t vs2, size_t vl) {
@@ -171,7 +171,7 @@ vfloat32m2_t test_vfnmadd_vf_f32m2(vfloat32m2_t vd, float rs1, vfloat32m2_t vs2,
 
 // CHECK-RV64-LABEL: @test_vfnmadd_vv_f32m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x float> @llvm.riscv.vfnmadd.nxv8f32.nxv8f32.i64(<vscale x 8 x float> [[VD:%.*]], <vscale x 8 x float> [[VS1:%.*]], <vscale x 8 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x float> @llvm.riscv.vfnmadd.nxv8f32.nxv8f32.i64(<vscale x 8 x float> [[VD:%.*]], <vscale x 8 x float> [[VS1:%.*]], <vscale x 8 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x float> [[TMP0]]
 //
 vfloat32m4_t test_vfnmadd_vv_f32m4(vfloat32m4_t vd, vfloat32m4_t vs1, vfloat32m4_t vs2, size_t vl) {
@@ -180,7 +180,7 @@ vfloat32m4_t test_vfnmadd_vv_f32m4(vfloat32m4_t vd, vfloat32m4_t vs1, vfloat32m4
 
 // CHECK-RV64-LABEL: @test_vfnmadd_vf_f32m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x float> @llvm.riscv.vfnmadd.nxv8f32.f32.i64(<vscale x 8 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 8 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x float> @llvm.riscv.vfnmadd.nxv8f32.f32.i64(<vscale x 8 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 8 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x float> [[TMP0]]
 //
 vfloat32m4_t test_vfnmadd_vf_f32m4(vfloat32m4_t vd, float rs1, vfloat32m4_t vs2, size_t vl) {
@@ -189,7 +189,7 @@ vfloat32m4_t test_vfnmadd_vf_f32m4(vfloat32m4_t vd, float rs1, vfloat32m4_t vs2,
 
 // CHECK-RV64-LABEL: @test_vfnmadd_vv_f32m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x float> @llvm.riscv.vfnmadd.nxv16f32.nxv16f32.i64(<vscale x 16 x float> [[VD:%.*]], <vscale x 16 x float> [[VS1:%.*]], <vscale x 16 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x float> @llvm.riscv.vfnmadd.nxv16f32.nxv16f32.i64(<vscale x 16 x float> [[VD:%.*]], <vscale x 16 x float> [[VS1:%.*]], <vscale x 16 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x float> [[TMP0]]
 //
 vfloat32m8_t test_vfnmadd_vv_f32m8(vfloat32m8_t vd, vfloat32m8_t vs1, vfloat32m8_t vs2, size_t vl) {
@@ -198,7 +198,7 @@ vfloat32m8_t test_vfnmadd_vv_f32m8(vfloat32m8_t vd, vfloat32m8_t vs1, vfloat32m8
 
 // CHECK-RV64-LABEL: @test_vfnmadd_vf_f32m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x float> @llvm.riscv.vfnmadd.nxv16f32.f32.i64(<vscale x 16 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 16 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x float> @llvm.riscv.vfnmadd.nxv16f32.f32.i64(<vscale x 16 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 16 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x float> [[TMP0]]
 //
 vfloat32m8_t test_vfnmadd_vf_f32m8(vfloat32m8_t vd, float rs1, vfloat32m8_t vs2, size_t vl) {
@@ -207,7 +207,7 @@ vfloat32m8_t test_vfnmadd_vf_f32m8(vfloat32m8_t vd, float rs1, vfloat32m8_t vs2,
 
 // CHECK-RV64-LABEL: @test_vfnmadd_vv_f64m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x double> @llvm.riscv.vfnmadd.nxv1f64.nxv1f64.i64(<vscale x 1 x double> [[VD:%.*]], <vscale x 1 x double> [[VS1:%.*]], <vscale x 1 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x double> @llvm.riscv.vfnmadd.nxv1f64.nxv1f64.i64(<vscale x 1 x double> [[VD:%.*]], <vscale x 1 x double> [[VS1:%.*]], <vscale x 1 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x double> [[TMP0]]
 //
 vfloat64m1_t test_vfnmadd_vv_f64m1(vfloat64m1_t vd, vfloat64m1_t vs1, vfloat64m1_t vs2, size_t vl) {
@@ -216,7 +216,7 @@ vfloat64m1_t test_vfnmadd_vv_f64m1(vfloat64m1_t vd, vfloat64m1_t vs1, vfloat64m1
 
 // CHECK-RV64-LABEL: @test_vfnmadd_vf_f64m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x double> @llvm.riscv.vfnmadd.nxv1f64.f64.i64(<vscale x 1 x double> [[VD:%.*]], double [[RS1:%.*]], <vscale x 1 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x double> @llvm.riscv.vfnmadd.nxv1f64.f64.i64(<vscale x 1 x double> [[VD:%.*]], double [[RS1:%.*]], <vscale x 1 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x double> [[TMP0]]
 //
 vfloat64m1_t test_vfnmadd_vf_f64m1(vfloat64m1_t vd, double rs1, vfloat64m1_t vs2, size_t vl) {
@@ -225,7 +225,7 @@ vfloat64m1_t test_vfnmadd_vf_f64m1(vfloat64m1_t vd, double rs1, vfloat64m1_t vs2
 
 // CHECK-RV64-LABEL: @test_vfnmadd_vv_f64m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x double> @llvm.riscv.vfnmadd.nxv2f64.nxv2f64.i64(<vscale x 2 x double> [[VD:%.*]], <vscale x 2 x double> [[VS1:%.*]], <vscale x 2 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x double> @llvm.riscv.vfnmadd.nxv2f64.nxv2f64.i64(<vscale x 2 x double> [[VD:%.*]], <vscale x 2 x double> [[VS1:%.*]], <vscale x 2 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x double> [[TMP0]]
 //
 vfloat64m2_t test_vfnmadd_vv_f64m2(vfloat64m2_t vd, vfloat64m2_t vs1, vfloat64m2_t vs2, size_t vl) {
@@ -234,7 +234,7 @@ vfloat64m2_t test_vfnmadd_vv_f64m2(vfloat64m2_t vd, vfloat64m2_t vs1, vfloat64m2
 
 // CHECK-RV64-LABEL: @test_vfnmadd_vf_f64m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x double> @llvm.riscv.vfnmadd.nxv2f64.f64.i64(<vscale x 2 x double> [[VD:%.*]], double [[RS1:%.*]], <vscale x 2 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x double> @llvm.riscv.vfnmadd.nxv2f64.f64.i64(<vscale x 2 x double> [[VD:%.*]], double [[RS1:%.*]], <vscale x 2 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x double> [[TMP0]]
 //
 vfloat64m2_t test_vfnmadd_vf_f64m2(vfloat64m2_t vd, double rs1, vfloat64m2_t vs2, size_t vl) {
@@ -243,7 +243,7 @@ vfloat64m2_t test_vfnmadd_vf_f64m2(vfloat64m2_t vd, double rs1, vfloat64m2_t vs2
 
 // CHECK-RV64-LABEL: @test_vfnmadd_vv_f64m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x double> @llvm.riscv.vfnmadd.nxv4f64.nxv4f64.i64(<vscale x 4 x double> [[VD:%.*]], <vscale x 4 x double> [[VS1:%.*]], <vscale x 4 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x double> @llvm.riscv.vfnmadd.nxv4f64.nxv4f64.i64(<vscale x 4 x double> [[VD:%.*]], <vscale x 4 x double> [[VS1:%.*]], <vscale x 4 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x double> [[TMP0]]
 //
 vfloat64m4_t test_vfnmadd_vv_f64m4(vfloat64m4_t vd, vfloat64m4_t vs1, vfloat64m4_t vs2, size_t vl) {
@@ -252,7 +252,7 @@ vfloat64m4_t test_vfnmadd_vv_f64m4(vfloat64m4_t vd, vfloat64m4_t vs1, vfloat64m4
 
 // CHECK-RV64-LABEL: @test_vfnmadd_vf_f64m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x double> @llvm.riscv.vfnmadd.nxv4f64.f64.i64(<vscale x 4 x double> [[VD:%.*]], double [[RS1:%.*]], <vscale x 4 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x double> @llvm.riscv.vfnmadd.nxv4f64.f64.i64(<vscale x 4 x double> [[VD:%.*]], double [[RS1:%.*]], <vscale x 4 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x double> [[TMP0]]
 //
 vfloat64m4_t test_vfnmadd_vf_f64m4(vfloat64m4_t vd, double rs1, vfloat64m4_t vs2, size_t vl) {
@@ -261,7 +261,7 @@ vfloat64m4_t test_vfnmadd_vf_f64m4(vfloat64m4_t vd, double rs1, vfloat64m4_t vs2
 
 // CHECK-RV64-LABEL: @test_vfnmadd_vv_f64m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x double> @llvm.riscv.vfnmadd.nxv8f64.nxv8f64.i64(<vscale x 8 x double> [[VD:%.*]], <vscale x 8 x double> [[VS1:%.*]], <vscale x 8 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x double> @llvm.riscv.vfnmadd.nxv8f64.nxv8f64.i64(<vscale x 8 x double> [[VD:%.*]], <vscale x 8 x double> [[VS1:%.*]], <vscale x 8 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x double> [[TMP0]]
 //
 vfloat64m8_t test_vfnmadd_vv_f64m8(vfloat64m8_t vd, vfloat64m8_t vs1, vfloat64m8_t vs2, size_t vl) {
@@ -270,7 +270,7 @@ vfloat64m8_t test_vfnmadd_vv_f64m8(vfloat64m8_t vd, vfloat64m8_t vs1, vfloat64m8
 
 // CHECK-RV64-LABEL: @test_vfnmadd_vf_f64m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x double> @llvm.riscv.vfnmadd.nxv8f64.f64.i64(<vscale x 8 x double> [[VD:%.*]], double [[RS1:%.*]], <vscale x 8 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x double> @llvm.riscv.vfnmadd.nxv8f64.f64.i64(<vscale x 8 x double> [[VD:%.*]], double [[RS1:%.*]], <vscale x 8 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x double> [[TMP0]]
 //
 vfloat64m8_t test_vfnmadd_vf_f64m8(vfloat64m8_t vd, double rs1, vfloat64m8_t vs2, size_t vl) {

diff  --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vfnmsac.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vfnmsac.c
index ce3b06043a194..3047c8bfae856 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vfnmsac.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vfnmsac.c
@@ -9,7 +9,7 @@
 
 // CHECK-RV64-LABEL: @test_vfnmsac_vv_f16mf4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x half> @llvm.riscv.vfnmsac.nxv1f16.nxv1f16.i64(<vscale x 1 x half> [[VD:%.*]], <vscale x 1 x half> [[VS1:%.*]], <vscale x 1 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x half> @llvm.riscv.vfnmsac.nxv1f16.nxv1f16.i64(<vscale x 1 x half> [[VD:%.*]], <vscale x 1 x half> [[VS1:%.*]], <vscale x 1 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x half> [[TMP0]]
 //
 vfloat16mf4_t test_vfnmsac_vv_f16mf4(vfloat16mf4_t vd, vfloat16mf4_t vs1, vfloat16mf4_t vs2, size_t vl) {
@@ -18,7 +18,7 @@ vfloat16mf4_t test_vfnmsac_vv_f16mf4(vfloat16mf4_t vd, vfloat16mf4_t vs1, vfloat
 
 // CHECK-RV64-LABEL: @test_vfnmsac_vf_f16mf4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x half> @llvm.riscv.vfnmsac.nxv1f16.f16.i64(<vscale x 1 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 1 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x half> @llvm.riscv.vfnmsac.nxv1f16.f16.i64(<vscale x 1 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 1 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x half> [[TMP0]]
 //
 vfloat16mf4_t test_vfnmsac_vf_f16mf4(vfloat16mf4_t vd, _Float16 rs1, vfloat16mf4_t vs2, size_t vl) {
@@ -27,7 +27,7 @@ vfloat16mf4_t test_vfnmsac_vf_f16mf4(vfloat16mf4_t vd, _Float16 rs1, vfloat16mf4
 
 // CHECK-RV64-LABEL: @test_vfnmsac_vv_f16mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x half> @llvm.riscv.vfnmsac.nxv2f16.nxv2f16.i64(<vscale x 2 x half> [[VD:%.*]], <vscale x 2 x half> [[VS1:%.*]], <vscale x 2 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x half> @llvm.riscv.vfnmsac.nxv2f16.nxv2f16.i64(<vscale x 2 x half> [[VD:%.*]], <vscale x 2 x half> [[VS1:%.*]], <vscale x 2 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x half> [[TMP0]]
 //
 vfloat16mf2_t test_vfnmsac_vv_f16mf2(vfloat16mf2_t vd, vfloat16mf2_t vs1, vfloat16mf2_t vs2, size_t vl) {
@@ -36,7 +36,7 @@ vfloat16mf2_t test_vfnmsac_vv_f16mf2(vfloat16mf2_t vd, vfloat16mf2_t vs1, vfloat
 
 // CHECK-RV64-LABEL: @test_vfnmsac_vf_f16mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x half> @llvm.riscv.vfnmsac.nxv2f16.f16.i64(<vscale x 2 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 2 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x half> @llvm.riscv.vfnmsac.nxv2f16.f16.i64(<vscale x 2 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 2 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x half> [[TMP0]]
 //
 vfloat16mf2_t test_vfnmsac_vf_f16mf2(vfloat16mf2_t vd, _Float16 rs1, vfloat16mf2_t vs2, size_t vl) {
@@ -45,7 +45,7 @@ vfloat16mf2_t test_vfnmsac_vf_f16mf2(vfloat16mf2_t vd, _Float16 rs1, vfloat16mf2
 
 // CHECK-RV64-LABEL: @test_vfnmsac_vv_f16m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x half> @llvm.riscv.vfnmsac.nxv4f16.nxv4f16.i64(<vscale x 4 x half> [[VD:%.*]], <vscale x 4 x half> [[VS1:%.*]], <vscale x 4 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x half> @llvm.riscv.vfnmsac.nxv4f16.nxv4f16.i64(<vscale x 4 x half> [[VD:%.*]], <vscale x 4 x half> [[VS1:%.*]], <vscale x 4 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x half> [[TMP0]]
 //
 vfloat16m1_t test_vfnmsac_vv_f16m1(vfloat16m1_t vd, vfloat16m1_t vs1, vfloat16m1_t vs2, size_t vl) {
@@ -54,7 +54,7 @@ vfloat16m1_t test_vfnmsac_vv_f16m1(vfloat16m1_t vd, vfloat16m1_t vs1, vfloat16m1
 
 // CHECK-RV64-LABEL: @test_vfnmsac_vf_f16m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x half> @llvm.riscv.vfnmsac.nxv4f16.f16.i64(<vscale x 4 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 4 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x half> @llvm.riscv.vfnmsac.nxv4f16.f16.i64(<vscale x 4 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 4 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x half> [[TMP0]]
 //
 vfloat16m1_t test_vfnmsac_vf_f16m1(vfloat16m1_t vd, _Float16 rs1, vfloat16m1_t vs2, size_t vl) {
@@ -63,7 +63,7 @@ vfloat16m1_t test_vfnmsac_vf_f16m1(vfloat16m1_t vd, _Float16 rs1, vfloat16m1_t v
 
 // CHECK-RV64-LABEL: @test_vfnmsac_vv_f16m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x half> @llvm.riscv.vfnmsac.nxv8f16.nxv8f16.i64(<vscale x 8 x half> [[VD:%.*]], <vscale x 8 x half> [[VS1:%.*]], <vscale x 8 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x half> @llvm.riscv.vfnmsac.nxv8f16.nxv8f16.i64(<vscale x 8 x half> [[VD:%.*]], <vscale x 8 x half> [[VS1:%.*]], <vscale x 8 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x half> [[TMP0]]
 //
 vfloat16m2_t test_vfnmsac_vv_f16m2(vfloat16m2_t vd, vfloat16m2_t vs1, vfloat16m2_t vs2, size_t vl) {
@@ -72,7 +72,7 @@ vfloat16m2_t test_vfnmsac_vv_f16m2(vfloat16m2_t vd, vfloat16m2_t vs1, vfloat16m2
 
 // CHECK-RV64-LABEL: @test_vfnmsac_vf_f16m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x half> @llvm.riscv.vfnmsac.nxv8f16.f16.i64(<vscale x 8 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 8 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x half> @llvm.riscv.vfnmsac.nxv8f16.f16.i64(<vscale x 8 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 8 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x half> [[TMP0]]
 //
 vfloat16m2_t test_vfnmsac_vf_f16m2(vfloat16m2_t vd, _Float16 rs1, vfloat16m2_t vs2, size_t vl) {
@@ -81,7 +81,7 @@ vfloat16m2_t test_vfnmsac_vf_f16m2(vfloat16m2_t vd, _Float16 rs1, vfloat16m2_t v
 
 // CHECK-RV64-LABEL: @test_vfnmsac_vv_f16m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x half> @llvm.riscv.vfnmsac.nxv16f16.nxv16f16.i64(<vscale x 16 x half> [[VD:%.*]], <vscale x 16 x half> [[VS1:%.*]], <vscale x 16 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x half> @llvm.riscv.vfnmsac.nxv16f16.nxv16f16.i64(<vscale x 16 x half> [[VD:%.*]], <vscale x 16 x half> [[VS1:%.*]], <vscale x 16 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x half> [[TMP0]]
 //
 vfloat16m4_t test_vfnmsac_vv_f16m4(vfloat16m4_t vd, vfloat16m4_t vs1, vfloat16m4_t vs2, size_t vl) {
@@ -90,7 +90,7 @@ vfloat16m4_t test_vfnmsac_vv_f16m4(vfloat16m4_t vd, vfloat16m4_t vs1, vfloat16m4
 
 // CHECK-RV64-LABEL: @test_vfnmsac_vf_f16m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x half> @llvm.riscv.vfnmsac.nxv16f16.f16.i64(<vscale x 16 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 16 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x half> @llvm.riscv.vfnmsac.nxv16f16.f16.i64(<vscale x 16 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 16 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x half> [[TMP0]]
 //
 vfloat16m4_t test_vfnmsac_vf_f16m4(vfloat16m4_t vd, _Float16 rs1, vfloat16m4_t vs2, size_t vl) {
@@ -99,7 +99,7 @@ vfloat16m4_t test_vfnmsac_vf_f16m4(vfloat16m4_t vd, _Float16 rs1, vfloat16m4_t v
 
 // CHECK-RV64-LABEL: @test_vfnmsac_vv_f16m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x half> @llvm.riscv.vfnmsac.nxv32f16.nxv32f16.i64(<vscale x 32 x half> [[VD:%.*]], <vscale x 32 x half> [[VS1:%.*]], <vscale x 32 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x half> @llvm.riscv.vfnmsac.nxv32f16.nxv32f16.i64(<vscale x 32 x half> [[VD:%.*]], <vscale x 32 x half> [[VS1:%.*]], <vscale x 32 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 32 x half> [[TMP0]]
 //
 vfloat16m8_t test_vfnmsac_vv_f16m8(vfloat16m8_t vd, vfloat16m8_t vs1, vfloat16m8_t vs2, size_t vl) {
@@ -108,7 +108,7 @@ vfloat16m8_t test_vfnmsac_vv_f16m8(vfloat16m8_t vd, vfloat16m8_t vs1, vfloat16m8
 
 // CHECK-RV64-LABEL: @test_vfnmsac_vf_f16m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x half> @llvm.riscv.vfnmsac.nxv32f16.f16.i64(<vscale x 32 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 32 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x half> @llvm.riscv.vfnmsac.nxv32f16.f16.i64(<vscale x 32 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 32 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 32 x half> [[TMP0]]
 //
 vfloat16m8_t test_vfnmsac_vf_f16m8(vfloat16m8_t vd, _Float16 rs1, vfloat16m8_t vs2, size_t vl) {
@@ -117,7 +117,7 @@ vfloat16m8_t test_vfnmsac_vf_f16m8(vfloat16m8_t vd, _Float16 rs1, vfloat16m8_t v
 
 // CHECK-RV64-LABEL: @test_vfnmsac_vv_f32mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x float> @llvm.riscv.vfnmsac.nxv1f32.nxv1f32.i64(<vscale x 1 x float> [[VD:%.*]], <vscale x 1 x float> [[VS1:%.*]], <vscale x 1 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x float> @llvm.riscv.vfnmsac.nxv1f32.nxv1f32.i64(<vscale x 1 x float> [[VD:%.*]], <vscale x 1 x float> [[VS1:%.*]], <vscale x 1 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x float> [[TMP0]]
 //
 vfloat32mf2_t test_vfnmsac_vv_f32mf2(vfloat32mf2_t vd, vfloat32mf2_t vs1, vfloat32mf2_t vs2, size_t vl) {
@@ -126,7 +126,7 @@ vfloat32mf2_t test_vfnmsac_vv_f32mf2(vfloat32mf2_t vd, vfloat32mf2_t vs1, vfloat
 
 // CHECK-RV64-LABEL: @test_vfnmsac_vf_f32mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x float> @llvm.riscv.vfnmsac.nxv1f32.f32.i64(<vscale x 1 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 1 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x float> @llvm.riscv.vfnmsac.nxv1f32.f32.i64(<vscale x 1 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 1 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x float> [[TMP0]]
 //
 vfloat32mf2_t test_vfnmsac_vf_f32mf2(vfloat32mf2_t vd, float rs1, vfloat32mf2_t vs2, size_t vl) {
@@ -135,7 +135,7 @@ vfloat32mf2_t test_vfnmsac_vf_f32mf2(vfloat32mf2_t vd, float rs1, vfloat32mf2_t
 
 // CHECK-RV64-LABEL: @test_vfnmsac_vv_f32m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x float> @llvm.riscv.vfnmsac.nxv2f32.nxv2f32.i64(<vscale x 2 x float> [[VD:%.*]], <vscale x 2 x float> [[VS1:%.*]], <vscale x 2 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x float> @llvm.riscv.vfnmsac.nxv2f32.nxv2f32.i64(<vscale x 2 x float> [[VD:%.*]], <vscale x 2 x float> [[VS1:%.*]], <vscale x 2 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x float> [[TMP0]]
 //
 vfloat32m1_t test_vfnmsac_vv_f32m1(vfloat32m1_t vd, vfloat32m1_t vs1, vfloat32m1_t vs2, size_t vl) {
@@ -144,7 +144,7 @@ vfloat32m1_t test_vfnmsac_vv_f32m1(vfloat32m1_t vd, vfloat32m1_t vs1, vfloat32m1
 
 // CHECK-RV64-LABEL: @test_vfnmsac_vf_f32m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x float> @llvm.riscv.vfnmsac.nxv2f32.f32.i64(<vscale x 2 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 2 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x float> @llvm.riscv.vfnmsac.nxv2f32.f32.i64(<vscale x 2 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 2 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x float> [[TMP0]]
 //
 vfloat32m1_t test_vfnmsac_vf_f32m1(vfloat32m1_t vd, float rs1, vfloat32m1_t vs2, size_t vl) {
@@ -153,7 +153,7 @@ vfloat32m1_t test_vfnmsac_vf_f32m1(vfloat32m1_t vd, float rs1, vfloat32m1_t vs2,
 
 // CHECK-RV64-LABEL: @test_vfnmsac_vv_f32m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.riscv.vfnmsac.nxv4f32.nxv4f32.i64(<vscale x 4 x float> [[VD:%.*]], <vscale x 4 x float> [[VS1:%.*]], <vscale x 4 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.riscv.vfnmsac.nxv4f32.nxv4f32.i64(<vscale x 4 x float> [[VD:%.*]], <vscale x 4 x float> [[VS1:%.*]], <vscale x 4 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
 vfloat32m2_t test_vfnmsac_vv_f32m2(vfloat32m2_t vd, vfloat32m2_t vs1, vfloat32m2_t vs2, size_t vl) {
@@ -162,7 +162,7 @@ vfloat32m2_t test_vfnmsac_vv_f32m2(vfloat32m2_t vd, vfloat32m2_t vs1, vfloat32m2
 
 // CHECK-RV64-LABEL: @test_vfnmsac_vf_f32m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.riscv.vfnmsac.nxv4f32.f32.i64(<vscale x 4 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 4 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.riscv.vfnmsac.nxv4f32.f32.i64(<vscale x 4 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 4 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
 vfloat32m2_t test_vfnmsac_vf_f32m2(vfloat32m2_t vd, float rs1, vfloat32m2_t vs2, size_t vl) {
@@ -171,7 +171,7 @@ vfloat32m2_t test_vfnmsac_vf_f32m2(vfloat32m2_t vd, float rs1, vfloat32m2_t vs2,
 
 // CHECK-RV64-LABEL: @test_vfnmsac_vv_f32m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x float> @llvm.riscv.vfnmsac.nxv8f32.nxv8f32.i64(<vscale x 8 x float> [[VD:%.*]], <vscale x 8 x float> [[VS1:%.*]], <vscale x 8 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x float> @llvm.riscv.vfnmsac.nxv8f32.nxv8f32.i64(<vscale x 8 x float> [[VD:%.*]], <vscale x 8 x float> [[VS1:%.*]], <vscale x 8 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x float> [[TMP0]]
 //
 vfloat32m4_t test_vfnmsac_vv_f32m4(vfloat32m4_t vd, vfloat32m4_t vs1, vfloat32m4_t vs2, size_t vl) {
@@ -180,7 +180,7 @@ vfloat32m4_t test_vfnmsac_vv_f32m4(vfloat32m4_t vd, vfloat32m4_t vs1, vfloat32m4
 
 // CHECK-RV64-LABEL: @test_vfnmsac_vf_f32m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x float> @llvm.riscv.vfnmsac.nxv8f32.f32.i64(<vscale x 8 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 8 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x float> @llvm.riscv.vfnmsac.nxv8f32.f32.i64(<vscale x 8 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 8 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x float> [[TMP0]]
 //
 vfloat32m4_t test_vfnmsac_vf_f32m4(vfloat32m4_t vd, float rs1, vfloat32m4_t vs2, size_t vl) {
@@ -189,7 +189,7 @@ vfloat32m4_t test_vfnmsac_vf_f32m4(vfloat32m4_t vd, float rs1, vfloat32m4_t vs2,
 
 // CHECK-RV64-LABEL: @test_vfnmsac_vv_f32m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x float> @llvm.riscv.vfnmsac.nxv16f32.nxv16f32.i64(<vscale x 16 x float> [[VD:%.*]], <vscale x 16 x float> [[VS1:%.*]], <vscale x 16 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x float> @llvm.riscv.vfnmsac.nxv16f32.nxv16f32.i64(<vscale x 16 x float> [[VD:%.*]], <vscale x 16 x float> [[VS1:%.*]], <vscale x 16 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x float> [[TMP0]]
 //
 vfloat32m8_t test_vfnmsac_vv_f32m8(vfloat32m8_t vd, vfloat32m8_t vs1, vfloat32m8_t vs2, size_t vl) {
@@ -198,7 +198,7 @@ vfloat32m8_t test_vfnmsac_vv_f32m8(vfloat32m8_t vd, vfloat32m8_t vs1, vfloat32m8
 
 // CHECK-RV64-LABEL: @test_vfnmsac_vf_f32m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x float> @llvm.riscv.vfnmsac.nxv16f32.f32.i64(<vscale x 16 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 16 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x float> @llvm.riscv.vfnmsac.nxv16f32.f32.i64(<vscale x 16 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 16 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x float> [[TMP0]]
 //
 vfloat32m8_t test_vfnmsac_vf_f32m8(vfloat32m8_t vd, float rs1, vfloat32m8_t vs2, size_t vl) {
@@ -207,7 +207,7 @@ vfloat32m8_t test_vfnmsac_vf_f32m8(vfloat32m8_t vd, float rs1, vfloat32m8_t vs2,
 
 // CHECK-RV64-LABEL: @test_vfnmsac_vv_f64m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x double> @llvm.riscv.vfnmsac.nxv1f64.nxv1f64.i64(<vscale x 1 x double> [[VD:%.*]], <vscale x 1 x double> [[VS1:%.*]], <vscale x 1 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x double> @llvm.riscv.vfnmsac.nxv1f64.nxv1f64.i64(<vscale x 1 x double> [[VD:%.*]], <vscale x 1 x double> [[VS1:%.*]], <vscale x 1 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x double> [[TMP0]]
 //
 vfloat64m1_t test_vfnmsac_vv_f64m1(vfloat64m1_t vd, vfloat64m1_t vs1, vfloat64m1_t vs2, size_t vl) {
@@ -216,7 +216,7 @@ vfloat64m1_t test_vfnmsac_vv_f64m1(vfloat64m1_t vd, vfloat64m1_t vs1, vfloat64m1
 
 // CHECK-RV64-LABEL: @test_vfnmsac_vf_f64m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x double> @llvm.riscv.vfnmsac.nxv1f64.f64.i64(<vscale x 1 x double> [[VD:%.*]], double [[RS1:%.*]], <vscale x 1 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x double> @llvm.riscv.vfnmsac.nxv1f64.f64.i64(<vscale x 1 x double> [[VD:%.*]], double [[RS1:%.*]], <vscale x 1 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x double> [[TMP0]]
 //
 vfloat64m1_t test_vfnmsac_vf_f64m1(vfloat64m1_t vd, double rs1, vfloat64m1_t vs2, size_t vl) {
@@ -225,7 +225,7 @@ vfloat64m1_t test_vfnmsac_vf_f64m1(vfloat64m1_t vd, double rs1, vfloat64m1_t vs2
 
 // CHECK-RV64-LABEL: @test_vfnmsac_vv_f64m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x double> @llvm.riscv.vfnmsac.nxv2f64.nxv2f64.i64(<vscale x 2 x double> [[VD:%.*]], <vscale x 2 x double> [[VS1:%.*]], <vscale x 2 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x double> @llvm.riscv.vfnmsac.nxv2f64.nxv2f64.i64(<vscale x 2 x double> [[VD:%.*]], <vscale x 2 x double> [[VS1:%.*]], <vscale x 2 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x double> [[TMP0]]
 //
 vfloat64m2_t test_vfnmsac_vv_f64m2(vfloat64m2_t vd, vfloat64m2_t vs1, vfloat64m2_t vs2, size_t vl) {
@@ -234,7 +234,7 @@ vfloat64m2_t test_vfnmsac_vv_f64m2(vfloat64m2_t vd, vfloat64m2_t vs1, vfloat64m2
 
 // CHECK-RV64-LABEL: @test_vfnmsac_vf_f64m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x double> @llvm.riscv.vfnmsac.nxv2f64.f64.i64(<vscale x 2 x double> [[VD:%.*]], double [[RS1:%.*]], <vscale x 2 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x double> @llvm.riscv.vfnmsac.nxv2f64.f64.i64(<vscale x 2 x double> [[VD:%.*]], double [[RS1:%.*]], <vscale x 2 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x double> [[TMP0]]
 //
 vfloat64m2_t test_vfnmsac_vf_f64m2(vfloat64m2_t vd, double rs1, vfloat64m2_t vs2, size_t vl) {
@@ -243,7 +243,7 @@ vfloat64m2_t test_vfnmsac_vf_f64m2(vfloat64m2_t vd, double rs1, vfloat64m2_t vs2
 
 // CHECK-RV64-LABEL: @test_vfnmsac_vv_f64m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x double> @llvm.riscv.vfnmsac.nxv4f64.nxv4f64.i64(<vscale x 4 x double> [[VD:%.*]], <vscale x 4 x double> [[VS1:%.*]], <vscale x 4 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x double> @llvm.riscv.vfnmsac.nxv4f64.nxv4f64.i64(<vscale x 4 x double> [[VD:%.*]], <vscale x 4 x double> [[VS1:%.*]], <vscale x 4 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x double> [[TMP0]]
 //
 vfloat64m4_t test_vfnmsac_vv_f64m4(vfloat64m4_t vd, vfloat64m4_t vs1, vfloat64m4_t vs2, size_t vl) {
@@ -252,7 +252,7 @@ vfloat64m4_t test_vfnmsac_vv_f64m4(vfloat64m4_t vd, vfloat64m4_t vs1, vfloat64m4
 
 // CHECK-RV64-LABEL: @test_vfnmsac_vf_f64m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x double> @llvm.riscv.vfnmsac.nxv4f64.f64.i64(<vscale x 4 x double> [[VD:%.*]], double [[RS1:%.*]], <vscale x 4 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x double> @llvm.riscv.vfnmsac.nxv4f64.f64.i64(<vscale x 4 x double> [[VD:%.*]], double [[RS1:%.*]], <vscale x 4 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x double> [[TMP0]]
 //
 vfloat64m4_t test_vfnmsac_vf_f64m4(vfloat64m4_t vd, double rs1, vfloat64m4_t vs2, size_t vl) {
@@ -261,7 +261,7 @@ vfloat64m4_t test_vfnmsac_vf_f64m4(vfloat64m4_t vd, double rs1, vfloat64m4_t vs2
 
 // CHECK-RV64-LABEL: @test_vfnmsac_vv_f64m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x double> @llvm.riscv.vfnmsac.nxv8f64.nxv8f64.i64(<vscale x 8 x double> [[VD:%.*]], <vscale x 8 x double> [[VS1:%.*]], <vscale x 8 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x double> @llvm.riscv.vfnmsac.nxv8f64.nxv8f64.i64(<vscale x 8 x double> [[VD:%.*]], <vscale x 8 x double> [[VS1:%.*]], <vscale x 8 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x double> [[TMP0]]
 //
 vfloat64m8_t test_vfnmsac_vv_f64m8(vfloat64m8_t vd, vfloat64m8_t vs1, vfloat64m8_t vs2, size_t vl) {
@@ -270,7 +270,7 @@ vfloat64m8_t test_vfnmsac_vv_f64m8(vfloat64m8_t vd, vfloat64m8_t vs1, vfloat64m8
 
 // CHECK-RV64-LABEL: @test_vfnmsac_vf_f64m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x double> @llvm.riscv.vfnmsac.nxv8f64.f64.i64(<vscale x 8 x double> [[VD:%.*]], double [[RS1:%.*]], <vscale x 8 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x double> @llvm.riscv.vfnmsac.nxv8f64.f64.i64(<vscale x 8 x double> [[VD:%.*]], double [[RS1:%.*]], <vscale x 8 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x double> [[TMP0]]
 //
 vfloat64m8_t test_vfnmsac_vf_f64m8(vfloat64m8_t vd, double rs1, vfloat64m8_t vs2, size_t vl) {

diff  --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vfnmsub.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vfnmsub.c
index da68bd0250562..9975042da7cd8 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vfnmsub.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vfnmsub.c
@@ -9,7 +9,7 @@
 
 // CHECK-RV64-LABEL: @test_vfnmsub_vv_f16mf4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x half> @llvm.riscv.vfnmsub.nxv1f16.nxv1f16.i64(<vscale x 1 x half> [[VD:%.*]], <vscale x 1 x half> [[VS1:%.*]], <vscale x 1 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x half> @llvm.riscv.vfnmsub.nxv1f16.nxv1f16.i64(<vscale x 1 x half> [[VD:%.*]], <vscale x 1 x half> [[VS1:%.*]], <vscale x 1 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x half> [[TMP0]]
 //
 vfloat16mf4_t test_vfnmsub_vv_f16mf4(vfloat16mf4_t vd, vfloat16mf4_t vs1, vfloat16mf4_t vs2, size_t vl) {
@@ -18,7 +18,7 @@ vfloat16mf4_t test_vfnmsub_vv_f16mf4(vfloat16mf4_t vd, vfloat16mf4_t vs1, vfloat
 
 // CHECK-RV64-LABEL: @test_vfnmsub_vf_f16mf4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x half> @llvm.riscv.vfnmsub.nxv1f16.f16.i64(<vscale x 1 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 1 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x half> @llvm.riscv.vfnmsub.nxv1f16.f16.i64(<vscale x 1 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 1 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x half> [[TMP0]]
 //
 vfloat16mf4_t test_vfnmsub_vf_f16mf4(vfloat16mf4_t vd, _Float16 rs1, vfloat16mf4_t vs2, size_t vl) {
@@ -27,7 +27,7 @@ vfloat16mf4_t test_vfnmsub_vf_f16mf4(vfloat16mf4_t vd, _Float16 rs1, vfloat16mf4
 
 // CHECK-RV64-LABEL: @test_vfnmsub_vv_f16mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x half> @llvm.riscv.vfnmsub.nxv2f16.nxv2f16.i64(<vscale x 2 x half> [[VD:%.*]], <vscale x 2 x half> [[VS1:%.*]], <vscale x 2 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x half> @llvm.riscv.vfnmsub.nxv2f16.nxv2f16.i64(<vscale x 2 x half> [[VD:%.*]], <vscale x 2 x half> [[VS1:%.*]], <vscale x 2 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x half> [[TMP0]]
 //
 vfloat16mf2_t test_vfnmsub_vv_f16mf2(vfloat16mf2_t vd, vfloat16mf2_t vs1, vfloat16mf2_t vs2, size_t vl) {
@@ -36,7 +36,7 @@ vfloat16mf2_t test_vfnmsub_vv_f16mf2(vfloat16mf2_t vd, vfloat16mf2_t vs1, vfloat
 
 // CHECK-RV64-LABEL: @test_vfnmsub_vf_f16mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x half> @llvm.riscv.vfnmsub.nxv2f16.f16.i64(<vscale x 2 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 2 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x half> @llvm.riscv.vfnmsub.nxv2f16.f16.i64(<vscale x 2 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 2 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x half> [[TMP0]]
 //
 vfloat16mf2_t test_vfnmsub_vf_f16mf2(vfloat16mf2_t vd, _Float16 rs1, vfloat16mf2_t vs2, size_t vl) {
@@ -45,7 +45,7 @@ vfloat16mf2_t test_vfnmsub_vf_f16mf2(vfloat16mf2_t vd, _Float16 rs1, vfloat16mf2
 
 // CHECK-RV64-LABEL: @test_vfnmsub_vv_f16m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x half> @llvm.riscv.vfnmsub.nxv4f16.nxv4f16.i64(<vscale x 4 x half> [[VD:%.*]], <vscale x 4 x half> [[VS1:%.*]], <vscale x 4 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x half> @llvm.riscv.vfnmsub.nxv4f16.nxv4f16.i64(<vscale x 4 x half> [[VD:%.*]], <vscale x 4 x half> [[VS1:%.*]], <vscale x 4 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x half> [[TMP0]]
 //
 vfloat16m1_t test_vfnmsub_vv_f16m1(vfloat16m1_t vd, vfloat16m1_t vs1, vfloat16m1_t vs2, size_t vl) {
@@ -54,7 +54,7 @@ vfloat16m1_t test_vfnmsub_vv_f16m1(vfloat16m1_t vd, vfloat16m1_t vs1, vfloat16m1
 
 // CHECK-RV64-LABEL: @test_vfnmsub_vf_f16m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x half> @llvm.riscv.vfnmsub.nxv4f16.f16.i64(<vscale x 4 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 4 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x half> @llvm.riscv.vfnmsub.nxv4f16.f16.i64(<vscale x 4 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 4 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x half> [[TMP0]]
 //
 vfloat16m1_t test_vfnmsub_vf_f16m1(vfloat16m1_t vd, _Float16 rs1, vfloat16m1_t vs2, size_t vl) {
@@ -63,7 +63,7 @@ vfloat16m1_t test_vfnmsub_vf_f16m1(vfloat16m1_t vd, _Float16 rs1, vfloat16m1_t v
 
 // CHECK-RV64-LABEL: @test_vfnmsub_vv_f16m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x half> @llvm.riscv.vfnmsub.nxv8f16.nxv8f16.i64(<vscale x 8 x half> [[VD:%.*]], <vscale x 8 x half> [[VS1:%.*]], <vscale x 8 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x half> @llvm.riscv.vfnmsub.nxv8f16.nxv8f16.i64(<vscale x 8 x half> [[VD:%.*]], <vscale x 8 x half> [[VS1:%.*]], <vscale x 8 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x half> [[TMP0]]
 //
 vfloat16m2_t test_vfnmsub_vv_f16m2(vfloat16m2_t vd, vfloat16m2_t vs1, vfloat16m2_t vs2, size_t vl) {
@@ -72,7 +72,7 @@ vfloat16m2_t test_vfnmsub_vv_f16m2(vfloat16m2_t vd, vfloat16m2_t vs1, vfloat16m2
 
 // CHECK-RV64-LABEL: @test_vfnmsub_vf_f16m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x half> @llvm.riscv.vfnmsub.nxv8f16.f16.i64(<vscale x 8 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 8 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x half> @llvm.riscv.vfnmsub.nxv8f16.f16.i64(<vscale x 8 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 8 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x half> [[TMP0]]
 //
 vfloat16m2_t test_vfnmsub_vf_f16m2(vfloat16m2_t vd, _Float16 rs1, vfloat16m2_t vs2, size_t vl) {
@@ -81,7 +81,7 @@ vfloat16m2_t test_vfnmsub_vf_f16m2(vfloat16m2_t vd, _Float16 rs1, vfloat16m2_t v
 
 // CHECK-RV64-LABEL: @test_vfnmsub_vv_f16m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x half> @llvm.riscv.vfnmsub.nxv16f16.nxv16f16.i64(<vscale x 16 x half> [[VD:%.*]], <vscale x 16 x half> [[VS1:%.*]], <vscale x 16 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x half> @llvm.riscv.vfnmsub.nxv16f16.nxv16f16.i64(<vscale x 16 x half> [[VD:%.*]], <vscale x 16 x half> [[VS1:%.*]], <vscale x 16 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x half> [[TMP0]]
 //
 vfloat16m4_t test_vfnmsub_vv_f16m4(vfloat16m4_t vd, vfloat16m4_t vs1, vfloat16m4_t vs2, size_t vl) {
@@ -90,7 +90,7 @@ vfloat16m4_t test_vfnmsub_vv_f16m4(vfloat16m4_t vd, vfloat16m4_t vs1, vfloat16m4
 
 // CHECK-RV64-LABEL: @test_vfnmsub_vf_f16m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x half> @llvm.riscv.vfnmsub.nxv16f16.f16.i64(<vscale x 16 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 16 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x half> @llvm.riscv.vfnmsub.nxv16f16.f16.i64(<vscale x 16 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 16 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x half> [[TMP0]]
 //
 vfloat16m4_t test_vfnmsub_vf_f16m4(vfloat16m4_t vd, _Float16 rs1, vfloat16m4_t vs2, size_t vl) {
@@ -99,7 +99,7 @@ vfloat16m4_t test_vfnmsub_vf_f16m4(vfloat16m4_t vd, _Float16 rs1, vfloat16m4_t v
 
 // CHECK-RV64-LABEL: @test_vfnmsub_vv_f16m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x half> @llvm.riscv.vfnmsub.nxv32f16.nxv32f16.i64(<vscale x 32 x half> [[VD:%.*]], <vscale x 32 x half> [[VS1:%.*]], <vscale x 32 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x half> @llvm.riscv.vfnmsub.nxv32f16.nxv32f16.i64(<vscale x 32 x half> [[VD:%.*]], <vscale x 32 x half> [[VS1:%.*]], <vscale x 32 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 32 x half> [[TMP0]]
 //
 vfloat16m8_t test_vfnmsub_vv_f16m8(vfloat16m8_t vd, vfloat16m8_t vs1, vfloat16m8_t vs2, size_t vl) {
@@ -108,7 +108,7 @@ vfloat16m8_t test_vfnmsub_vv_f16m8(vfloat16m8_t vd, vfloat16m8_t vs1, vfloat16m8
 
 // CHECK-RV64-LABEL: @test_vfnmsub_vf_f16m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x half> @llvm.riscv.vfnmsub.nxv32f16.f16.i64(<vscale x 32 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 32 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x half> @llvm.riscv.vfnmsub.nxv32f16.f16.i64(<vscale x 32 x half> [[VD:%.*]], half [[RS1:%.*]], <vscale x 32 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 32 x half> [[TMP0]]
 //
 vfloat16m8_t test_vfnmsub_vf_f16m8(vfloat16m8_t vd, _Float16 rs1, vfloat16m8_t vs2, size_t vl) {
@@ -117,7 +117,7 @@ vfloat16m8_t test_vfnmsub_vf_f16m8(vfloat16m8_t vd, _Float16 rs1, vfloat16m8_t v
 
 // CHECK-RV64-LABEL: @test_vfnmsub_vv_f32mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x float> @llvm.riscv.vfnmsub.nxv1f32.nxv1f32.i64(<vscale x 1 x float> [[VD:%.*]], <vscale x 1 x float> [[VS1:%.*]], <vscale x 1 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x float> @llvm.riscv.vfnmsub.nxv1f32.nxv1f32.i64(<vscale x 1 x float> [[VD:%.*]], <vscale x 1 x float> [[VS1:%.*]], <vscale x 1 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x float> [[TMP0]]
 //
 vfloat32mf2_t test_vfnmsub_vv_f32mf2(vfloat32mf2_t vd, vfloat32mf2_t vs1, vfloat32mf2_t vs2, size_t vl) {
@@ -126,7 +126,7 @@ vfloat32mf2_t test_vfnmsub_vv_f32mf2(vfloat32mf2_t vd, vfloat32mf2_t vs1, vfloat
 
 // CHECK-RV64-LABEL: @test_vfnmsub_vf_f32mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x float> @llvm.riscv.vfnmsub.nxv1f32.f32.i64(<vscale x 1 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 1 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x float> @llvm.riscv.vfnmsub.nxv1f32.f32.i64(<vscale x 1 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 1 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x float> [[TMP0]]
 //
 vfloat32mf2_t test_vfnmsub_vf_f32mf2(vfloat32mf2_t vd, float rs1, vfloat32mf2_t vs2, size_t vl) {
@@ -135,7 +135,7 @@ vfloat32mf2_t test_vfnmsub_vf_f32mf2(vfloat32mf2_t vd, float rs1, vfloat32mf2_t
 
 // CHECK-RV64-LABEL: @test_vfnmsub_vv_f32m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x float> @llvm.riscv.vfnmsub.nxv2f32.nxv2f32.i64(<vscale x 2 x float> [[VD:%.*]], <vscale x 2 x float> [[VS1:%.*]], <vscale x 2 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x float> @llvm.riscv.vfnmsub.nxv2f32.nxv2f32.i64(<vscale x 2 x float> [[VD:%.*]], <vscale x 2 x float> [[VS1:%.*]], <vscale x 2 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x float> [[TMP0]]
 //
 vfloat32m1_t test_vfnmsub_vv_f32m1(vfloat32m1_t vd, vfloat32m1_t vs1, vfloat32m1_t vs2, size_t vl) {
@@ -144,7 +144,7 @@ vfloat32m1_t test_vfnmsub_vv_f32m1(vfloat32m1_t vd, vfloat32m1_t vs1, vfloat32m1
 
 // CHECK-RV64-LABEL: @test_vfnmsub_vf_f32m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x float> @llvm.riscv.vfnmsub.nxv2f32.f32.i64(<vscale x 2 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 2 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x float> @llvm.riscv.vfnmsub.nxv2f32.f32.i64(<vscale x 2 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 2 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x float> [[TMP0]]
 //
 vfloat32m1_t test_vfnmsub_vf_f32m1(vfloat32m1_t vd, float rs1, vfloat32m1_t vs2, size_t vl) {
@@ -153,7 +153,7 @@ vfloat32m1_t test_vfnmsub_vf_f32m1(vfloat32m1_t vd, float rs1, vfloat32m1_t vs2,
 
 // CHECK-RV64-LABEL: @test_vfnmsub_vv_f32m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.riscv.vfnmsub.nxv4f32.nxv4f32.i64(<vscale x 4 x float> [[VD:%.*]], <vscale x 4 x float> [[VS1:%.*]], <vscale x 4 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.riscv.vfnmsub.nxv4f32.nxv4f32.i64(<vscale x 4 x float> [[VD:%.*]], <vscale x 4 x float> [[VS1:%.*]], <vscale x 4 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
 vfloat32m2_t test_vfnmsub_vv_f32m2(vfloat32m2_t vd, vfloat32m2_t vs1, vfloat32m2_t vs2, size_t vl) {
@@ -162,7 +162,7 @@ vfloat32m2_t test_vfnmsub_vv_f32m2(vfloat32m2_t vd, vfloat32m2_t vs1, vfloat32m2
 
 // CHECK-RV64-LABEL: @test_vfnmsub_vf_f32m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.riscv.vfnmsub.nxv4f32.f32.i64(<vscale x 4 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 4 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.riscv.vfnmsub.nxv4f32.f32.i64(<vscale x 4 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 4 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
 vfloat32m2_t test_vfnmsub_vf_f32m2(vfloat32m2_t vd, float rs1, vfloat32m2_t vs2, size_t vl) {
@@ -171,7 +171,7 @@ vfloat32m2_t test_vfnmsub_vf_f32m2(vfloat32m2_t vd, float rs1, vfloat32m2_t vs2,
 
 // CHECK-RV64-LABEL: @test_vfnmsub_vv_f32m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x float> @llvm.riscv.vfnmsub.nxv8f32.nxv8f32.i64(<vscale x 8 x float> [[VD:%.*]], <vscale x 8 x float> [[VS1:%.*]], <vscale x 8 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x float> @llvm.riscv.vfnmsub.nxv8f32.nxv8f32.i64(<vscale x 8 x float> [[VD:%.*]], <vscale x 8 x float> [[VS1:%.*]], <vscale x 8 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x float> [[TMP0]]
 //
 vfloat32m4_t test_vfnmsub_vv_f32m4(vfloat32m4_t vd, vfloat32m4_t vs1, vfloat32m4_t vs2, size_t vl) {
@@ -180,7 +180,7 @@ vfloat32m4_t test_vfnmsub_vv_f32m4(vfloat32m4_t vd, vfloat32m4_t vs1, vfloat32m4
 
 // CHECK-RV64-LABEL: @test_vfnmsub_vf_f32m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x float> @llvm.riscv.vfnmsub.nxv8f32.f32.i64(<vscale x 8 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 8 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x float> @llvm.riscv.vfnmsub.nxv8f32.f32.i64(<vscale x 8 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 8 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x float> [[TMP0]]
 //
 vfloat32m4_t test_vfnmsub_vf_f32m4(vfloat32m4_t vd, float rs1, vfloat32m4_t vs2, size_t vl) {
@@ -189,7 +189,7 @@ vfloat32m4_t test_vfnmsub_vf_f32m4(vfloat32m4_t vd, float rs1, vfloat32m4_t vs2,
 
 // CHECK-RV64-LABEL: @test_vfnmsub_vv_f32m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x float> @llvm.riscv.vfnmsub.nxv16f32.nxv16f32.i64(<vscale x 16 x float> [[VD:%.*]], <vscale x 16 x float> [[VS1:%.*]], <vscale x 16 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x float> @llvm.riscv.vfnmsub.nxv16f32.nxv16f32.i64(<vscale x 16 x float> [[VD:%.*]], <vscale x 16 x float> [[VS1:%.*]], <vscale x 16 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x float> [[TMP0]]
 //
 vfloat32m8_t test_vfnmsub_vv_f32m8(vfloat32m8_t vd, vfloat32m8_t vs1, vfloat32m8_t vs2, size_t vl) {
@@ -198,7 +198,7 @@ vfloat32m8_t test_vfnmsub_vv_f32m8(vfloat32m8_t vd, vfloat32m8_t vs1, vfloat32m8
 
 // CHECK-RV64-LABEL: @test_vfnmsub_vf_f32m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x float> @llvm.riscv.vfnmsub.nxv16f32.f32.i64(<vscale x 16 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 16 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x float> @llvm.riscv.vfnmsub.nxv16f32.f32.i64(<vscale x 16 x float> [[VD:%.*]], float [[RS1:%.*]], <vscale x 16 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x float> [[TMP0]]
 //
 vfloat32m8_t test_vfnmsub_vf_f32m8(vfloat32m8_t vd, float rs1, vfloat32m8_t vs2, size_t vl) {
@@ -207,7 +207,7 @@ vfloat32m8_t test_vfnmsub_vf_f32m8(vfloat32m8_t vd, float rs1, vfloat32m8_t vs2,
 
 // CHECK-RV64-LABEL: @test_vfnmsub_vv_f64m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x double> @llvm.riscv.vfnmsub.nxv1f64.nxv1f64.i64(<vscale x 1 x double> [[VD:%.*]], <vscale x 1 x double> [[VS1:%.*]], <vscale x 1 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x double> @llvm.riscv.vfnmsub.nxv1f64.nxv1f64.i64(<vscale x 1 x double> [[VD:%.*]], <vscale x 1 x double> [[VS1:%.*]], <vscale x 1 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x double> [[TMP0]]
 //
 vfloat64m1_t test_vfnmsub_vv_f64m1(vfloat64m1_t vd, vfloat64m1_t vs1, vfloat64m1_t vs2, size_t vl) {
@@ -216,7 +216,7 @@ vfloat64m1_t test_vfnmsub_vv_f64m1(vfloat64m1_t vd, vfloat64m1_t vs1, vfloat64m1
 
 // CHECK-RV64-LABEL: @test_vfnmsub_vf_f64m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x double> @llvm.riscv.vfnmsub.nxv1f64.f64.i64(<vscale x 1 x double> [[VD:%.*]], double [[RS1:%.*]], <vscale x 1 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x double> @llvm.riscv.vfnmsub.nxv1f64.f64.i64(<vscale x 1 x double> [[VD:%.*]], double [[RS1:%.*]], <vscale x 1 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x double> [[TMP0]]
 //
 vfloat64m1_t test_vfnmsub_vf_f64m1(vfloat64m1_t vd, double rs1, vfloat64m1_t vs2, size_t vl) {
@@ -225,7 +225,7 @@ vfloat64m1_t test_vfnmsub_vf_f64m1(vfloat64m1_t vd, double rs1, vfloat64m1_t vs2
 
 // CHECK-RV64-LABEL: @test_vfnmsub_vv_f64m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x double> @llvm.riscv.vfnmsub.nxv2f64.nxv2f64.i64(<vscale x 2 x double> [[VD:%.*]], <vscale x 2 x double> [[VS1:%.*]], <vscale x 2 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x double> @llvm.riscv.vfnmsub.nxv2f64.nxv2f64.i64(<vscale x 2 x double> [[VD:%.*]], <vscale x 2 x double> [[VS1:%.*]], <vscale x 2 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x double> [[TMP0]]
 //
 vfloat64m2_t test_vfnmsub_vv_f64m2(vfloat64m2_t vd, vfloat64m2_t vs1, vfloat64m2_t vs2, size_t vl) {
@@ -234,7 +234,7 @@ vfloat64m2_t test_vfnmsub_vv_f64m2(vfloat64m2_t vd, vfloat64m2_t vs1, vfloat64m2
 
 // CHECK-RV64-LABEL: @test_vfnmsub_vf_f64m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x double> @llvm.riscv.vfnmsub.nxv2f64.f64.i64(<vscale x 2 x double> [[VD:%.*]], double [[RS1:%.*]], <vscale x 2 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x double> @llvm.riscv.vfnmsub.nxv2f64.f64.i64(<vscale x 2 x double> [[VD:%.*]], double [[RS1:%.*]], <vscale x 2 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x double> [[TMP0]]
 //
 vfloat64m2_t test_vfnmsub_vf_f64m2(vfloat64m2_t vd, double rs1, vfloat64m2_t vs2, size_t vl) {
@@ -243,7 +243,7 @@ vfloat64m2_t test_vfnmsub_vf_f64m2(vfloat64m2_t vd, double rs1, vfloat64m2_t vs2
 
 // CHECK-RV64-LABEL: @test_vfnmsub_vv_f64m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x double> @llvm.riscv.vfnmsub.nxv4f64.nxv4f64.i64(<vscale x 4 x double> [[VD:%.*]], <vscale x 4 x double> [[VS1:%.*]], <vscale x 4 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x double> @llvm.riscv.vfnmsub.nxv4f64.nxv4f64.i64(<vscale x 4 x double> [[VD:%.*]], <vscale x 4 x double> [[VS1:%.*]], <vscale x 4 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x double> [[TMP0]]
 //
 vfloat64m4_t test_vfnmsub_vv_f64m4(vfloat64m4_t vd, vfloat64m4_t vs1, vfloat64m4_t vs2, size_t vl) {
@@ -252,7 +252,7 @@ vfloat64m4_t test_vfnmsub_vv_f64m4(vfloat64m4_t vd, vfloat64m4_t vs1, vfloat64m4
 
 // CHECK-RV64-LABEL: @test_vfnmsub_vf_f64m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x double> @llvm.riscv.vfnmsub.nxv4f64.f64.i64(<vscale x 4 x double> [[VD:%.*]], double [[RS1:%.*]], <vscale x 4 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x double> @llvm.riscv.vfnmsub.nxv4f64.f64.i64(<vscale x 4 x double> [[VD:%.*]], double [[RS1:%.*]], <vscale x 4 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x double> [[TMP0]]
 //
 vfloat64m4_t test_vfnmsub_vf_f64m4(vfloat64m4_t vd, double rs1, vfloat64m4_t vs2, size_t vl) {
@@ -261,7 +261,7 @@ vfloat64m4_t test_vfnmsub_vf_f64m4(vfloat64m4_t vd, double rs1, vfloat64m4_t vs2
 
 // CHECK-RV64-LABEL: @test_vfnmsub_vv_f64m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x double> @llvm.riscv.vfnmsub.nxv8f64.nxv8f64.i64(<vscale x 8 x double> [[VD:%.*]], <vscale x 8 x double> [[VS1:%.*]], <vscale x 8 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x double> @llvm.riscv.vfnmsub.nxv8f64.nxv8f64.i64(<vscale x 8 x double> [[VD:%.*]], <vscale x 8 x double> [[VS1:%.*]], <vscale x 8 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x double> [[TMP0]]
 //
 vfloat64m8_t test_vfnmsub_vv_f64m8(vfloat64m8_t vd, vfloat64m8_t vs1, vfloat64m8_t vs2, size_t vl) {
@@ -270,7 +270,7 @@ vfloat64m8_t test_vfnmsub_vv_f64m8(vfloat64m8_t vd, vfloat64m8_t vs1, vfloat64m8
 
 // CHECK-RV64-LABEL: @test_vfnmsub_vf_f64m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x double> @llvm.riscv.vfnmsub.nxv8f64.f64.i64(<vscale x 8 x double> [[VD:%.*]], double [[RS1:%.*]], <vscale x 8 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x double> @llvm.riscv.vfnmsub.nxv8f64.f64.i64(<vscale x 8 x double> [[VD:%.*]], double [[RS1:%.*]], <vscale x 8 x double> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x double> [[TMP0]]
 //
 vfloat64m8_t test_vfnmsub_vf_f64m8(vfloat64m8_t vd, double rs1, vfloat64m8_t vs2, size_t vl) {

diff  --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vfwmacc.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vfwmacc.c
index 302c85909272b..9dbe56610e3e0 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vfwmacc.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vfwmacc.c
@@ -9,7 +9,7 @@
 
 // CHECK-RV64-LABEL: @test_vfwmacc_vv_f32mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x float> @llvm.riscv.vfwmacc.nxv1f32.nxv1f16.nxv1f16.i64(<vscale x 1 x float> [[VD:%.*]], <vscale x 1 x half> [[VS1:%.*]], <vscale x 1 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x float> @llvm.riscv.vfwmacc.nxv1f32.nxv1f16.nxv1f16.i64(<vscale x 1 x float> [[VD:%.*]], <vscale x 1 x half> [[VS1:%.*]], <vscale x 1 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x float> [[TMP0]]
 //
 vfloat32mf2_t test_vfwmacc_vv_f32mf2(vfloat32mf2_t vd, vfloat16mf4_t vs1, vfloat16mf4_t vs2, size_t vl) {
@@ -18,7 +18,7 @@ vfloat32mf2_t test_vfwmacc_vv_f32mf2(vfloat32mf2_t vd, vfloat16mf4_t vs1, vfloat
 
 // CHECK-RV64-LABEL: @test_vfwmacc_vf_f32mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x float> @llvm.riscv.vfwmacc.nxv1f32.f16.nxv1f16.i64(<vscale x 1 x float> [[VD:%.*]], half [[VS1:%.*]], <vscale x 1 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x float> @llvm.riscv.vfwmacc.nxv1f32.f16.nxv1f16.i64(<vscale x 1 x float> [[VD:%.*]], half [[VS1:%.*]], <vscale x 1 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x float> [[TMP0]]
 //
 vfloat32mf2_t test_vfwmacc_vf_f32mf2(vfloat32mf2_t vd, _Float16 vs1, vfloat16mf4_t vs2, size_t vl) {
@@ -27,7 +27,7 @@ vfloat32mf2_t test_vfwmacc_vf_f32mf2(vfloat32mf2_t vd, _Float16 vs1, vfloat16mf4
 
 // CHECK-RV64-LABEL: @test_vfwmacc_vv_f32m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x float> @llvm.riscv.vfwmacc.nxv2f32.nxv2f16.nxv2f16.i64(<vscale x 2 x float> [[VD:%.*]], <vscale x 2 x half> [[VS1:%.*]], <vscale x 2 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x float> @llvm.riscv.vfwmacc.nxv2f32.nxv2f16.nxv2f16.i64(<vscale x 2 x float> [[VD:%.*]], <vscale x 2 x half> [[VS1:%.*]], <vscale x 2 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x float> [[TMP0]]
 //
 vfloat32m1_t test_vfwmacc_vv_f32m1(vfloat32m1_t vd, vfloat16mf2_t vs1, vfloat16mf2_t vs2, size_t vl) {
@@ -36,7 +36,7 @@ vfloat32m1_t test_vfwmacc_vv_f32m1(vfloat32m1_t vd, vfloat16mf2_t vs1, vfloat16m
 
 // CHECK-RV64-LABEL: @test_vfwmacc_vf_f32m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x float> @llvm.riscv.vfwmacc.nxv2f32.f16.nxv2f16.i64(<vscale x 2 x float> [[VD:%.*]], half [[VS1:%.*]], <vscale x 2 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x float> @llvm.riscv.vfwmacc.nxv2f32.f16.nxv2f16.i64(<vscale x 2 x float> [[VD:%.*]], half [[VS1:%.*]], <vscale x 2 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x float> [[TMP0]]
 //
 vfloat32m1_t test_vfwmacc_vf_f32m1(vfloat32m1_t vd, _Float16 vs1, vfloat16mf2_t vs2, size_t vl) {
@@ -45,7 +45,7 @@ vfloat32m1_t test_vfwmacc_vf_f32m1(vfloat32m1_t vd, _Float16 vs1, vfloat16mf2_t
 
 // CHECK-RV64-LABEL: @test_vfwmacc_vv_f32m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.riscv.vfwmacc.nxv4f32.nxv4f16.nxv4f16.i64(<vscale x 4 x float> [[VD:%.*]], <vscale x 4 x half> [[VS1:%.*]], <vscale x 4 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.riscv.vfwmacc.nxv4f32.nxv4f16.nxv4f16.i64(<vscale x 4 x float> [[VD:%.*]], <vscale x 4 x half> [[VS1:%.*]], <vscale x 4 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
 vfloat32m2_t test_vfwmacc_vv_f32m2(vfloat32m2_t vd, vfloat16m1_t vs1, vfloat16m1_t vs2, size_t vl) {
@@ -54,7 +54,7 @@ vfloat32m2_t test_vfwmacc_vv_f32m2(vfloat32m2_t vd, vfloat16m1_t vs1, vfloat16m1
 
 // CHECK-RV64-LABEL: @test_vfwmacc_vf_f32m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.riscv.vfwmacc.nxv4f32.f16.nxv4f16.i64(<vscale x 4 x float> [[VD:%.*]], half [[VS1:%.*]], <vscale x 4 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.riscv.vfwmacc.nxv4f32.f16.nxv4f16.i64(<vscale x 4 x float> [[VD:%.*]], half [[VS1:%.*]], <vscale x 4 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
 vfloat32m2_t test_vfwmacc_vf_f32m2(vfloat32m2_t vd, _Float16 vs1, vfloat16m1_t vs2, size_t vl) {
@@ -63,7 +63,7 @@ vfloat32m2_t test_vfwmacc_vf_f32m2(vfloat32m2_t vd, _Float16 vs1, vfloat16m1_t v
 
 // CHECK-RV64-LABEL: @test_vfwmacc_vv_f32m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x float> @llvm.riscv.vfwmacc.nxv8f32.nxv8f16.nxv8f16.i64(<vscale x 8 x float> [[VD:%.*]], <vscale x 8 x half> [[VS1:%.*]], <vscale x 8 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x float> @llvm.riscv.vfwmacc.nxv8f32.nxv8f16.nxv8f16.i64(<vscale x 8 x float> [[VD:%.*]], <vscale x 8 x half> [[VS1:%.*]], <vscale x 8 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x float> [[TMP0]]
 //
 vfloat32m4_t test_vfwmacc_vv_f32m4(vfloat32m4_t vd, vfloat16m2_t vs1, vfloat16m2_t vs2, size_t vl) {
@@ -72,7 +72,7 @@ vfloat32m4_t test_vfwmacc_vv_f32m4(vfloat32m4_t vd, vfloat16m2_t vs1, vfloat16m2
 
 // CHECK-RV64-LABEL: @test_vfwmacc_vf_f32m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x float> @llvm.riscv.vfwmacc.nxv8f32.f16.nxv8f16.i64(<vscale x 8 x float> [[VD:%.*]], half [[VS1:%.*]], <vscale x 8 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x float> @llvm.riscv.vfwmacc.nxv8f32.f16.nxv8f16.i64(<vscale x 8 x float> [[VD:%.*]], half [[VS1:%.*]], <vscale x 8 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x float> [[TMP0]]
 //
 vfloat32m4_t test_vfwmacc_vf_f32m4(vfloat32m4_t vd, _Float16 vs1, vfloat16m2_t vs2, size_t vl) {
@@ -81,7 +81,7 @@ vfloat32m4_t test_vfwmacc_vf_f32m4(vfloat32m4_t vd, _Float16 vs1, vfloat16m2_t v
 
 // CHECK-RV64-LABEL: @test_vfwmacc_vv_f32m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x float> @llvm.riscv.vfwmacc.nxv16f32.nxv16f16.nxv16f16.i64(<vscale x 16 x float> [[VD:%.*]], <vscale x 16 x half> [[VS1:%.*]], <vscale x 16 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x float> @llvm.riscv.vfwmacc.nxv16f32.nxv16f16.nxv16f16.i64(<vscale x 16 x float> [[VD:%.*]], <vscale x 16 x half> [[VS1:%.*]], <vscale x 16 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x float> [[TMP0]]
 //
 vfloat32m8_t test_vfwmacc_vv_f32m8(vfloat32m8_t vd, vfloat16m4_t vs1, vfloat16m4_t vs2, size_t vl) {
@@ -90,7 +90,7 @@ vfloat32m8_t test_vfwmacc_vv_f32m8(vfloat32m8_t vd, vfloat16m4_t vs1, vfloat16m4
 
 // CHECK-RV64-LABEL: @test_vfwmacc_vf_f32m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x float> @llvm.riscv.vfwmacc.nxv16f32.f16.nxv16f16.i64(<vscale x 16 x float> [[VD:%.*]], half [[VS1:%.*]], <vscale x 16 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x float> @llvm.riscv.vfwmacc.nxv16f32.f16.nxv16f16.i64(<vscale x 16 x float> [[VD:%.*]], half [[VS1:%.*]], <vscale x 16 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x float> [[TMP0]]
 //
 vfloat32m8_t test_vfwmacc_vf_f32m8(vfloat32m8_t vd, _Float16 vs1, vfloat16m4_t vs2, size_t vl) {
@@ -99,7 +99,7 @@ vfloat32m8_t test_vfwmacc_vf_f32m8(vfloat32m8_t vd, _Float16 vs1, vfloat16m4_t v
 
 // CHECK-RV64-LABEL: @test_vfwmacc_vv_f64m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x double> @llvm.riscv.vfwmacc.nxv1f64.nxv1f32.nxv1f32.i64(<vscale x 1 x double> [[VD:%.*]], <vscale x 1 x float> [[VS1:%.*]], <vscale x 1 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x double> @llvm.riscv.vfwmacc.nxv1f64.nxv1f32.nxv1f32.i64(<vscale x 1 x double> [[VD:%.*]], <vscale x 1 x float> [[VS1:%.*]], <vscale x 1 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x double> [[TMP0]]
 //
 vfloat64m1_t test_vfwmacc_vv_f64m1(vfloat64m1_t vd, vfloat32mf2_t vs1, vfloat32mf2_t vs2, size_t vl) {
@@ -108,7 +108,7 @@ vfloat64m1_t test_vfwmacc_vv_f64m1(vfloat64m1_t vd, vfloat32mf2_t vs1, vfloat32m
 
 // CHECK-RV64-LABEL: @test_vfwmacc_vf_f64m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x double> @llvm.riscv.vfwmacc.nxv1f64.f32.nxv1f32.i64(<vscale x 1 x double> [[VD:%.*]], float [[VS1:%.*]], <vscale x 1 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x double> @llvm.riscv.vfwmacc.nxv1f64.f32.nxv1f32.i64(<vscale x 1 x double> [[VD:%.*]], float [[VS1:%.*]], <vscale x 1 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x double> [[TMP0]]
 //
 vfloat64m1_t test_vfwmacc_vf_f64m1(vfloat64m1_t vd, float vs1, vfloat32mf2_t vs2, size_t vl) {
@@ -117,7 +117,7 @@ vfloat64m1_t test_vfwmacc_vf_f64m1(vfloat64m1_t vd, float vs1, vfloat32mf2_t vs2
 
 // CHECK-RV64-LABEL: @test_vfwmacc_vv_f64m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x double> @llvm.riscv.vfwmacc.nxv2f64.nxv2f32.nxv2f32.i64(<vscale x 2 x double> [[VD:%.*]], <vscale x 2 x float> [[VS1:%.*]], <vscale x 2 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x double> @llvm.riscv.vfwmacc.nxv2f64.nxv2f32.nxv2f32.i64(<vscale x 2 x double> [[VD:%.*]], <vscale x 2 x float> [[VS1:%.*]], <vscale x 2 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x double> [[TMP0]]
 //
 vfloat64m2_t test_vfwmacc_vv_f64m2(vfloat64m2_t vd, vfloat32m1_t vs1, vfloat32m1_t vs2, size_t vl) {
@@ -126,7 +126,7 @@ vfloat64m2_t test_vfwmacc_vv_f64m2(vfloat64m2_t vd, vfloat32m1_t vs1, vfloat32m1
 
 // CHECK-RV64-LABEL: @test_vfwmacc_vf_f64m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x double> @llvm.riscv.vfwmacc.nxv2f64.f32.nxv2f32.i64(<vscale x 2 x double> [[VD:%.*]], float [[VS1:%.*]], <vscale x 2 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x double> @llvm.riscv.vfwmacc.nxv2f64.f32.nxv2f32.i64(<vscale x 2 x double> [[VD:%.*]], float [[VS1:%.*]], <vscale x 2 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x double> [[TMP0]]
 //
 vfloat64m2_t test_vfwmacc_vf_f64m2(vfloat64m2_t vd, float vs1, vfloat32m1_t vs2, size_t vl) {
@@ -135,7 +135,7 @@ vfloat64m2_t test_vfwmacc_vf_f64m2(vfloat64m2_t vd, float vs1, vfloat32m1_t vs2,
 
 // CHECK-RV64-LABEL: @test_vfwmacc_vv_f64m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x double> @llvm.riscv.vfwmacc.nxv4f64.nxv4f32.nxv4f32.i64(<vscale x 4 x double> [[VD:%.*]], <vscale x 4 x float> [[VS1:%.*]], <vscale x 4 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x double> @llvm.riscv.vfwmacc.nxv4f64.nxv4f32.nxv4f32.i64(<vscale x 4 x double> [[VD:%.*]], <vscale x 4 x float> [[VS1:%.*]], <vscale x 4 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x double> [[TMP0]]
 //
 vfloat64m4_t test_vfwmacc_vv_f64m4(vfloat64m4_t vd, vfloat32m2_t vs1, vfloat32m2_t vs2, size_t vl) {
@@ -144,7 +144,7 @@ vfloat64m4_t test_vfwmacc_vv_f64m4(vfloat64m4_t vd, vfloat32m2_t vs1, vfloat32m2
 
 // CHECK-RV64-LABEL: @test_vfwmacc_vf_f64m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x double> @llvm.riscv.vfwmacc.nxv4f64.f32.nxv4f32.i64(<vscale x 4 x double> [[VD:%.*]], float [[VS1:%.*]], <vscale x 4 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x double> @llvm.riscv.vfwmacc.nxv4f64.f32.nxv4f32.i64(<vscale x 4 x double> [[VD:%.*]], float [[VS1:%.*]], <vscale x 4 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x double> [[TMP0]]
 //
 vfloat64m4_t test_vfwmacc_vf_f64m4(vfloat64m4_t vd, float vs1, vfloat32m2_t vs2, size_t vl) {
@@ -153,7 +153,7 @@ vfloat64m4_t test_vfwmacc_vf_f64m4(vfloat64m4_t vd, float vs1, vfloat32m2_t vs2,
 
 // CHECK-RV64-LABEL: @test_vfwmacc_vv_f64m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x double> @llvm.riscv.vfwmacc.nxv8f64.nxv8f32.nxv8f32.i64(<vscale x 8 x double> [[VD:%.*]], <vscale x 8 x float> [[VS1:%.*]], <vscale x 8 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x double> @llvm.riscv.vfwmacc.nxv8f64.nxv8f32.nxv8f32.i64(<vscale x 8 x double> [[VD:%.*]], <vscale x 8 x float> [[VS1:%.*]], <vscale x 8 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x double> [[TMP0]]
 //
 vfloat64m8_t test_vfwmacc_vv_f64m8(vfloat64m8_t vd, vfloat32m4_t vs1, vfloat32m4_t vs2, size_t vl) {
@@ -162,7 +162,7 @@ vfloat64m8_t test_vfwmacc_vv_f64m8(vfloat64m8_t vd, vfloat32m4_t vs1, vfloat32m4
 
 // CHECK-RV64-LABEL: @test_vfwmacc_vf_f64m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x double> @llvm.riscv.vfwmacc.nxv8f64.f32.nxv8f32.i64(<vscale x 8 x double> [[VD:%.*]], float [[VS1:%.*]], <vscale x 8 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x double> @llvm.riscv.vfwmacc.nxv8f64.f32.nxv8f32.i64(<vscale x 8 x double> [[VD:%.*]], float [[VS1:%.*]], <vscale x 8 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x double> [[TMP0]]
 //
 vfloat64m8_t test_vfwmacc_vf_f64m8(vfloat64m8_t vd, float vs1, vfloat32m4_t vs2, size_t vl) {

diff  --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vfwmsac.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vfwmsac.c
index 26dd2c5bd0080..f38d5001abc11 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vfwmsac.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vfwmsac.c
@@ -9,7 +9,7 @@
 
 // CHECK-RV64-LABEL: @test_vfwmsac_vv_f32mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x float> @llvm.riscv.vfwmsac.nxv1f32.nxv1f16.nxv1f16.i64(<vscale x 1 x float> [[VD:%.*]], <vscale x 1 x half> [[VS1:%.*]], <vscale x 1 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x float> @llvm.riscv.vfwmsac.nxv1f32.nxv1f16.nxv1f16.i64(<vscale x 1 x float> [[VD:%.*]], <vscale x 1 x half> [[VS1:%.*]], <vscale x 1 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x float> [[TMP0]]
 //
 vfloat32mf2_t test_vfwmsac_vv_f32mf2(vfloat32mf2_t vd, vfloat16mf4_t vs1, vfloat16mf4_t vs2, size_t vl) {
@@ -18,7 +18,7 @@ vfloat32mf2_t test_vfwmsac_vv_f32mf2(vfloat32mf2_t vd, vfloat16mf4_t vs1, vfloat
 
 // CHECK-RV64-LABEL: @test_vfwmsac_vf_f32mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x float> @llvm.riscv.vfwmsac.nxv1f32.f16.nxv1f16.i64(<vscale x 1 x float> [[VD:%.*]], half [[VS1:%.*]], <vscale x 1 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x float> @llvm.riscv.vfwmsac.nxv1f32.f16.nxv1f16.i64(<vscale x 1 x float> [[VD:%.*]], half [[VS1:%.*]], <vscale x 1 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x float> [[TMP0]]
 //
 vfloat32mf2_t test_vfwmsac_vf_f32mf2(vfloat32mf2_t vd, _Float16 vs1, vfloat16mf4_t vs2, size_t vl) {
@@ -27,7 +27,7 @@ vfloat32mf2_t test_vfwmsac_vf_f32mf2(vfloat32mf2_t vd, _Float16 vs1, vfloat16mf4
 
 // CHECK-RV64-LABEL: @test_vfwmsac_vv_f32m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x float> @llvm.riscv.vfwmsac.nxv2f32.nxv2f16.nxv2f16.i64(<vscale x 2 x float> [[VD:%.*]], <vscale x 2 x half> [[VS1:%.*]], <vscale x 2 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x float> @llvm.riscv.vfwmsac.nxv2f32.nxv2f16.nxv2f16.i64(<vscale x 2 x float> [[VD:%.*]], <vscale x 2 x half> [[VS1:%.*]], <vscale x 2 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x float> [[TMP0]]
 //
 vfloat32m1_t test_vfwmsac_vv_f32m1(vfloat32m1_t vd, vfloat16mf2_t vs1, vfloat16mf2_t vs2, size_t vl) {
@@ -36,7 +36,7 @@ vfloat32m1_t test_vfwmsac_vv_f32m1(vfloat32m1_t vd, vfloat16mf2_t vs1, vfloat16m
 
 // CHECK-RV64-LABEL: @test_vfwmsac_vf_f32m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x float> @llvm.riscv.vfwmsac.nxv2f32.f16.nxv2f16.i64(<vscale x 2 x float> [[VD:%.*]], half [[VS1:%.*]], <vscale x 2 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x float> @llvm.riscv.vfwmsac.nxv2f32.f16.nxv2f16.i64(<vscale x 2 x float> [[VD:%.*]], half [[VS1:%.*]], <vscale x 2 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x float> [[TMP0]]
 //
 vfloat32m1_t test_vfwmsac_vf_f32m1(vfloat32m1_t vd, _Float16 vs1, vfloat16mf2_t vs2, size_t vl) {
@@ -45,7 +45,7 @@ vfloat32m1_t test_vfwmsac_vf_f32m1(vfloat32m1_t vd, _Float16 vs1, vfloat16mf2_t
 
 // CHECK-RV64-LABEL: @test_vfwmsac_vv_f32m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.riscv.vfwmsac.nxv4f32.nxv4f16.nxv4f16.i64(<vscale x 4 x float> [[VD:%.*]], <vscale x 4 x half> [[VS1:%.*]], <vscale x 4 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.riscv.vfwmsac.nxv4f32.nxv4f16.nxv4f16.i64(<vscale x 4 x float> [[VD:%.*]], <vscale x 4 x half> [[VS1:%.*]], <vscale x 4 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
 vfloat32m2_t test_vfwmsac_vv_f32m2(vfloat32m2_t vd, vfloat16m1_t vs1, vfloat16m1_t vs2, size_t vl) {
@@ -54,7 +54,7 @@ vfloat32m2_t test_vfwmsac_vv_f32m2(vfloat32m2_t vd, vfloat16m1_t vs1, vfloat16m1
 
 // CHECK-RV64-LABEL: @test_vfwmsac_vf_f32m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.riscv.vfwmsac.nxv4f32.f16.nxv4f16.i64(<vscale x 4 x float> [[VD:%.*]], half [[VS1:%.*]], <vscale x 4 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.riscv.vfwmsac.nxv4f32.f16.nxv4f16.i64(<vscale x 4 x float> [[VD:%.*]], half [[VS1:%.*]], <vscale x 4 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
 vfloat32m2_t test_vfwmsac_vf_f32m2(vfloat32m2_t vd, _Float16 vs1, vfloat16m1_t vs2, size_t vl) {
@@ -63,7 +63,7 @@ vfloat32m2_t test_vfwmsac_vf_f32m2(vfloat32m2_t vd, _Float16 vs1, vfloat16m1_t v
 
 // CHECK-RV64-LABEL: @test_vfwmsac_vv_f32m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x float> @llvm.riscv.vfwmsac.nxv8f32.nxv8f16.nxv8f16.i64(<vscale x 8 x float> [[VD:%.*]], <vscale x 8 x half> [[VS1:%.*]], <vscale x 8 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x float> @llvm.riscv.vfwmsac.nxv8f32.nxv8f16.nxv8f16.i64(<vscale x 8 x float> [[VD:%.*]], <vscale x 8 x half> [[VS1:%.*]], <vscale x 8 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x float> [[TMP0]]
 //
 vfloat32m4_t test_vfwmsac_vv_f32m4(vfloat32m4_t vd, vfloat16m2_t vs1, vfloat16m2_t vs2, size_t vl) {
@@ -72,7 +72,7 @@ vfloat32m4_t test_vfwmsac_vv_f32m4(vfloat32m4_t vd, vfloat16m2_t vs1, vfloat16m2
 
 // CHECK-RV64-LABEL: @test_vfwmsac_vf_f32m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x float> @llvm.riscv.vfwmsac.nxv8f32.f16.nxv8f16.i64(<vscale x 8 x float> [[VD:%.*]], half [[VS1:%.*]], <vscale x 8 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x float> @llvm.riscv.vfwmsac.nxv8f32.f16.nxv8f16.i64(<vscale x 8 x float> [[VD:%.*]], half [[VS1:%.*]], <vscale x 8 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x float> [[TMP0]]
 //
 vfloat32m4_t test_vfwmsac_vf_f32m4(vfloat32m4_t vd, _Float16 vs1, vfloat16m2_t vs2, size_t vl) {
@@ -81,7 +81,7 @@ vfloat32m4_t test_vfwmsac_vf_f32m4(vfloat32m4_t vd, _Float16 vs1, vfloat16m2_t v
 
 // CHECK-RV64-LABEL: @test_vfwmsac_vv_f32m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x float> @llvm.riscv.vfwmsac.nxv16f32.nxv16f16.nxv16f16.i64(<vscale x 16 x float> [[VD:%.*]], <vscale x 16 x half> [[VS1:%.*]], <vscale x 16 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x float> @llvm.riscv.vfwmsac.nxv16f32.nxv16f16.nxv16f16.i64(<vscale x 16 x float> [[VD:%.*]], <vscale x 16 x half> [[VS1:%.*]], <vscale x 16 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x float> [[TMP0]]
 //
 vfloat32m8_t test_vfwmsac_vv_f32m8(vfloat32m8_t vd, vfloat16m4_t vs1, vfloat16m4_t vs2, size_t vl) {
@@ -90,7 +90,7 @@ vfloat32m8_t test_vfwmsac_vv_f32m8(vfloat32m8_t vd, vfloat16m4_t vs1, vfloat16m4
 
 // CHECK-RV64-LABEL: @test_vfwmsac_vf_f32m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x float> @llvm.riscv.vfwmsac.nxv16f32.f16.nxv16f16.i64(<vscale x 16 x float> [[VD:%.*]], half [[VS1:%.*]], <vscale x 16 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x float> @llvm.riscv.vfwmsac.nxv16f32.f16.nxv16f16.i64(<vscale x 16 x float> [[VD:%.*]], half [[VS1:%.*]], <vscale x 16 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x float> [[TMP0]]
 //
 vfloat32m8_t test_vfwmsac_vf_f32m8(vfloat32m8_t vd, _Float16 vs1, vfloat16m4_t vs2, size_t vl) {
@@ -99,7 +99,7 @@ vfloat32m8_t test_vfwmsac_vf_f32m8(vfloat32m8_t vd, _Float16 vs1, vfloat16m4_t v
 
 // CHECK-RV64-LABEL: @test_vfwmsac_vv_f64m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x double> @llvm.riscv.vfwmsac.nxv1f64.nxv1f32.nxv1f32.i64(<vscale x 1 x double> [[VD:%.*]], <vscale x 1 x float> [[VS1:%.*]], <vscale x 1 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x double> @llvm.riscv.vfwmsac.nxv1f64.nxv1f32.nxv1f32.i64(<vscale x 1 x double> [[VD:%.*]], <vscale x 1 x float> [[VS1:%.*]], <vscale x 1 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x double> [[TMP0]]
 //
 vfloat64m1_t test_vfwmsac_vv_f64m1(vfloat64m1_t vd, vfloat32mf2_t vs1, vfloat32mf2_t vs2, size_t vl) {
@@ -108,7 +108,7 @@ vfloat64m1_t test_vfwmsac_vv_f64m1(vfloat64m1_t vd, vfloat32mf2_t vs1, vfloat32m
 
 // CHECK-RV64-LABEL: @test_vfwmsac_vf_f64m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x double> @llvm.riscv.vfwmsac.nxv1f64.f32.nxv1f32.i64(<vscale x 1 x double> [[VD:%.*]], float [[VS1:%.*]], <vscale x 1 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x double> @llvm.riscv.vfwmsac.nxv1f64.f32.nxv1f32.i64(<vscale x 1 x double> [[VD:%.*]], float [[VS1:%.*]], <vscale x 1 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x double> [[TMP0]]
 //
 vfloat64m1_t test_vfwmsac_vf_f64m1(vfloat64m1_t vd, float vs1, vfloat32mf2_t vs2, size_t vl) {
@@ -117,7 +117,7 @@ vfloat64m1_t test_vfwmsac_vf_f64m1(vfloat64m1_t vd, float vs1, vfloat32mf2_t vs2
 
 // CHECK-RV64-LABEL: @test_vfwmsac_vv_f64m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x double> @llvm.riscv.vfwmsac.nxv2f64.nxv2f32.nxv2f32.i64(<vscale x 2 x double> [[VD:%.*]], <vscale x 2 x float> [[VS1:%.*]], <vscale x 2 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x double> @llvm.riscv.vfwmsac.nxv2f64.nxv2f32.nxv2f32.i64(<vscale x 2 x double> [[VD:%.*]], <vscale x 2 x float> [[VS1:%.*]], <vscale x 2 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x double> [[TMP0]]
 //
 vfloat64m2_t test_vfwmsac_vv_f64m2(vfloat64m2_t vd, vfloat32m1_t vs1, vfloat32m1_t vs2, size_t vl) {
@@ -126,7 +126,7 @@ vfloat64m2_t test_vfwmsac_vv_f64m2(vfloat64m2_t vd, vfloat32m1_t vs1, vfloat32m1
 
 // CHECK-RV64-LABEL: @test_vfwmsac_vf_f64m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x double> @llvm.riscv.vfwmsac.nxv2f64.f32.nxv2f32.i64(<vscale x 2 x double> [[VD:%.*]], float [[VS1:%.*]], <vscale x 2 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x double> @llvm.riscv.vfwmsac.nxv2f64.f32.nxv2f32.i64(<vscale x 2 x double> [[VD:%.*]], float [[VS1:%.*]], <vscale x 2 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x double> [[TMP0]]
 //
 vfloat64m2_t test_vfwmsac_vf_f64m2(vfloat64m2_t vd, float vs1, vfloat32m1_t vs2, size_t vl) {
@@ -135,7 +135,7 @@ vfloat64m2_t test_vfwmsac_vf_f64m2(vfloat64m2_t vd, float vs1, vfloat32m1_t vs2,
 
 // CHECK-RV64-LABEL: @test_vfwmsac_vv_f64m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x double> @llvm.riscv.vfwmsac.nxv4f64.nxv4f32.nxv4f32.i64(<vscale x 4 x double> [[VD:%.*]], <vscale x 4 x float> [[VS1:%.*]], <vscale x 4 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x double> @llvm.riscv.vfwmsac.nxv4f64.nxv4f32.nxv4f32.i64(<vscale x 4 x double> [[VD:%.*]], <vscale x 4 x float> [[VS1:%.*]], <vscale x 4 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x double> [[TMP0]]
 //
 vfloat64m4_t test_vfwmsac_vv_f64m4(vfloat64m4_t vd, vfloat32m2_t vs1, vfloat32m2_t vs2, size_t vl) {
@@ -144,7 +144,7 @@ vfloat64m4_t test_vfwmsac_vv_f64m4(vfloat64m4_t vd, vfloat32m2_t vs1, vfloat32m2
 
 // CHECK-RV64-LABEL: @test_vfwmsac_vf_f64m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x double> @llvm.riscv.vfwmsac.nxv4f64.f32.nxv4f32.i64(<vscale x 4 x double> [[VD:%.*]], float [[VS1:%.*]], <vscale x 4 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x double> @llvm.riscv.vfwmsac.nxv4f64.f32.nxv4f32.i64(<vscale x 4 x double> [[VD:%.*]], float [[VS1:%.*]], <vscale x 4 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x double> [[TMP0]]
 //
 vfloat64m4_t test_vfwmsac_vf_f64m4(vfloat64m4_t vd, float vs1, vfloat32m2_t vs2, size_t vl) {
@@ -153,7 +153,7 @@ vfloat64m4_t test_vfwmsac_vf_f64m4(vfloat64m4_t vd, float vs1, vfloat32m2_t vs2,
 
 // CHECK-RV64-LABEL: @test_vfwmsac_vv_f64m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x double> @llvm.riscv.vfwmsac.nxv8f64.nxv8f32.nxv8f32.i64(<vscale x 8 x double> [[VD:%.*]], <vscale x 8 x float> [[VS1:%.*]], <vscale x 8 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x double> @llvm.riscv.vfwmsac.nxv8f64.nxv8f32.nxv8f32.i64(<vscale x 8 x double> [[VD:%.*]], <vscale x 8 x float> [[VS1:%.*]], <vscale x 8 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x double> [[TMP0]]
 //
 vfloat64m8_t test_vfwmsac_vv_f64m8(vfloat64m8_t vd, vfloat32m4_t vs1, vfloat32m4_t vs2, size_t vl) {
@@ -162,7 +162,7 @@ vfloat64m8_t test_vfwmsac_vv_f64m8(vfloat64m8_t vd, vfloat32m4_t vs1, vfloat32m4
 
 // CHECK-RV64-LABEL: @test_vfwmsac_vf_f64m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x double> @llvm.riscv.vfwmsac.nxv8f64.f32.nxv8f32.i64(<vscale x 8 x double> [[VD:%.*]], float [[VS1:%.*]], <vscale x 8 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x double> @llvm.riscv.vfwmsac.nxv8f64.f32.nxv8f32.i64(<vscale x 8 x double> [[VD:%.*]], float [[VS1:%.*]], <vscale x 8 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x double> [[TMP0]]
 //
 vfloat64m8_t test_vfwmsac_vf_f64m8(vfloat64m8_t vd, float vs1, vfloat32m4_t vs2, size_t vl) {

diff  --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vfwnmacc.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vfwnmacc.c
index 8cafc4a498460..cf15345eb611e 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vfwnmacc.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vfwnmacc.c
@@ -9,7 +9,7 @@
 
 // CHECK-RV64-LABEL: @test_vfwnmacc_vv_f32mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x float> @llvm.riscv.vfwnmacc.nxv1f32.nxv1f16.nxv1f16.i64(<vscale x 1 x float> [[VD:%.*]], <vscale x 1 x half> [[VS1:%.*]], <vscale x 1 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x float> @llvm.riscv.vfwnmacc.nxv1f32.nxv1f16.nxv1f16.i64(<vscale x 1 x float> [[VD:%.*]], <vscale x 1 x half> [[VS1:%.*]], <vscale x 1 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x float> [[TMP0]]
 //
 vfloat32mf2_t test_vfwnmacc_vv_f32mf2(vfloat32mf2_t vd, vfloat16mf4_t vs1, vfloat16mf4_t vs2, size_t vl) {
@@ -18,7 +18,7 @@ vfloat32mf2_t test_vfwnmacc_vv_f32mf2(vfloat32mf2_t vd, vfloat16mf4_t vs1, vfloa
 
 // CHECK-RV64-LABEL: @test_vfwnmacc_vf_f32mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x float> @llvm.riscv.vfwnmacc.nxv1f32.f16.nxv1f16.i64(<vscale x 1 x float> [[VD:%.*]], half [[VS1:%.*]], <vscale x 1 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x float> @llvm.riscv.vfwnmacc.nxv1f32.f16.nxv1f16.i64(<vscale x 1 x float> [[VD:%.*]], half [[VS1:%.*]], <vscale x 1 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x float> [[TMP0]]
 //
 vfloat32mf2_t test_vfwnmacc_vf_f32mf2(vfloat32mf2_t vd, _Float16 vs1, vfloat16mf4_t vs2, size_t vl) {
@@ -27,7 +27,7 @@ vfloat32mf2_t test_vfwnmacc_vf_f32mf2(vfloat32mf2_t vd, _Float16 vs1, vfloat16mf
 
 // CHECK-RV64-LABEL: @test_vfwnmacc_vv_f32m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x float> @llvm.riscv.vfwnmacc.nxv2f32.nxv2f16.nxv2f16.i64(<vscale x 2 x float> [[VD:%.*]], <vscale x 2 x half> [[VS1:%.*]], <vscale x 2 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x float> @llvm.riscv.vfwnmacc.nxv2f32.nxv2f16.nxv2f16.i64(<vscale x 2 x float> [[VD:%.*]], <vscale x 2 x half> [[VS1:%.*]], <vscale x 2 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x float> [[TMP0]]
 //
 vfloat32m1_t test_vfwnmacc_vv_f32m1(vfloat32m1_t vd, vfloat16mf2_t vs1, vfloat16mf2_t vs2, size_t vl) {
@@ -36,7 +36,7 @@ vfloat32m1_t test_vfwnmacc_vv_f32m1(vfloat32m1_t vd, vfloat16mf2_t vs1, vfloat16
 
 // CHECK-RV64-LABEL: @test_vfwnmacc_vf_f32m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x float> @llvm.riscv.vfwnmacc.nxv2f32.f16.nxv2f16.i64(<vscale x 2 x float> [[VD:%.*]], half [[VS1:%.*]], <vscale x 2 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x float> @llvm.riscv.vfwnmacc.nxv2f32.f16.nxv2f16.i64(<vscale x 2 x float> [[VD:%.*]], half [[VS1:%.*]], <vscale x 2 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x float> [[TMP0]]
 //
 vfloat32m1_t test_vfwnmacc_vf_f32m1(vfloat32m1_t vd, _Float16 vs1, vfloat16mf2_t vs2, size_t vl) {
@@ -45,7 +45,7 @@ vfloat32m1_t test_vfwnmacc_vf_f32m1(vfloat32m1_t vd, _Float16 vs1, vfloat16mf2_t
 
 // CHECK-RV64-LABEL: @test_vfwnmacc_vv_f32m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.riscv.vfwnmacc.nxv4f32.nxv4f16.nxv4f16.i64(<vscale x 4 x float> [[VD:%.*]], <vscale x 4 x half> [[VS1:%.*]], <vscale x 4 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.riscv.vfwnmacc.nxv4f32.nxv4f16.nxv4f16.i64(<vscale x 4 x float> [[VD:%.*]], <vscale x 4 x half> [[VS1:%.*]], <vscale x 4 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
 vfloat32m2_t test_vfwnmacc_vv_f32m2(vfloat32m2_t vd, vfloat16m1_t vs1, vfloat16m1_t vs2, size_t vl) {
@@ -54,7 +54,7 @@ vfloat32m2_t test_vfwnmacc_vv_f32m2(vfloat32m2_t vd, vfloat16m1_t vs1, vfloat16m
 
 // CHECK-RV64-LABEL: @test_vfwnmacc_vf_f32m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.riscv.vfwnmacc.nxv4f32.f16.nxv4f16.i64(<vscale x 4 x float> [[VD:%.*]], half [[VS1:%.*]], <vscale x 4 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.riscv.vfwnmacc.nxv4f32.f16.nxv4f16.i64(<vscale x 4 x float> [[VD:%.*]], half [[VS1:%.*]], <vscale x 4 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
 vfloat32m2_t test_vfwnmacc_vf_f32m2(vfloat32m2_t vd, _Float16 vs1, vfloat16m1_t vs2, size_t vl) {
@@ -63,7 +63,7 @@ vfloat32m2_t test_vfwnmacc_vf_f32m2(vfloat32m2_t vd, _Float16 vs1, vfloat16m1_t
 
 // CHECK-RV64-LABEL: @test_vfwnmacc_vv_f32m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x float> @llvm.riscv.vfwnmacc.nxv8f32.nxv8f16.nxv8f16.i64(<vscale x 8 x float> [[VD:%.*]], <vscale x 8 x half> [[VS1:%.*]], <vscale x 8 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x float> @llvm.riscv.vfwnmacc.nxv8f32.nxv8f16.nxv8f16.i64(<vscale x 8 x float> [[VD:%.*]], <vscale x 8 x half> [[VS1:%.*]], <vscale x 8 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x float> [[TMP0]]
 //
 vfloat32m4_t test_vfwnmacc_vv_f32m4(vfloat32m4_t vd, vfloat16m2_t vs1, vfloat16m2_t vs2, size_t vl) {
@@ -72,7 +72,7 @@ vfloat32m4_t test_vfwnmacc_vv_f32m4(vfloat32m4_t vd, vfloat16m2_t vs1, vfloat16m
 
 // CHECK-RV64-LABEL: @test_vfwnmacc_vf_f32m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x float> @llvm.riscv.vfwnmacc.nxv8f32.f16.nxv8f16.i64(<vscale x 8 x float> [[VD:%.*]], half [[VS1:%.*]], <vscale x 8 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x float> @llvm.riscv.vfwnmacc.nxv8f32.f16.nxv8f16.i64(<vscale x 8 x float> [[VD:%.*]], half [[VS1:%.*]], <vscale x 8 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x float> [[TMP0]]
 //
 vfloat32m4_t test_vfwnmacc_vf_f32m4(vfloat32m4_t vd, _Float16 vs1, vfloat16m2_t vs2, size_t vl) {
@@ -81,7 +81,7 @@ vfloat32m4_t test_vfwnmacc_vf_f32m4(vfloat32m4_t vd, _Float16 vs1, vfloat16m2_t
 
 // CHECK-RV64-LABEL: @test_vfwnmacc_vv_f32m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x float> @llvm.riscv.vfwnmacc.nxv16f32.nxv16f16.nxv16f16.i64(<vscale x 16 x float> [[VD:%.*]], <vscale x 16 x half> [[VS1:%.*]], <vscale x 16 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x float> @llvm.riscv.vfwnmacc.nxv16f32.nxv16f16.nxv16f16.i64(<vscale x 16 x float> [[VD:%.*]], <vscale x 16 x half> [[VS1:%.*]], <vscale x 16 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x float> [[TMP0]]
 //
 vfloat32m8_t test_vfwnmacc_vv_f32m8(vfloat32m8_t vd, vfloat16m4_t vs1, vfloat16m4_t vs2, size_t vl) {
@@ -90,7 +90,7 @@ vfloat32m8_t test_vfwnmacc_vv_f32m8(vfloat32m8_t vd, vfloat16m4_t vs1, vfloat16m
 
 // CHECK-RV64-LABEL: @test_vfwnmacc_vf_f32m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x float> @llvm.riscv.vfwnmacc.nxv16f32.f16.nxv16f16.i64(<vscale x 16 x float> [[VD:%.*]], half [[VS1:%.*]], <vscale x 16 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x float> @llvm.riscv.vfwnmacc.nxv16f32.f16.nxv16f16.i64(<vscale x 16 x float> [[VD:%.*]], half [[VS1:%.*]], <vscale x 16 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x float> [[TMP0]]
 //
 vfloat32m8_t test_vfwnmacc_vf_f32m8(vfloat32m8_t vd, _Float16 vs1, vfloat16m4_t vs2, size_t vl) {
@@ -99,7 +99,7 @@ vfloat32m8_t test_vfwnmacc_vf_f32m8(vfloat32m8_t vd, _Float16 vs1, vfloat16m4_t
 
 // CHECK-RV64-LABEL: @test_vfwnmacc_vv_f64m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x double> @llvm.riscv.vfwnmacc.nxv1f64.nxv1f32.nxv1f32.i64(<vscale x 1 x double> [[VD:%.*]], <vscale x 1 x float> [[VS1:%.*]], <vscale x 1 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x double> @llvm.riscv.vfwnmacc.nxv1f64.nxv1f32.nxv1f32.i64(<vscale x 1 x double> [[VD:%.*]], <vscale x 1 x float> [[VS1:%.*]], <vscale x 1 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x double> [[TMP0]]
 //
 vfloat64m1_t test_vfwnmacc_vv_f64m1(vfloat64m1_t vd, vfloat32mf2_t vs1, vfloat32mf2_t vs2, size_t vl) {
@@ -108,7 +108,7 @@ vfloat64m1_t test_vfwnmacc_vv_f64m1(vfloat64m1_t vd, vfloat32mf2_t vs1, vfloat32
 
 // CHECK-RV64-LABEL: @test_vfwnmacc_vf_f64m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x double> @llvm.riscv.vfwnmacc.nxv1f64.f32.nxv1f32.i64(<vscale x 1 x double> [[VD:%.*]], float [[VS1:%.*]], <vscale x 1 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x double> @llvm.riscv.vfwnmacc.nxv1f64.f32.nxv1f32.i64(<vscale x 1 x double> [[VD:%.*]], float [[VS1:%.*]], <vscale x 1 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x double> [[TMP0]]
 //
 vfloat64m1_t test_vfwnmacc_vf_f64m1(vfloat64m1_t vd, float vs1, vfloat32mf2_t vs2, size_t vl) {
@@ -117,7 +117,7 @@ vfloat64m1_t test_vfwnmacc_vf_f64m1(vfloat64m1_t vd, float vs1, vfloat32mf2_t vs
 
 // CHECK-RV64-LABEL: @test_vfwnmacc_vv_f64m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x double> @llvm.riscv.vfwnmacc.nxv2f64.nxv2f32.nxv2f32.i64(<vscale x 2 x double> [[VD:%.*]], <vscale x 2 x float> [[VS1:%.*]], <vscale x 2 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x double> @llvm.riscv.vfwnmacc.nxv2f64.nxv2f32.nxv2f32.i64(<vscale x 2 x double> [[VD:%.*]], <vscale x 2 x float> [[VS1:%.*]], <vscale x 2 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x double> [[TMP0]]
 //
 vfloat64m2_t test_vfwnmacc_vv_f64m2(vfloat64m2_t vd, vfloat32m1_t vs1, vfloat32m1_t vs2, size_t vl) {
@@ -126,7 +126,7 @@ vfloat64m2_t test_vfwnmacc_vv_f64m2(vfloat64m2_t vd, vfloat32m1_t vs1, vfloat32m
 
 // CHECK-RV64-LABEL: @test_vfwnmacc_vf_f64m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x double> @llvm.riscv.vfwnmacc.nxv2f64.f32.nxv2f32.i64(<vscale x 2 x double> [[VD:%.*]], float [[VS1:%.*]], <vscale x 2 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x double> @llvm.riscv.vfwnmacc.nxv2f64.f32.nxv2f32.i64(<vscale x 2 x double> [[VD:%.*]], float [[VS1:%.*]], <vscale x 2 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x double> [[TMP0]]
 //
 vfloat64m2_t test_vfwnmacc_vf_f64m2(vfloat64m2_t vd, float vs1, vfloat32m1_t vs2, size_t vl) {
@@ -135,7 +135,7 @@ vfloat64m2_t test_vfwnmacc_vf_f64m2(vfloat64m2_t vd, float vs1, vfloat32m1_t vs2
 
 // CHECK-RV64-LABEL: @test_vfwnmacc_vv_f64m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x double> @llvm.riscv.vfwnmacc.nxv4f64.nxv4f32.nxv4f32.i64(<vscale x 4 x double> [[VD:%.*]], <vscale x 4 x float> [[VS1:%.*]], <vscale x 4 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x double> @llvm.riscv.vfwnmacc.nxv4f64.nxv4f32.nxv4f32.i64(<vscale x 4 x double> [[VD:%.*]], <vscale x 4 x float> [[VS1:%.*]], <vscale x 4 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x double> [[TMP0]]
 //
 vfloat64m4_t test_vfwnmacc_vv_f64m4(vfloat64m4_t vd, vfloat32m2_t vs1, vfloat32m2_t vs2, size_t vl) {
@@ -144,7 +144,7 @@ vfloat64m4_t test_vfwnmacc_vv_f64m4(vfloat64m4_t vd, vfloat32m2_t vs1, vfloat32m
 
 // CHECK-RV64-LABEL: @test_vfwnmacc_vf_f64m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x double> @llvm.riscv.vfwnmacc.nxv4f64.f32.nxv4f32.i64(<vscale x 4 x double> [[VD:%.*]], float [[VS1:%.*]], <vscale x 4 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x double> @llvm.riscv.vfwnmacc.nxv4f64.f32.nxv4f32.i64(<vscale x 4 x double> [[VD:%.*]], float [[VS1:%.*]], <vscale x 4 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x double> [[TMP0]]
 //
 vfloat64m4_t test_vfwnmacc_vf_f64m4(vfloat64m4_t vd, float vs1, vfloat32m2_t vs2, size_t vl) {
@@ -153,7 +153,7 @@ vfloat64m4_t test_vfwnmacc_vf_f64m4(vfloat64m4_t vd, float vs1, vfloat32m2_t vs2
 
 // CHECK-RV64-LABEL: @test_vfwnmacc_vv_f64m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x double> @llvm.riscv.vfwnmacc.nxv8f64.nxv8f32.nxv8f32.i64(<vscale x 8 x double> [[VD:%.*]], <vscale x 8 x float> [[VS1:%.*]], <vscale x 8 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x double> @llvm.riscv.vfwnmacc.nxv8f64.nxv8f32.nxv8f32.i64(<vscale x 8 x double> [[VD:%.*]], <vscale x 8 x float> [[VS1:%.*]], <vscale x 8 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x double> [[TMP0]]
 //
 vfloat64m8_t test_vfwnmacc_vv_f64m8(vfloat64m8_t vd, vfloat32m4_t vs1, vfloat32m4_t vs2, size_t vl) {
@@ -162,7 +162,7 @@ vfloat64m8_t test_vfwnmacc_vv_f64m8(vfloat64m8_t vd, vfloat32m4_t vs1, vfloat32m
 
 // CHECK-RV64-LABEL: @test_vfwnmacc_vf_f64m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x double> @llvm.riscv.vfwnmacc.nxv8f64.f32.nxv8f32.i64(<vscale x 8 x double> [[VD:%.*]], float [[VS1:%.*]], <vscale x 8 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x double> @llvm.riscv.vfwnmacc.nxv8f64.f32.nxv8f32.i64(<vscale x 8 x double> [[VD:%.*]], float [[VS1:%.*]], <vscale x 8 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x double> [[TMP0]]
 //
 vfloat64m8_t test_vfwnmacc_vf_f64m8(vfloat64m8_t vd, float vs1, vfloat32m4_t vs2, size_t vl) {

diff  --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vfwnmsac.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vfwnmsac.c
index 4cfbe10335fcd..d312c6eeffab5 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vfwnmsac.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vfwnmsac.c
@@ -9,7 +9,7 @@
 
 // CHECK-RV64-LABEL: @test_vfwnmsac_vv_f32mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x float> @llvm.riscv.vfwnmsac.nxv1f32.nxv1f16.nxv1f16.i64(<vscale x 1 x float> [[VD:%.*]], <vscale x 1 x half> [[VS1:%.*]], <vscale x 1 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x float> @llvm.riscv.vfwnmsac.nxv1f32.nxv1f16.nxv1f16.i64(<vscale x 1 x float> [[VD:%.*]], <vscale x 1 x half> [[VS1:%.*]], <vscale x 1 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x float> [[TMP0]]
 //
 vfloat32mf2_t test_vfwnmsac_vv_f32mf2(vfloat32mf2_t vd, vfloat16mf4_t vs1, vfloat16mf4_t vs2, size_t vl) {
@@ -18,7 +18,7 @@ vfloat32mf2_t test_vfwnmsac_vv_f32mf2(vfloat32mf2_t vd, vfloat16mf4_t vs1, vfloa
 
 // CHECK-RV64-LABEL: @test_vfwnmsac_vf_f32mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x float> @llvm.riscv.vfwnmsac.nxv1f32.f16.nxv1f16.i64(<vscale x 1 x float> [[VD:%.*]], half [[VS1:%.*]], <vscale x 1 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x float> @llvm.riscv.vfwnmsac.nxv1f32.f16.nxv1f16.i64(<vscale x 1 x float> [[VD:%.*]], half [[VS1:%.*]], <vscale x 1 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x float> [[TMP0]]
 //
 vfloat32mf2_t test_vfwnmsac_vf_f32mf2(vfloat32mf2_t vd, _Float16 vs1, vfloat16mf4_t vs2, size_t vl) {
@@ -27,7 +27,7 @@ vfloat32mf2_t test_vfwnmsac_vf_f32mf2(vfloat32mf2_t vd, _Float16 vs1, vfloat16mf
 
 // CHECK-RV64-LABEL: @test_vfwnmsac_vv_f32m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x float> @llvm.riscv.vfwnmsac.nxv2f32.nxv2f16.nxv2f16.i64(<vscale x 2 x float> [[VD:%.*]], <vscale x 2 x half> [[VS1:%.*]], <vscale x 2 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x float> @llvm.riscv.vfwnmsac.nxv2f32.nxv2f16.nxv2f16.i64(<vscale x 2 x float> [[VD:%.*]], <vscale x 2 x half> [[VS1:%.*]], <vscale x 2 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x float> [[TMP0]]
 //
 vfloat32m1_t test_vfwnmsac_vv_f32m1(vfloat32m1_t vd, vfloat16mf2_t vs1, vfloat16mf2_t vs2, size_t vl) {
@@ -36,7 +36,7 @@ vfloat32m1_t test_vfwnmsac_vv_f32m1(vfloat32m1_t vd, vfloat16mf2_t vs1, vfloat16
 
 // CHECK-RV64-LABEL: @test_vfwnmsac_vf_f32m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x float> @llvm.riscv.vfwnmsac.nxv2f32.f16.nxv2f16.i64(<vscale x 2 x float> [[VD:%.*]], half [[VS1:%.*]], <vscale x 2 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x float> @llvm.riscv.vfwnmsac.nxv2f32.f16.nxv2f16.i64(<vscale x 2 x float> [[VD:%.*]], half [[VS1:%.*]], <vscale x 2 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x float> [[TMP0]]
 //
 vfloat32m1_t test_vfwnmsac_vf_f32m1(vfloat32m1_t vd, _Float16 vs1, vfloat16mf2_t vs2, size_t vl) {
@@ -45,7 +45,7 @@ vfloat32m1_t test_vfwnmsac_vf_f32m1(vfloat32m1_t vd, _Float16 vs1, vfloat16mf2_t
 
 // CHECK-RV64-LABEL: @test_vfwnmsac_vv_f32m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.riscv.vfwnmsac.nxv4f32.nxv4f16.nxv4f16.i64(<vscale x 4 x float> [[VD:%.*]], <vscale x 4 x half> [[VS1:%.*]], <vscale x 4 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.riscv.vfwnmsac.nxv4f32.nxv4f16.nxv4f16.i64(<vscale x 4 x float> [[VD:%.*]], <vscale x 4 x half> [[VS1:%.*]], <vscale x 4 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
 vfloat32m2_t test_vfwnmsac_vv_f32m2(vfloat32m2_t vd, vfloat16m1_t vs1, vfloat16m1_t vs2, size_t vl) {
@@ -54,7 +54,7 @@ vfloat32m2_t test_vfwnmsac_vv_f32m2(vfloat32m2_t vd, vfloat16m1_t vs1, vfloat16m
 
 // CHECK-RV64-LABEL: @test_vfwnmsac_vf_f32m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.riscv.vfwnmsac.nxv4f32.f16.nxv4f16.i64(<vscale x 4 x float> [[VD:%.*]], half [[VS1:%.*]], <vscale x 4 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.riscv.vfwnmsac.nxv4f32.f16.nxv4f16.i64(<vscale x 4 x float> [[VD:%.*]], half [[VS1:%.*]], <vscale x 4 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
 vfloat32m2_t test_vfwnmsac_vf_f32m2(vfloat32m2_t vd, _Float16 vs1, vfloat16m1_t vs2, size_t vl) {
@@ -63,7 +63,7 @@ vfloat32m2_t test_vfwnmsac_vf_f32m2(vfloat32m2_t vd, _Float16 vs1, vfloat16m1_t
 
 // CHECK-RV64-LABEL: @test_vfwnmsac_vv_f32m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x float> @llvm.riscv.vfwnmsac.nxv8f32.nxv8f16.nxv8f16.i64(<vscale x 8 x float> [[VD:%.*]], <vscale x 8 x half> [[VS1:%.*]], <vscale x 8 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x float> @llvm.riscv.vfwnmsac.nxv8f32.nxv8f16.nxv8f16.i64(<vscale x 8 x float> [[VD:%.*]], <vscale x 8 x half> [[VS1:%.*]], <vscale x 8 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x float> [[TMP0]]
 //
 vfloat32m4_t test_vfwnmsac_vv_f32m4(vfloat32m4_t vd, vfloat16m2_t vs1, vfloat16m2_t vs2, size_t vl) {
@@ -72,7 +72,7 @@ vfloat32m4_t test_vfwnmsac_vv_f32m4(vfloat32m4_t vd, vfloat16m2_t vs1, vfloat16m
 
 // CHECK-RV64-LABEL: @test_vfwnmsac_vf_f32m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x float> @llvm.riscv.vfwnmsac.nxv8f32.f16.nxv8f16.i64(<vscale x 8 x float> [[VD:%.*]], half [[VS1:%.*]], <vscale x 8 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x float> @llvm.riscv.vfwnmsac.nxv8f32.f16.nxv8f16.i64(<vscale x 8 x float> [[VD:%.*]], half [[VS1:%.*]], <vscale x 8 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x float> [[TMP0]]
 //
 vfloat32m4_t test_vfwnmsac_vf_f32m4(vfloat32m4_t vd, _Float16 vs1, vfloat16m2_t vs2, size_t vl) {
@@ -81,7 +81,7 @@ vfloat32m4_t test_vfwnmsac_vf_f32m4(vfloat32m4_t vd, _Float16 vs1, vfloat16m2_t
 
 // CHECK-RV64-LABEL: @test_vfwnmsac_vv_f32m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x float> @llvm.riscv.vfwnmsac.nxv16f32.nxv16f16.nxv16f16.i64(<vscale x 16 x float> [[VD:%.*]], <vscale x 16 x half> [[VS1:%.*]], <vscale x 16 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x float> @llvm.riscv.vfwnmsac.nxv16f32.nxv16f16.nxv16f16.i64(<vscale x 16 x float> [[VD:%.*]], <vscale x 16 x half> [[VS1:%.*]], <vscale x 16 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x float> [[TMP0]]
 //
 vfloat32m8_t test_vfwnmsac_vv_f32m8(vfloat32m8_t vd, vfloat16m4_t vs1, vfloat16m4_t vs2, size_t vl) {
@@ -90,7 +90,7 @@ vfloat32m8_t test_vfwnmsac_vv_f32m8(vfloat32m8_t vd, vfloat16m4_t vs1, vfloat16m
 
 // CHECK-RV64-LABEL: @test_vfwnmsac_vf_f32m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x float> @llvm.riscv.vfwnmsac.nxv16f32.f16.nxv16f16.i64(<vscale x 16 x float> [[VD:%.*]], half [[VS1:%.*]], <vscale x 16 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x float> @llvm.riscv.vfwnmsac.nxv16f32.f16.nxv16f16.i64(<vscale x 16 x float> [[VD:%.*]], half [[VS1:%.*]], <vscale x 16 x half> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x float> [[TMP0]]
 //
 vfloat32m8_t test_vfwnmsac_vf_f32m8(vfloat32m8_t vd, _Float16 vs1, vfloat16m4_t vs2, size_t vl) {
@@ -99,7 +99,7 @@ vfloat32m8_t test_vfwnmsac_vf_f32m8(vfloat32m8_t vd, _Float16 vs1, vfloat16m4_t
 
 // CHECK-RV64-LABEL: @test_vfwnmsac_vv_f64m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x double> @llvm.riscv.vfwnmsac.nxv1f64.nxv1f32.nxv1f32.i64(<vscale x 1 x double> [[VD:%.*]], <vscale x 1 x float> [[VS1:%.*]], <vscale x 1 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x double> @llvm.riscv.vfwnmsac.nxv1f64.nxv1f32.nxv1f32.i64(<vscale x 1 x double> [[VD:%.*]], <vscale x 1 x float> [[VS1:%.*]], <vscale x 1 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x double> [[TMP0]]
 //
 vfloat64m1_t test_vfwnmsac_vv_f64m1(vfloat64m1_t vd, vfloat32mf2_t vs1, vfloat32mf2_t vs2, size_t vl) {
@@ -108,7 +108,7 @@ vfloat64m1_t test_vfwnmsac_vv_f64m1(vfloat64m1_t vd, vfloat32mf2_t vs1, vfloat32
 
 // CHECK-RV64-LABEL: @test_vfwnmsac_vf_f64m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x double> @llvm.riscv.vfwnmsac.nxv1f64.f32.nxv1f32.i64(<vscale x 1 x double> [[VD:%.*]], float [[VS1:%.*]], <vscale x 1 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x double> @llvm.riscv.vfwnmsac.nxv1f64.f32.nxv1f32.i64(<vscale x 1 x double> [[VD:%.*]], float [[VS1:%.*]], <vscale x 1 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x double> [[TMP0]]
 //
 vfloat64m1_t test_vfwnmsac_vf_f64m1(vfloat64m1_t vd, float vs1, vfloat32mf2_t vs2, size_t vl) {
@@ -117,7 +117,7 @@ vfloat64m1_t test_vfwnmsac_vf_f64m1(vfloat64m1_t vd, float vs1, vfloat32mf2_t vs
 
 // CHECK-RV64-LABEL: @test_vfwnmsac_vv_f64m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x double> @llvm.riscv.vfwnmsac.nxv2f64.nxv2f32.nxv2f32.i64(<vscale x 2 x double> [[VD:%.*]], <vscale x 2 x float> [[VS1:%.*]], <vscale x 2 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x double> @llvm.riscv.vfwnmsac.nxv2f64.nxv2f32.nxv2f32.i64(<vscale x 2 x double> [[VD:%.*]], <vscale x 2 x float> [[VS1:%.*]], <vscale x 2 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x double> [[TMP0]]
 //
 vfloat64m2_t test_vfwnmsac_vv_f64m2(vfloat64m2_t vd, vfloat32m1_t vs1, vfloat32m1_t vs2, size_t vl) {
@@ -126,7 +126,7 @@ vfloat64m2_t test_vfwnmsac_vv_f64m2(vfloat64m2_t vd, vfloat32m1_t vs1, vfloat32m
 
 // CHECK-RV64-LABEL: @test_vfwnmsac_vf_f64m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x double> @llvm.riscv.vfwnmsac.nxv2f64.f32.nxv2f32.i64(<vscale x 2 x double> [[VD:%.*]], float [[VS1:%.*]], <vscale x 2 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x double> @llvm.riscv.vfwnmsac.nxv2f64.f32.nxv2f32.i64(<vscale x 2 x double> [[VD:%.*]], float [[VS1:%.*]], <vscale x 2 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x double> [[TMP0]]
 //
 vfloat64m2_t test_vfwnmsac_vf_f64m2(vfloat64m2_t vd, float vs1, vfloat32m1_t vs2, size_t vl) {
@@ -135,7 +135,7 @@ vfloat64m2_t test_vfwnmsac_vf_f64m2(vfloat64m2_t vd, float vs1, vfloat32m1_t vs2
 
 // CHECK-RV64-LABEL: @test_vfwnmsac_vv_f64m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x double> @llvm.riscv.vfwnmsac.nxv4f64.nxv4f32.nxv4f32.i64(<vscale x 4 x double> [[VD:%.*]], <vscale x 4 x float> [[VS1:%.*]], <vscale x 4 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x double> @llvm.riscv.vfwnmsac.nxv4f64.nxv4f32.nxv4f32.i64(<vscale x 4 x double> [[VD:%.*]], <vscale x 4 x float> [[VS1:%.*]], <vscale x 4 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x double> [[TMP0]]
 //
 vfloat64m4_t test_vfwnmsac_vv_f64m4(vfloat64m4_t vd, vfloat32m2_t vs1, vfloat32m2_t vs2, size_t vl) {
@@ -144,7 +144,7 @@ vfloat64m4_t test_vfwnmsac_vv_f64m4(vfloat64m4_t vd, vfloat32m2_t vs1, vfloat32m
 
 // CHECK-RV64-LABEL: @test_vfwnmsac_vf_f64m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x double> @llvm.riscv.vfwnmsac.nxv4f64.f32.nxv4f32.i64(<vscale x 4 x double> [[VD:%.*]], float [[VS1:%.*]], <vscale x 4 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x double> @llvm.riscv.vfwnmsac.nxv4f64.f32.nxv4f32.i64(<vscale x 4 x double> [[VD:%.*]], float [[VS1:%.*]], <vscale x 4 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x double> [[TMP0]]
 //
 vfloat64m4_t test_vfwnmsac_vf_f64m4(vfloat64m4_t vd, float vs1, vfloat32m2_t vs2, size_t vl) {
@@ -153,7 +153,7 @@ vfloat64m4_t test_vfwnmsac_vf_f64m4(vfloat64m4_t vd, float vs1, vfloat32m2_t vs2
 
 // CHECK-RV64-LABEL: @test_vfwnmsac_vv_f64m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x double> @llvm.riscv.vfwnmsac.nxv8f64.nxv8f32.nxv8f32.i64(<vscale x 8 x double> [[VD:%.*]], <vscale x 8 x float> [[VS1:%.*]], <vscale x 8 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x double> @llvm.riscv.vfwnmsac.nxv8f64.nxv8f32.nxv8f32.i64(<vscale x 8 x double> [[VD:%.*]], <vscale x 8 x float> [[VS1:%.*]], <vscale x 8 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x double> [[TMP0]]
 //
 vfloat64m8_t test_vfwnmsac_vv_f64m8(vfloat64m8_t vd, vfloat32m4_t vs1, vfloat32m4_t vs2, size_t vl) {
@@ -162,7 +162,7 @@ vfloat64m8_t test_vfwnmsac_vv_f64m8(vfloat64m8_t vd, vfloat32m4_t vs1, vfloat32m
 
 // CHECK-RV64-LABEL: @test_vfwnmsac_vf_f64m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x double> @llvm.riscv.vfwnmsac.nxv8f64.f32.nxv8f32.i64(<vscale x 8 x double> [[VD:%.*]], float [[VS1:%.*]], <vscale x 8 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x double> @llvm.riscv.vfwnmsac.nxv8f64.f32.nxv8f32.i64(<vscale x 8 x double> [[VD:%.*]], float [[VS1:%.*]], <vscale x 8 x float> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x double> [[TMP0]]
 //
 vfloat64m8_t test_vfwnmsac_vf_f64m8(vfloat64m8_t vd, float vs1, vfloat32m4_t vs2, size_t vl) {

diff  --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vmacc.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vmacc.c
index 0048d864c4023..c44d44b842844 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vmacc.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vmacc.c
@@ -9,7 +9,7 @@
 
 // CHECK-RV64-LABEL: @test_vmacc_vv_i8mf8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vmacc.nxv1i8.nxv1i8.i64(<vscale x 1 x i8> [[VD:%.*]], <vscale x 1 x i8> [[VS1:%.*]], <vscale x 1 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vmacc.nxv1i8.nxv1i8.i64(<vscale x 1 x i8> [[VD:%.*]], <vscale x 1 x i8> [[VS1:%.*]], <vscale x 1 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i8> [[TMP0]]
 //
 vint8mf8_t test_vmacc_vv_i8mf8(vint8mf8_t vd, vint8mf8_t vs1, vint8mf8_t vs2, size_t vl) {
@@ -18,7 +18,7 @@ vint8mf8_t test_vmacc_vv_i8mf8(vint8mf8_t vd, vint8mf8_t vs1, vint8mf8_t vs2, si
 
 // CHECK-RV64-LABEL: @test_vmacc_vx_i8mf8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vmacc.nxv1i8.i8.i64(<vscale x 1 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 1 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vmacc.nxv1i8.i8.i64(<vscale x 1 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 1 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i8> [[TMP0]]
 //
 vint8mf8_t test_vmacc_vx_i8mf8(vint8mf8_t vd, int8_t rs1, vint8mf8_t vs2, size_t vl) {
@@ -27,7 +27,7 @@ vint8mf8_t test_vmacc_vx_i8mf8(vint8mf8_t vd, int8_t rs1, vint8mf8_t vs2, size_t
 
 // CHECK-RV64-LABEL: @test_vmacc_vv_i8mf4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vmacc.nxv2i8.nxv2i8.i64(<vscale x 2 x i8> [[VD:%.*]], <vscale x 2 x i8> [[VS1:%.*]], <vscale x 2 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vmacc.nxv2i8.nxv2i8.i64(<vscale x 2 x i8> [[VD:%.*]], <vscale x 2 x i8> [[VS1:%.*]], <vscale x 2 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i8> [[TMP0]]
 //
 vint8mf4_t test_vmacc_vv_i8mf4(vint8mf4_t vd, vint8mf4_t vs1, vint8mf4_t vs2, size_t vl) {
@@ -36,7 +36,7 @@ vint8mf4_t test_vmacc_vv_i8mf4(vint8mf4_t vd, vint8mf4_t vs1, vint8mf4_t vs2, si
 
 // CHECK-RV64-LABEL: @test_vmacc_vx_i8mf4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vmacc.nxv2i8.i8.i64(<vscale x 2 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 2 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vmacc.nxv2i8.i8.i64(<vscale x 2 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 2 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i8> [[TMP0]]
 //
 vint8mf4_t test_vmacc_vx_i8mf4(vint8mf4_t vd, int8_t rs1, vint8mf4_t vs2, size_t vl) {
@@ -45,7 +45,7 @@ vint8mf4_t test_vmacc_vx_i8mf4(vint8mf4_t vd, int8_t rs1, vint8mf4_t vs2, size_t
 
 // CHECK-RV64-LABEL: @test_vmacc_vv_i8mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vmacc.nxv4i8.nxv4i8.i64(<vscale x 4 x i8> [[VD:%.*]], <vscale x 4 x i8> [[VS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vmacc.nxv4i8.nxv4i8.i64(<vscale x 4 x i8> [[VD:%.*]], <vscale x 4 x i8> [[VS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i8> [[TMP0]]
 //
 vint8mf2_t test_vmacc_vv_i8mf2(vint8mf2_t vd, vint8mf2_t vs1, vint8mf2_t vs2, size_t vl) {
@@ -54,7 +54,7 @@ vint8mf2_t test_vmacc_vv_i8mf2(vint8mf2_t vd, vint8mf2_t vs1, vint8mf2_t vs2, si
 
 // CHECK-RV64-LABEL: @test_vmacc_vx_i8mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vmacc.nxv4i8.i8.i64(<vscale x 4 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vmacc.nxv4i8.i8.i64(<vscale x 4 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i8> [[TMP0]]
 //
 vint8mf2_t test_vmacc_vx_i8mf2(vint8mf2_t vd, int8_t rs1, vint8mf2_t vs2, size_t vl) {
@@ -63,7 +63,7 @@ vint8mf2_t test_vmacc_vx_i8mf2(vint8mf2_t vd, int8_t rs1, vint8mf2_t vs2, size_t
 
 // CHECK-RV64-LABEL: @test_vmacc_vv_i8m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vmacc.nxv8i8.nxv8i8.i64(<vscale x 8 x i8> [[VD:%.*]], <vscale x 8 x i8> [[VS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vmacc.nxv8i8.nxv8i8.i64(<vscale x 8 x i8> [[VD:%.*]], <vscale x 8 x i8> [[VS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i8> [[TMP0]]
 //
 vint8m1_t test_vmacc_vv_i8m1(vint8m1_t vd, vint8m1_t vs1, vint8m1_t vs2, size_t vl) {
@@ -72,7 +72,7 @@ vint8m1_t test_vmacc_vv_i8m1(vint8m1_t vd, vint8m1_t vs1, vint8m1_t vs2, size_t
 
 // CHECK-RV64-LABEL: @test_vmacc_vx_i8m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vmacc.nxv8i8.i8.i64(<vscale x 8 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vmacc.nxv8i8.i8.i64(<vscale x 8 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i8> [[TMP0]]
 //
 vint8m1_t test_vmacc_vx_i8m1(vint8m1_t vd, int8_t rs1, vint8m1_t vs2, size_t vl) {
@@ -81,7 +81,7 @@ vint8m1_t test_vmacc_vx_i8m1(vint8m1_t vd, int8_t rs1, vint8m1_t vs2, size_t vl)
 
 // CHECK-RV64-LABEL: @test_vmacc_vv_i8m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vmacc.nxv16i8.nxv16i8.i64(<vscale x 16 x i8> [[VD:%.*]], <vscale x 16 x i8> [[VS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vmacc.nxv16i8.nxv16i8.i64(<vscale x 16 x i8> [[VD:%.*]], <vscale x 16 x i8> [[VS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
 vint8m2_t test_vmacc_vv_i8m2(vint8m2_t vd, vint8m2_t vs1, vint8m2_t vs2, size_t vl) {
@@ -90,7 +90,7 @@ vint8m2_t test_vmacc_vv_i8m2(vint8m2_t vd, vint8m2_t vs1, vint8m2_t vs2, size_t
 
 // CHECK-RV64-LABEL: @test_vmacc_vx_i8m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vmacc.nxv16i8.i8.i64(<vscale x 16 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vmacc.nxv16i8.i8.i64(<vscale x 16 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
 vint8m2_t test_vmacc_vx_i8m2(vint8m2_t vd, int8_t rs1, vint8m2_t vs2, size_t vl) {
@@ -99,7 +99,7 @@ vint8m2_t test_vmacc_vx_i8m2(vint8m2_t vd, int8_t rs1, vint8m2_t vs2, size_t vl)
 
 // CHECK-RV64-LABEL: @test_vmacc_vv_i8m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i8> @llvm.riscv.vmacc.nxv32i8.nxv32i8.i64(<vscale x 32 x i8> [[VD:%.*]], <vscale x 32 x i8> [[VS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i8> @llvm.riscv.vmacc.nxv32i8.nxv32i8.i64(<vscale x 32 x i8> [[VD:%.*]], <vscale x 32 x i8> [[VS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 32 x i8> [[TMP0]]
 //
 vint8m4_t test_vmacc_vv_i8m4(vint8m4_t vd, vint8m4_t vs1, vint8m4_t vs2, size_t vl) {
@@ -108,7 +108,7 @@ vint8m4_t test_vmacc_vv_i8m4(vint8m4_t vd, vint8m4_t vs1, vint8m4_t vs2, size_t
 
 // CHECK-RV64-LABEL: @test_vmacc_vx_i8m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i8> @llvm.riscv.vmacc.nxv32i8.i8.i64(<vscale x 32 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i8> @llvm.riscv.vmacc.nxv32i8.i8.i64(<vscale x 32 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 32 x i8> [[TMP0]]
 //
 vint8m4_t test_vmacc_vx_i8m4(vint8m4_t vd, int8_t rs1, vint8m4_t vs2, size_t vl) {
@@ -117,7 +117,7 @@ vint8m4_t test_vmacc_vx_i8m4(vint8m4_t vd, int8_t rs1, vint8m4_t vs2, size_t vl)
 
 // CHECK-RV64-LABEL: @test_vmacc_vv_i8m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 64 x i8> @llvm.riscv.vmacc.nxv64i8.nxv64i8.i64(<vscale x 64 x i8> [[VD:%.*]], <vscale x 64 x i8> [[VS1:%.*]], <vscale x 64 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 64 x i8> @llvm.riscv.vmacc.nxv64i8.nxv64i8.i64(<vscale x 64 x i8> [[VD:%.*]], <vscale x 64 x i8> [[VS1:%.*]], <vscale x 64 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 64 x i8> [[TMP0]]
 //
 vint8m8_t test_vmacc_vv_i8m8(vint8m8_t vd, vint8m8_t vs1, vint8m8_t vs2, size_t vl) {
@@ -126,7 +126,7 @@ vint8m8_t test_vmacc_vv_i8m8(vint8m8_t vd, vint8m8_t vs1, vint8m8_t vs2, size_t
 
 // CHECK-RV64-LABEL: @test_vmacc_vx_i8m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 64 x i8> @llvm.riscv.vmacc.nxv64i8.i8.i64(<vscale x 64 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 64 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 64 x i8> @llvm.riscv.vmacc.nxv64i8.i8.i64(<vscale x 64 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 64 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 64 x i8> [[TMP0]]
 //
 vint8m8_t test_vmacc_vx_i8m8(vint8m8_t vd, int8_t rs1, vint8m8_t vs2, size_t vl) {
@@ -135,7 +135,7 @@ vint8m8_t test_vmacc_vx_i8m8(vint8m8_t vd, int8_t rs1, vint8m8_t vs2, size_t vl)
 
 // CHECK-RV64-LABEL: @test_vmacc_vv_i16mf4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vmacc.nxv1i16.nxv1i16.i64(<vscale x 1 x i16> [[VD:%.*]], <vscale x 1 x i16> [[VS1:%.*]], <vscale x 1 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vmacc.nxv1i16.nxv1i16.i64(<vscale x 1 x i16> [[VD:%.*]], <vscale x 1 x i16> [[VS1:%.*]], <vscale x 1 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i16> [[TMP0]]
 //
 vint16mf4_t test_vmacc_vv_i16mf4(vint16mf4_t vd, vint16mf4_t vs1, vint16mf4_t vs2, size_t vl) {
@@ -144,7 +144,7 @@ vint16mf4_t test_vmacc_vv_i16mf4(vint16mf4_t vd, vint16mf4_t vs1, vint16mf4_t vs
 
 // CHECK-RV64-LABEL: @test_vmacc_vx_i16mf4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vmacc.nxv1i16.i16.i64(<vscale x 1 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 1 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vmacc.nxv1i16.i16.i64(<vscale x 1 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 1 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i16> [[TMP0]]
 //
 vint16mf4_t test_vmacc_vx_i16mf4(vint16mf4_t vd, int16_t rs1, vint16mf4_t vs2, size_t vl) {
@@ -153,7 +153,7 @@ vint16mf4_t test_vmacc_vx_i16mf4(vint16mf4_t vd, int16_t rs1, vint16mf4_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vmacc_vv_i16mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vmacc.nxv2i16.nxv2i16.i64(<vscale x 2 x i16> [[VD:%.*]], <vscale x 2 x i16> [[VS1:%.*]], <vscale x 2 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vmacc.nxv2i16.nxv2i16.i64(<vscale x 2 x i16> [[VD:%.*]], <vscale x 2 x i16> [[VS1:%.*]], <vscale x 2 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i16> [[TMP0]]
 //
 vint16mf2_t test_vmacc_vv_i16mf2(vint16mf2_t vd, vint16mf2_t vs1, vint16mf2_t vs2, size_t vl) {
@@ -162,7 +162,7 @@ vint16mf2_t test_vmacc_vv_i16mf2(vint16mf2_t vd, vint16mf2_t vs1, vint16mf2_t vs
 
 // CHECK-RV64-LABEL: @test_vmacc_vx_i16mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vmacc.nxv2i16.i16.i64(<vscale x 2 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 2 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vmacc.nxv2i16.i16.i64(<vscale x 2 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 2 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i16> [[TMP0]]
 //
 vint16mf2_t test_vmacc_vx_i16mf2(vint16mf2_t vd, int16_t rs1, vint16mf2_t vs2, size_t vl) {
@@ -171,7 +171,7 @@ vint16mf2_t test_vmacc_vx_i16mf2(vint16mf2_t vd, int16_t rs1, vint16mf2_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vmacc_vv_i16m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vmacc.nxv4i16.nxv4i16.i64(<vscale x 4 x i16> [[VD:%.*]], <vscale x 4 x i16> [[VS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vmacc.nxv4i16.nxv4i16.i64(<vscale x 4 x i16> [[VD:%.*]], <vscale x 4 x i16> [[VS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i16> [[TMP0]]
 //
 vint16m1_t test_vmacc_vv_i16m1(vint16m1_t vd, vint16m1_t vs1, vint16m1_t vs2, size_t vl) {
@@ -180,7 +180,7 @@ vint16m1_t test_vmacc_vv_i16m1(vint16m1_t vd, vint16m1_t vs1, vint16m1_t vs2, si
 
 // CHECK-RV64-LABEL: @test_vmacc_vx_i16m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vmacc.nxv4i16.i16.i64(<vscale x 4 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vmacc.nxv4i16.i16.i64(<vscale x 4 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i16> [[TMP0]]
 //
 vint16m1_t test_vmacc_vx_i16m1(vint16m1_t vd, int16_t rs1, vint16m1_t vs2, size_t vl) {
@@ -189,7 +189,7 @@ vint16m1_t test_vmacc_vx_i16m1(vint16m1_t vd, int16_t rs1, vint16m1_t vs2, size_
 
 // CHECK-RV64-LABEL: @test_vmacc_vv_i16m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vmacc.nxv8i16.nxv8i16.i64(<vscale x 8 x i16> [[VD:%.*]], <vscale x 8 x i16> [[VS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vmacc.nxv8i16.nxv8i16.i64(<vscale x 8 x i16> [[VD:%.*]], <vscale x 8 x i16> [[VS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
 vint16m2_t test_vmacc_vv_i16m2(vint16m2_t vd, vint16m2_t vs1, vint16m2_t vs2, size_t vl) {
@@ -198,7 +198,7 @@ vint16m2_t test_vmacc_vv_i16m2(vint16m2_t vd, vint16m2_t vs1, vint16m2_t vs2, si
 
 // CHECK-RV64-LABEL: @test_vmacc_vx_i16m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vmacc.nxv8i16.i16.i64(<vscale x 8 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vmacc.nxv8i16.i16.i64(<vscale x 8 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
 vint16m2_t test_vmacc_vx_i16m2(vint16m2_t vd, int16_t rs1, vint16m2_t vs2, size_t vl) {
@@ -207,7 +207,7 @@ vint16m2_t test_vmacc_vx_i16m2(vint16m2_t vd, int16_t rs1, vint16m2_t vs2, size_
 
 // CHECK-RV64-LABEL: @test_vmacc_vv_i16m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vmacc.nxv16i16.nxv16i16.i64(<vscale x 16 x i16> [[VD:%.*]], <vscale x 16 x i16> [[VS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vmacc.nxv16i16.nxv16i16.i64(<vscale x 16 x i16> [[VD:%.*]], <vscale x 16 x i16> [[VS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i16> [[TMP0]]
 //
 vint16m4_t test_vmacc_vv_i16m4(vint16m4_t vd, vint16m4_t vs1, vint16m4_t vs2, size_t vl) {
@@ -216,7 +216,7 @@ vint16m4_t test_vmacc_vv_i16m4(vint16m4_t vd, vint16m4_t vs1, vint16m4_t vs2, si
 
 // CHECK-RV64-LABEL: @test_vmacc_vx_i16m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vmacc.nxv16i16.i16.i64(<vscale x 16 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vmacc.nxv16i16.i16.i64(<vscale x 16 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i16> [[TMP0]]
 //
 vint16m4_t test_vmacc_vx_i16m4(vint16m4_t vd, int16_t rs1, vint16m4_t vs2, size_t vl) {
@@ -225,7 +225,7 @@ vint16m4_t test_vmacc_vx_i16m4(vint16m4_t vd, int16_t rs1, vint16m4_t vs2, size_
 
 // CHECK-RV64-LABEL: @test_vmacc_vv_i16m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vmacc.nxv32i16.nxv32i16.i64(<vscale x 32 x i16> [[VD:%.*]], <vscale x 32 x i16> [[VS1:%.*]], <vscale x 32 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vmacc.nxv32i16.nxv32i16.i64(<vscale x 32 x i16> [[VD:%.*]], <vscale x 32 x i16> [[VS1:%.*]], <vscale x 32 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 32 x i16> [[TMP0]]
 //
 vint16m8_t test_vmacc_vv_i16m8(vint16m8_t vd, vint16m8_t vs1, vint16m8_t vs2, size_t vl) {
@@ -234,7 +234,7 @@ vint16m8_t test_vmacc_vv_i16m8(vint16m8_t vd, vint16m8_t vs1, vint16m8_t vs2, si
 
 // CHECK-RV64-LABEL: @test_vmacc_vx_i16m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vmacc.nxv32i16.i16.i64(<vscale x 32 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 32 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vmacc.nxv32i16.i16.i64(<vscale x 32 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 32 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 32 x i16> [[TMP0]]
 //
 vint16m8_t test_vmacc_vx_i16m8(vint16m8_t vd, int16_t rs1, vint16m8_t vs2, size_t vl) {
@@ -243,7 +243,7 @@ vint16m8_t test_vmacc_vx_i16m8(vint16m8_t vd, int16_t rs1, vint16m8_t vs2, size_
 
 // CHECK-RV64-LABEL: @test_vmacc_vv_i32mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vmacc.nxv1i32.nxv1i32.i64(<vscale x 1 x i32> [[VD:%.*]], <vscale x 1 x i32> [[VS1:%.*]], <vscale x 1 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vmacc.nxv1i32.nxv1i32.i64(<vscale x 1 x i32> [[VD:%.*]], <vscale x 1 x i32> [[VS1:%.*]], <vscale x 1 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i32> [[TMP0]]
 //
 vint32mf2_t test_vmacc_vv_i32mf2(vint32mf2_t vd, vint32mf2_t vs1, vint32mf2_t vs2, size_t vl) {
@@ -252,7 +252,7 @@ vint32mf2_t test_vmacc_vv_i32mf2(vint32mf2_t vd, vint32mf2_t vs1, vint32mf2_t vs
 
 // CHECK-RV64-LABEL: @test_vmacc_vx_i32mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vmacc.nxv1i32.i32.i64(<vscale x 1 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 1 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vmacc.nxv1i32.i32.i64(<vscale x 1 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 1 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i32> [[TMP0]]
 //
 vint32mf2_t test_vmacc_vx_i32mf2(vint32mf2_t vd, int32_t rs1, vint32mf2_t vs2, size_t vl) {
@@ -261,7 +261,7 @@ vint32mf2_t test_vmacc_vx_i32mf2(vint32mf2_t vd, int32_t rs1, vint32mf2_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vmacc_vv_i32m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vmacc.nxv2i32.nxv2i32.i64(<vscale x 2 x i32> [[VD:%.*]], <vscale x 2 x i32> [[VS1:%.*]], <vscale x 2 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vmacc.nxv2i32.nxv2i32.i64(<vscale x 2 x i32> [[VD:%.*]], <vscale x 2 x i32> [[VS1:%.*]], <vscale x 2 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i32> [[TMP0]]
 //
 vint32m1_t test_vmacc_vv_i32m1(vint32m1_t vd, vint32m1_t vs1, vint32m1_t vs2, size_t vl) {
@@ -270,7 +270,7 @@ vint32m1_t test_vmacc_vv_i32m1(vint32m1_t vd, vint32m1_t vs1, vint32m1_t vs2, si
 
 // CHECK-RV64-LABEL: @test_vmacc_vx_i32m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vmacc.nxv2i32.i32.i64(<vscale x 2 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 2 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vmacc.nxv2i32.i32.i64(<vscale x 2 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 2 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i32> [[TMP0]]
 //
 vint32m1_t test_vmacc_vx_i32m1(vint32m1_t vd, int32_t rs1, vint32m1_t vs2, size_t vl) {
@@ -279,7 +279,7 @@ vint32m1_t test_vmacc_vx_i32m1(vint32m1_t vd, int32_t rs1, vint32m1_t vs2, size_
 
 // CHECK-RV64-LABEL: @test_vmacc_vv_i32m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vmacc.nxv4i32.nxv4i32.i64(<vscale x 4 x i32> [[VD:%.*]], <vscale x 4 x i32> [[VS1:%.*]], <vscale x 4 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vmacc.nxv4i32.nxv4i32.i64(<vscale x 4 x i32> [[VD:%.*]], <vscale x 4 x i32> [[VS1:%.*]], <vscale x 4 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
 vint32m2_t test_vmacc_vv_i32m2(vint32m2_t vd, vint32m2_t vs1, vint32m2_t vs2, size_t vl) {
@@ -288,7 +288,7 @@ vint32m2_t test_vmacc_vv_i32m2(vint32m2_t vd, vint32m2_t vs1, vint32m2_t vs2, si
 
 // CHECK-RV64-LABEL: @test_vmacc_vx_i32m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vmacc.nxv4i32.i32.i64(<vscale x 4 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 4 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vmacc.nxv4i32.i32.i64(<vscale x 4 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 4 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
 vint32m2_t test_vmacc_vx_i32m2(vint32m2_t vd, int32_t rs1, vint32m2_t vs2, size_t vl) {
@@ -297,7 +297,7 @@ vint32m2_t test_vmacc_vx_i32m2(vint32m2_t vd, int32_t rs1, vint32m2_t vs2, size_
 
 // CHECK-RV64-LABEL: @test_vmacc_vv_i32m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vmacc.nxv8i32.nxv8i32.i64(<vscale x 8 x i32> [[VD:%.*]], <vscale x 8 x i32> [[VS1:%.*]], <vscale x 8 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vmacc.nxv8i32.nxv8i32.i64(<vscale x 8 x i32> [[VD:%.*]], <vscale x 8 x i32> [[VS1:%.*]], <vscale x 8 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i32> [[TMP0]]
 //
 vint32m4_t test_vmacc_vv_i32m4(vint32m4_t vd, vint32m4_t vs1, vint32m4_t vs2, size_t vl) {
@@ -306,7 +306,7 @@ vint32m4_t test_vmacc_vv_i32m4(vint32m4_t vd, vint32m4_t vs1, vint32m4_t vs2, si
 
 // CHECK-RV64-LABEL: @test_vmacc_vx_i32m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vmacc.nxv8i32.i32.i64(<vscale x 8 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 8 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vmacc.nxv8i32.i32.i64(<vscale x 8 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 8 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i32> [[TMP0]]
 //
 vint32m4_t test_vmacc_vx_i32m4(vint32m4_t vd, int32_t rs1, vint32m4_t vs2, size_t vl) {
@@ -315,7 +315,7 @@ vint32m4_t test_vmacc_vx_i32m4(vint32m4_t vd, int32_t rs1, vint32m4_t vs2, size_
 
 // CHECK-RV64-LABEL: @test_vmacc_vv_i32m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vmacc.nxv16i32.nxv16i32.i64(<vscale x 16 x i32> [[VD:%.*]], <vscale x 16 x i32> [[VS1:%.*]], <vscale x 16 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vmacc.nxv16i32.nxv16i32.i64(<vscale x 16 x i32> [[VD:%.*]], <vscale x 16 x i32> [[VS1:%.*]], <vscale x 16 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i32> [[TMP0]]
 //
 vint32m8_t test_vmacc_vv_i32m8(vint32m8_t vd, vint32m8_t vs1, vint32m8_t vs2, size_t vl) {
@@ -324,7 +324,7 @@ vint32m8_t test_vmacc_vv_i32m8(vint32m8_t vd, vint32m8_t vs1, vint32m8_t vs2, si
 
 // CHECK-RV64-LABEL: @test_vmacc_vx_i32m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vmacc.nxv16i32.i32.i64(<vscale x 16 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 16 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vmacc.nxv16i32.i32.i64(<vscale x 16 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 16 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i32> [[TMP0]]
 //
 vint32m8_t test_vmacc_vx_i32m8(vint32m8_t vd, int32_t rs1, vint32m8_t vs2, size_t vl) {
@@ -333,7 +333,7 @@ vint32m8_t test_vmacc_vx_i32m8(vint32m8_t vd, int32_t rs1, vint32m8_t vs2, size_
 
 // CHECK-RV64-LABEL: @test_vmacc_vv_i64m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vmacc.nxv1i64.nxv1i64.i64(<vscale x 1 x i64> [[VD:%.*]], <vscale x 1 x i64> [[VS1:%.*]], <vscale x 1 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vmacc.nxv1i64.nxv1i64.i64(<vscale x 1 x i64> [[VD:%.*]], <vscale x 1 x i64> [[VS1:%.*]], <vscale x 1 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
 //
 vint64m1_t test_vmacc_vv_i64m1(vint64m1_t vd, vint64m1_t vs1, vint64m1_t vs2, size_t vl) {
@@ -342,7 +342,7 @@ vint64m1_t test_vmacc_vv_i64m1(vint64m1_t vd, vint64m1_t vs1, vint64m1_t vs2, si
 
 // CHECK-RV64-LABEL: @test_vmacc_vx_i64m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vmacc.nxv1i64.i64.i64(<vscale x 1 x i64> [[VD:%.*]], i64 [[RS1:%.*]], <vscale x 1 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vmacc.nxv1i64.i64.i64(<vscale x 1 x i64> [[VD:%.*]], i64 [[RS1:%.*]], <vscale x 1 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
 //
 vint64m1_t test_vmacc_vx_i64m1(vint64m1_t vd, int64_t rs1, vint64m1_t vs2, size_t vl) {
@@ -351,7 +351,7 @@ vint64m1_t test_vmacc_vx_i64m1(vint64m1_t vd, int64_t rs1, vint64m1_t vs2, size_
 
 // CHECK-RV64-LABEL: @test_vmacc_vv_i64m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vmacc.nxv2i64.nxv2i64.i64(<vscale x 2 x i64> [[VD:%.*]], <vscale x 2 x i64> [[VS1:%.*]], <vscale x 2 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vmacc.nxv2i64.nxv2i64.i64(<vscale x 2 x i64> [[VD:%.*]], <vscale x 2 x i64> [[VS1:%.*]], <vscale x 2 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
 vint64m2_t test_vmacc_vv_i64m2(vint64m2_t vd, vint64m2_t vs1, vint64m2_t vs2, size_t vl) {
@@ -360,7 +360,7 @@ vint64m2_t test_vmacc_vv_i64m2(vint64m2_t vd, vint64m2_t vs1, vint64m2_t vs2, si
 
 // CHECK-RV64-LABEL: @test_vmacc_vx_i64m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vmacc.nxv2i64.i64.i64(<vscale x 2 x i64> [[VD:%.*]], i64 [[RS1:%.*]], <vscale x 2 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vmacc.nxv2i64.i64.i64(<vscale x 2 x i64> [[VD:%.*]], i64 [[RS1:%.*]], <vscale x 2 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
 vint64m2_t test_vmacc_vx_i64m2(vint64m2_t vd, int64_t rs1, vint64m2_t vs2, size_t vl) {
@@ -369,7 +369,7 @@ vint64m2_t test_vmacc_vx_i64m2(vint64m2_t vd, int64_t rs1, vint64m2_t vs2, size_
 
 // CHECK-RV64-LABEL: @test_vmacc_vv_i64m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vmacc.nxv4i64.nxv4i64.i64(<vscale x 4 x i64> [[VD:%.*]], <vscale x 4 x i64> [[VS1:%.*]], <vscale x 4 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vmacc.nxv4i64.nxv4i64.i64(<vscale x 4 x i64> [[VD:%.*]], <vscale x 4 x i64> [[VS1:%.*]], <vscale x 4 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
 //
 vint64m4_t test_vmacc_vv_i64m4(vint64m4_t vd, vint64m4_t vs1, vint64m4_t vs2, size_t vl) {
@@ -378,7 +378,7 @@ vint64m4_t test_vmacc_vv_i64m4(vint64m4_t vd, vint64m4_t vs1, vint64m4_t vs2, si
 
 // CHECK-RV64-LABEL: @test_vmacc_vx_i64m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vmacc.nxv4i64.i64.i64(<vscale x 4 x i64> [[VD:%.*]], i64 [[RS1:%.*]], <vscale x 4 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vmacc.nxv4i64.i64.i64(<vscale x 4 x i64> [[VD:%.*]], i64 [[RS1:%.*]], <vscale x 4 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
 //
 vint64m4_t test_vmacc_vx_i64m4(vint64m4_t vd, int64_t rs1, vint64m4_t vs2, size_t vl) {
@@ -387,7 +387,7 @@ vint64m4_t test_vmacc_vx_i64m4(vint64m4_t vd, int64_t rs1, vint64m4_t vs2, size_
 
 // CHECK-RV64-LABEL: @test_vmacc_vv_i64m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vmacc.nxv8i64.nxv8i64.i64(<vscale x 8 x i64> [[VD:%.*]], <vscale x 8 x i64> [[VS1:%.*]], <vscale x 8 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vmacc.nxv8i64.nxv8i64.i64(<vscale x 8 x i64> [[VD:%.*]], <vscale x 8 x i64> [[VS1:%.*]], <vscale x 8 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
 //
 vint64m8_t test_vmacc_vv_i64m8(vint64m8_t vd, vint64m8_t vs1, vint64m8_t vs2, size_t vl) {
@@ -396,7 +396,7 @@ vint64m8_t test_vmacc_vv_i64m8(vint64m8_t vd, vint64m8_t vs1, vint64m8_t vs2, si
 
 // CHECK-RV64-LABEL: @test_vmacc_vx_i64m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vmacc.nxv8i64.i64.i64(<vscale x 8 x i64> [[VD:%.*]], i64 [[RS1:%.*]], <vscale x 8 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vmacc.nxv8i64.i64.i64(<vscale x 8 x i64> [[VD:%.*]], i64 [[RS1:%.*]], <vscale x 8 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
 //
 vint64m8_t test_vmacc_vx_i64m8(vint64m8_t vd, int64_t rs1, vint64m8_t vs2, size_t vl) {
@@ -405,7 +405,7 @@ vint64m8_t test_vmacc_vx_i64m8(vint64m8_t vd, int64_t rs1, vint64m8_t vs2, size_
 
 // CHECK-RV64-LABEL: @test_vmacc_vv_u8mf8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vmacc.nxv1i8.nxv1i8.i64(<vscale x 1 x i8> [[VD:%.*]], <vscale x 1 x i8> [[VS1:%.*]], <vscale x 1 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vmacc.nxv1i8.nxv1i8.i64(<vscale x 1 x i8> [[VD:%.*]], <vscale x 1 x i8> [[VS1:%.*]], <vscale x 1 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i8> [[TMP0]]
 //
 vuint8mf8_t test_vmacc_vv_u8mf8(vuint8mf8_t vd, vuint8mf8_t vs1, vuint8mf8_t vs2, size_t vl) {
@@ -414,7 +414,7 @@ vuint8mf8_t test_vmacc_vv_u8mf8(vuint8mf8_t vd, vuint8mf8_t vs1, vuint8mf8_t vs2
 
 // CHECK-RV64-LABEL: @test_vmacc_vx_u8mf8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vmacc.nxv1i8.i8.i64(<vscale x 1 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 1 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vmacc.nxv1i8.i8.i64(<vscale x 1 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 1 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i8> [[TMP0]]
 //
 vuint8mf8_t test_vmacc_vx_u8mf8(vuint8mf8_t vd, uint8_t rs1, vuint8mf8_t vs2, size_t vl) {
@@ -423,7 +423,7 @@ vuint8mf8_t test_vmacc_vx_u8mf8(vuint8mf8_t vd, uint8_t rs1, vuint8mf8_t vs2, si
 
 // CHECK-RV64-LABEL: @test_vmacc_vv_u8mf4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vmacc.nxv2i8.nxv2i8.i64(<vscale x 2 x i8> [[VD:%.*]], <vscale x 2 x i8> [[VS1:%.*]], <vscale x 2 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vmacc.nxv2i8.nxv2i8.i64(<vscale x 2 x i8> [[VD:%.*]], <vscale x 2 x i8> [[VS1:%.*]], <vscale x 2 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i8> [[TMP0]]
 //
 vuint8mf4_t test_vmacc_vv_u8mf4(vuint8mf4_t vd, vuint8mf4_t vs1, vuint8mf4_t vs2, size_t vl) {
@@ -432,7 +432,7 @@ vuint8mf4_t test_vmacc_vv_u8mf4(vuint8mf4_t vd, vuint8mf4_t vs1, vuint8mf4_t vs2
 
 // CHECK-RV64-LABEL: @test_vmacc_vx_u8mf4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vmacc.nxv2i8.i8.i64(<vscale x 2 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 2 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vmacc.nxv2i8.i8.i64(<vscale x 2 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 2 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i8> [[TMP0]]
 //
 vuint8mf4_t test_vmacc_vx_u8mf4(vuint8mf4_t vd, uint8_t rs1, vuint8mf4_t vs2, size_t vl) {
@@ -441,7 +441,7 @@ vuint8mf4_t test_vmacc_vx_u8mf4(vuint8mf4_t vd, uint8_t rs1, vuint8mf4_t vs2, si
 
 // CHECK-RV64-LABEL: @test_vmacc_vv_u8mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vmacc.nxv4i8.nxv4i8.i64(<vscale x 4 x i8> [[VD:%.*]], <vscale x 4 x i8> [[VS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vmacc.nxv4i8.nxv4i8.i64(<vscale x 4 x i8> [[VD:%.*]], <vscale x 4 x i8> [[VS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i8> [[TMP0]]
 //
 vuint8mf2_t test_vmacc_vv_u8mf2(vuint8mf2_t vd, vuint8mf2_t vs1, vuint8mf2_t vs2, size_t vl) {
@@ -450,7 +450,7 @@ vuint8mf2_t test_vmacc_vv_u8mf2(vuint8mf2_t vd, vuint8mf2_t vs1, vuint8mf2_t vs2
 
 // CHECK-RV64-LABEL: @test_vmacc_vx_u8mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vmacc.nxv4i8.i8.i64(<vscale x 4 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vmacc.nxv4i8.i8.i64(<vscale x 4 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i8> [[TMP0]]
 //
 vuint8mf2_t test_vmacc_vx_u8mf2(vuint8mf2_t vd, uint8_t rs1, vuint8mf2_t vs2, size_t vl) {
@@ -459,7 +459,7 @@ vuint8mf2_t test_vmacc_vx_u8mf2(vuint8mf2_t vd, uint8_t rs1, vuint8mf2_t vs2, si
 
 // CHECK-RV64-LABEL: @test_vmacc_vv_u8m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vmacc.nxv8i8.nxv8i8.i64(<vscale x 8 x i8> [[VD:%.*]], <vscale x 8 x i8> [[VS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vmacc.nxv8i8.nxv8i8.i64(<vscale x 8 x i8> [[VD:%.*]], <vscale x 8 x i8> [[VS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i8> [[TMP0]]
 //
 vuint8m1_t test_vmacc_vv_u8m1(vuint8m1_t vd, vuint8m1_t vs1, vuint8m1_t vs2, size_t vl) {
@@ -468,7 +468,7 @@ vuint8m1_t test_vmacc_vv_u8m1(vuint8m1_t vd, vuint8m1_t vs1, vuint8m1_t vs2, siz
 
 // CHECK-RV64-LABEL: @test_vmacc_vx_u8m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vmacc.nxv8i8.i8.i64(<vscale x 8 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vmacc.nxv8i8.i8.i64(<vscale x 8 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i8> [[TMP0]]
 //
 vuint8m1_t test_vmacc_vx_u8m1(vuint8m1_t vd, uint8_t rs1, vuint8m1_t vs2, size_t vl) {
@@ -477,7 +477,7 @@ vuint8m1_t test_vmacc_vx_u8m1(vuint8m1_t vd, uint8_t rs1, vuint8m1_t vs2, size_t
 
 // CHECK-RV64-LABEL: @test_vmacc_vv_u8m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vmacc.nxv16i8.nxv16i8.i64(<vscale x 16 x i8> [[VD:%.*]], <vscale x 16 x i8> [[VS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vmacc.nxv16i8.nxv16i8.i64(<vscale x 16 x i8> [[VD:%.*]], <vscale x 16 x i8> [[VS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
 vuint8m2_t test_vmacc_vv_u8m2(vuint8m2_t vd, vuint8m2_t vs1, vuint8m2_t vs2, size_t vl) {
@@ -486,7 +486,7 @@ vuint8m2_t test_vmacc_vv_u8m2(vuint8m2_t vd, vuint8m2_t vs1, vuint8m2_t vs2, siz
 
 // CHECK-RV64-LABEL: @test_vmacc_vx_u8m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vmacc.nxv16i8.i8.i64(<vscale x 16 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vmacc.nxv16i8.i8.i64(<vscale x 16 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
 vuint8m2_t test_vmacc_vx_u8m2(vuint8m2_t vd, uint8_t rs1, vuint8m2_t vs2, size_t vl) {
@@ -495,7 +495,7 @@ vuint8m2_t test_vmacc_vx_u8m2(vuint8m2_t vd, uint8_t rs1, vuint8m2_t vs2, size_t
 
 // CHECK-RV64-LABEL: @test_vmacc_vv_u8m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i8> @llvm.riscv.vmacc.nxv32i8.nxv32i8.i64(<vscale x 32 x i8> [[VD:%.*]], <vscale x 32 x i8> [[VS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i8> @llvm.riscv.vmacc.nxv32i8.nxv32i8.i64(<vscale x 32 x i8> [[VD:%.*]], <vscale x 32 x i8> [[VS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 32 x i8> [[TMP0]]
 //
 vuint8m4_t test_vmacc_vv_u8m4(vuint8m4_t vd, vuint8m4_t vs1, vuint8m4_t vs2, size_t vl) {
@@ -504,7 +504,7 @@ vuint8m4_t test_vmacc_vv_u8m4(vuint8m4_t vd, vuint8m4_t vs1, vuint8m4_t vs2, siz
 
 // CHECK-RV64-LABEL: @test_vmacc_vx_u8m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i8> @llvm.riscv.vmacc.nxv32i8.i8.i64(<vscale x 32 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i8> @llvm.riscv.vmacc.nxv32i8.i8.i64(<vscale x 32 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 32 x i8> [[TMP0]]
 //
 vuint8m4_t test_vmacc_vx_u8m4(vuint8m4_t vd, uint8_t rs1, vuint8m4_t vs2, size_t vl) {
@@ -513,7 +513,7 @@ vuint8m4_t test_vmacc_vx_u8m4(vuint8m4_t vd, uint8_t rs1, vuint8m4_t vs2, size_t
 
 // CHECK-RV64-LABEL: @test_vmacc_vv_u8m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 64 x i8> @llvm.riscv.vmacc.nxv64i8.nxv64i8.i64(<vscale x 64 x i8> [[VD:%.*]], <vscale x 64 x i8> [[VS1:%.*]], <vscale x 64 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 64 x i8> @llvm.riscv.vmacc.nxv64i8.nxv64i8.i64(<vscale x 64 x i8> [[VD:%.*]], <vscale x 64 x i8> [[VS1:%.*]], <vscale x 64 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 64 x i8> [[TMP0]]
 //
 vuint8m8_t test_vmacc_vv_u8m8(vuint8m8_t vd, vuint8m8_t vs1, vuint8m8_t vs2, size_t vl) {
@@ -522,7 +522,7 @@ vuint8m8_t test_vmacc_vv_u8m8(vuint8m8_t vd, vuint8m8_t vs1, vuint8m8_t vs2, siz
 
 // CHECK-RV64-LABEL: @test_vmacc_vx_u8m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 64 x i8> @llvm.riscv.vmacc.nxv64i8.i8.i64(<vscale x 64 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 64 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 64 x i8> @llvm.riscv.vmacc.nxv64i8.i8.i64(<vscale x 64 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 64 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 64 x i8> [[TMP0]]
 //
 vuint8m8_t test_vmacc_vx_u8m8(vuint8m8_t vd, uint8_t rs1, vuint8m8_t vs2, size_t vl) {
@@ -531,7 +531,7 @@ vuint8m8_t test_vmacc_vx_u8m8(vuint8m8_t vd, uint8_t rs1, vuint8m8_t vs2, size_t
 
 // CHECK-RV64-LABEL: @test_vmacc_vv_u16mf4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vmacc.nxv1i16.nxv1i16.i64(<vscale x 1 x i16> [[VD:%.*]], <vscale x 1 x i16> [[VS1:%.*]], <vscale x 1 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vmacc.nxv1i16.nxv1i16.i64(<vscale x 1 x i16> [[VD:%.*]], <vscale x 1 x i16> [[VS1:%.*]], <vscale x 1 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i16> [[TMP0]]
 //
 vuint16mf4_t test_vmacc_vv_u16mf4(vuint16mf4_t vd, vuint16mf4_t vs1, vuint16mf4_t vs2, size_t vl) {
@@ -540,7 +540,7 @@ vuint16mf4_t test_vmacc_vv_u16mf4(vuint16mf4_t vd, vuint16mf4_t vs1, vuint16mf4_
 
 // CHECK-RV64-LABEL: @test_vmacc_vx_u16mf4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vmacc.nxv1i16.i16.i64(<vscale x 1 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 1 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vmacc.nxv1i16.i16.i64(<vscale x 1 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 1 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i16> [[TMP0]]
 //
 vuint16mf4_t test_vmacc_vx_u16mf4(vuint16mf4_t vd, uint16_t rs1, vuint16mf4_t vs2, size_t vl) {
@@ -549,7 +549,7 @@ vuint16mf4_t test_vmacc_vx_u16mf4(vuint16mf4_t vd, uint16_t rs1, vuint16mf4_t vs
 
 // CHECK-RV64-LABEL: @test_vmacc_vv_u16mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vmacc.nxv2i16.nxv2i16.i64(<vscale x 2 x i16> [[VD:%.*]], <vscale x 2 x i16> [[VS1:%.*]], <vscale x 2 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vmacc.nxv2i16.nxv2i16.i64(<vscale x 2 x i16> [[VD:%.*]], <vscale x 2 x i16> [[VS1:%.*]], <vscale x 2 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i16> [[TMP0]]
 //
 vuint16mf2_t test_vmacc_vv_u16mf2(vuint16mf2_t vd, vuint16mf2_t vs1, vuint16mf2_t vs2, size_t vl) {
@@ -558,7 +558,7 @@ vuint16mf2_t test_vmacc_vv_u16mf2(vuint16mf2_t vd, vuint16mf2_t vs1, vuint16mf2_
 
 // CHECK-RV64-LABEL: @test_vmacc_vx_u16mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vmacc.nxv2i16.i16.i64(<vscale x 2 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 2 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vmacc.nxv2i16.i16.i64(<vscale x 2 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 2 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i16> [[TMP0]]
 //
 vuint16mf2_t test_vmacc_vx_u16mf2(vuint16mf2_t vd, uint16_t rs1, vuint16mf2_t vs2, size_t vl) {
@@ -567,7 +567,7 @@ vuint16mf2_t test_vmacc_vx_u16mf2(vuint16mf2_t vd, uint16_t rs1, vuint16mf2_t vs
 
 // CHECK-RV64-LABEL: @test_vmacc_vv_u16m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vmacc.nxv4i16.nxv4i16.i64(<vscale x 4 x i16> [[VD:%.*]], <vscale x 4 x i16> [[VS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vmacc.nxv4i16.nxv4i16.i64(<vscale x 4 x i16> [[VD:%.*]], <vscale x 4 x i16> [[VS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i16> [[TMP0]]
 //
 vuint16m1_t test_vmacc_vv_u16m1(vuint16m1_t vd, vuint16m1_t vs1, vuint16m1_t vs2, size_t vl) {
@@ -576,7 +576,7 @@ vuint16m1_t test_vmacc_vv_u16m1(vuint16m1_t vd, vuint16m1_t vs1, vuint16m1_t vs2
 
 // CHECK-RV64-LABEL: @test_vmacc_vx_u16m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vmacc.nxv4i16.i16.i64(<vscale x 4 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vmacc.nxv4i16.i16.i64(<vscale x 4 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i16> [[TMP0]]
 //
 vuint16m1_t test_vmacc_vx_u16m1(vuint16m1_t vd, uint16_t rs1, vuint16m1_t vs2, size_t vl) {
@@ -585,7 +585,7 @@ vuint16m1_t test_vmacc_vx_u16m1(vuint16m1_t vd, uint16_t rs1, vuint16m1_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vmacc_vv_u16m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vmacc.nxv8i16.nxv8i16.i64(<vscale x 8 x i16> [[VD:%.*]], <vscale x 8 x i16> [[VS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vmacc.nxv8i16.nxv8i16.i64(<vscale x 8 x i16> [[VD:%.*]], <vscale x 8 x i16> [[VS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
 vuint16m2_t test_vmacc_vv_u16m2(vuint16m2_t vd, vuint16m2_t vs1, vuint16m2_t vs2, size_t vl) {
@@ -594,7 +594,7 @@ vuint16m2_t test_vmacc_vv_u16m2(vuint16m2_t vd, vuint16m2_t vs1, vuint16m2_t vs2
 
 // CHECK-RV64-LABEL: @test_vmacc_vx_u16m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vmacc.nxv8i16.i16.i64(<vscale x 8 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vmacc.nxv8i16.i16.i64(<vscale x 8 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
 vuint16m2_t test_vmacc_vx_u16m2(vuint16m2_t vd, uint16_t rs1, vuint16m2_t vs2, size_t vl) {
@@ -603,7 +603,7 @@ vuint16m2_t test_vmacc_vx_u16m2(vuint16m2_t vd, uint16_t rs1, vuint16m2_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vmacc_vv_u16m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vmacc.nxv16i16.nxv16i16.i64(<vscale x 16 x i16> [[VD:%.*]], <vscale x 16 x i16> [[VS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vmacc.nxv16i16.nxv16i16.i64(<vscale x 16 x i16> [[VD:%.*]], <vscale x 16 x i16> [[VS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i16> [[TMP0]]
 //
 vuint16m4_t test_vmacc_vv_u16m4(vuint16m4_t vd, vuint16m4_t vs1, vuint16m4_t vs2, size_t vl) {
@@ -612,7 +612,7 @@ vuint16m4_t test_vmacc_vv_u16m4(vuint16m4_t vd, vuint16m4_t vs1, vuint16m4_t vs2
 
 // CHECK-RV64-LABEL: @test_vmacc_vx_u16m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vmacc.nxv16i16.i16.i64(<vscale x 16 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vmacc.nxv16i16.i16.i64(<vscale x 16 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i16> [[TMP0]]
 //
 vuint16m4_t test_vmacc_vx_u16m4(vuint16m4_t vd, uint16_t rs1, vuint16m4_t vs2, size_t vl) {
@@ -621,7 +621,7 @@ vuint16m4_t test_vmacc_vx_u16m4(vuint16m4_t vd, uint16_t rs1, vuint16m4_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vmacc_vv_u16m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vmacc.nxv32i16.nxv32i16.i64(<vscale x 32 x i16> [[VD:%.*]], <vscale x 32 x i16> [[VS1:%.*]], <vscale x 32 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vmacc.nxv32i16.nxv32i16.i64(<vscale x 32 x i16> [[VD:%.*]], <vscale x 32 x i16> [[VS1:%.*]], <vscale x 32 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 32 x i16> [[TMP0]]
 //
 vuint16m8_t test_vmacc_vv_u16m8(vuint16m8_t vd, vuint16m8_t vs1, vuint16m8_t vs2, size_t vl) {
@@ -630,7 +630,7 @@ vuint16m8_t test_vmacc_vv_u16m8(vuint16m8_t vd, vuint16m8_t vs1, vuint16m8_t vs2
 
 // CHECK-RV64-LABEL: @test_vmacc_vx_u16m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vmacc.nxv32i16.i16.i64(<vscale x 32 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 32 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vmacc.nxv32i16.i16.i64(<vscale x 32 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 32 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 32 x i16> [[TMP0]]
 //
 vuint16m8_t test_vmacc_vx_u16m8(vuint16m8_t vd, uint16_t rs1, vuint16m8_t vs2, size_t vl) {
@@ -639,7 +639,7 @@ vuint16m8_t test_vmacc_vx_u16m8(vuint16m8_t vd, uint16_t rs1, vuint16m8_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vmacc_vv_u32mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vmacc.nxv1i32.nxv1i32.i64(<vscale x 1 x i32> [[VD:%.*]], <vscale x 1 x i32> [[VS1:%.*]], <vscale x 1 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vmacc.nxv1i32.nxv1i32.i64(<vscale x 1 x i32> [[VD:%.*]], <vscale x 1 x i32> [[VS1:%.*]], <vscale x 1 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i32> [[TMP0]]
 //
 vuint32mf2_t test_vmacc_vv_u32mf2(vuint32mf2_t vd, vuint32mf2_t vs1, vuint32mf2_t vs2, size_t vl) {
@@ -648,7 +648,7 @@ vuint32mf2_t test_vmacc_vv_u32mf2(vuint32mf2_t vd, vuint32mf2_t vs1, vuint32mf2_
 
 // CHECK-RV64-LABEL: @test_vmacc_vx_u32mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vmacc.nxv1i32.i32.i64(<vscale x 1 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 1 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vmacc.nxv1i32.i32.i64(<vscale x 1 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 1 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i32> [[TMP0]]
 //
 vuint32mf2_t test_vmacc_vx_u32mf2(vuint32mf2_t vd, uint32_t rs1, vuint32mf2_t vs2, size_t vl) {
@@ -657,7 +657,7 @@ vuint32mf2_t test_vmacc_vx_u32mf2(vuint32mf2_t vd, uint32_t rs1, vuint32mf2_t vs
 
 // CHECK-RV64-LABEL: @test_vmacc_vv_u32m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vmacc.nxv2i32.nxv2i32.i64(<vscale x 2 x i32> [[VD:%.*]], <vscale x 2 x i32> [[VS1:%.*]], <vscale x 2 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vmacc.nxv2i32.nxv2i32.i64(<vscale x 2 x i32> [[VD:%.*]], <vscale x 2 x i32> [[VS1:%.*]], <vscale x 2 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i32> [[TMP0]]
 //
 vuint32m1_t test_vmacc_vv_u32m1(vuint32m1_t vd, vuint32m1_t vs1, vuint32m1_t vs2, size_t vl) {
@@ -666,7 +666,7 @@ vuint32m1_t test_vmacc_vv_u32m1(vuint32m1_t vd, vuint32m1_t vs1, vuint32m1_t vs2
 
 // CHECK-RV64-LABEL: @test_vmacc_vx_u32m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vmacc.nxv2i32.i32.i64(<vscale x 2 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 2 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vmacc.nxv2i32.i32.i64(<vscale x 2 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 2 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i32> [[TMP0]]
 //
 vuint32m1_t test_vmacc_vx_u32m1(vuint32m1_t vd, uint32_t rs1, vuint32m1_t vs2, size_t vl) {
@@ -675,7 +675,7 @@ vuint32m1_t test_vmacc_vx_u32m1(vuint32m1_t vd, uint32_t rs1, vuint32m1_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vmacc_vv_u32m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vmacc.nxv4i32.nxv4i32.i64(<vscale x 4 x i32> [[VD:%.*]], <vscale x 4 x i32> [[VS1:%.*]], <vscale x 4 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vmacc.nxv4i32.nxv4i32.i64(<vscale x 4 x i32> [[VD:%.*]], <vscale x 4 x i32> [[VS1:%.*]], <vscale x 4 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
 vuint32m2_t test_vmacc_vv_u32m2(vuint32m2_t vd, vuint32m2_t vs1, vuint32m2_t vs2, size_t vl) {
@@ -684,7 +684,7 @@ vuint32m2_t test_vmacc_vv_u32m2(vuint32m2_t vd, vuint32m2_t vs1, vuint32m2_t vs2
 
 // CHECK-RV64-LABEL: @test_vmacc_vx_u32m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vmacc.nxv4i32.i32.i64(<vscale x 4 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 4 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vmacc.nxv4i32.i32.i64(<vscale x 4 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 4 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
 vuint32m2_t test_vmacc_vx_u32m2(vuint32m2_t vd, uint32_t rs1, vuint32m2_t vs2, size_t vl) {
@@ -693,7 +693,7 @@ vuint32m2_t test_vmacc_vx_u32m2(vuint32m2_t vd, uint32_t rs1, vuint32m2_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vmacc_vv_u32m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vmacc.nxv8i32.nxv8i32.i64(<vscale x 8 x i32> [[VD:%.*]], <vscale x 8 x i32> [[VS1:%.*]], <vscale x 8 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vmacc.nxv8i32.nxv8i32.i64(<vscale x 8 x i32> [[VD:%.*]], <vscale x 8 x i32> [[VS1:%.*]], <vscale x 8 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i32> [[TMP0]]
 //
 vuint32m4_t test_vmacc_vv_u32m4(vuint32m4_t vd, vuint32m4_t vs1, vuint32m4_t vs2, size_t vl) {
@@ -702,7 +702,7 @@ vuint32m4_t test_vmacc_vv_u32m4(vuint32m4_t vd, vuint32m4_t vs1, vuint32m4_t vs2
 
 // CHECK-RV64-LABEL: @test_vmacc_vx_u32m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vmacc.nxv8i32.i32.i64(<vscale x 8 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 8 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vmacc.nxv8i32.i32.i64(<vscale x 8 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 8 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i32> [[TMP0]]
 //
 vuint32m4_t test_vmacc_vx_u32m4(vuint32m4_t vd, uint32_t rs1, vuint32m4_t vs2, size_t vl) {
@@ -711,7 +711,7 @@ vuint32m4_t test_vmacc_vx_u32m4(vuint32m4_t vd, uint32_t rs1, vuint32m4_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vmacc_vv_u32m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vmacc.nxv16i32.nxv16i32.i64(<vscale x 16 x i32> [[VD:%.*]], <vscale x 16 x i32> [[VS1:%.*]], <vscale x 16 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vmacc.nxv16i32.nxv16i32.i64(<vscale x 16 x i32> [[VD:%.*]], <vscale x 16 x i32> [[VS1:%.*]], <vscale x 16 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i32> [[TMP0]]
 //
 vuint32m8_t test_vmacc_vv_u32m8(vuint32m8_t vd, vuint32m8_t vs1, vuint32m8_t vs2, size_t vl) {
@@ -720,7 +720,7 @@ vuint32m8_t test_vmacc_vv_u32m8(vuint32m8_t vd, vuint32m8_t vs1, vuint32m8_t vs2
 
 // CHECK-RV64-LABEL: @test_vmacc_vx_u32m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vmacc.nxv16i32.i32.i64(<vscale x 16 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 16 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vmacc.nxv16i32.i32.i64(<vscale x 16 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 16 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i32> [[TMP0]]
 //
 vuint32m8_t test_vmacc_vx_u32m8(vuint32m8_t vd, uint32_t rs1, vuint32m8_t vs2, size_t vl) {
@@ -729,7 +729,7 @@ vuint32m8_t test_vmacc_vx_u32m8(vuint32m8_t vd, uint32_t rs1, vuint32m8_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vmacc_vv_u64m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vmacc.nxv1i64.nxv1i64.i64(<vscale x 1 x i64> [[VD:%.*]], <vscale x 1 x i64> [[VS1:%.*]], <vscale x 1 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vmacc.nxv1i64.nxv1i64.i64(<vscale x 1 x i64> [[VD:%.*]], <vscale x 1 x i64> [[VS1:%.*]], <vscale x 1 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
 //
 vuint64m1_t test_vmacc_vv_u64m1(vuint64m1_t vd, vuint64m1_t vs1, vuint64m1_t vs2, size_t vl) {
@@ -738,7 +738,7 @@ vuint64m1_t test_vmacc_vv_u64m1(vuint64m1_t vd, vuint64m1_t vs1, vuint64m1_t vs2
 
 // CHECK-RV64-LABEL: @test_vmacc_vx_u64m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vmacc.nxv1i64.i64.i64(<vscale x 1 x i64> [[VD:%.*]], i64 [[RS1:%.*]], <vscale x 1 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vmacc.nxv1i64.i64.i64(<vscale x 1 x i64> [[VD:%.*]], i64 [[RS1:%.*]], <vscale x 1 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
 //
 vuint64m1_t test_vmacc_vx_u64m1(vuint64m1_t vd, uint64_t rs1, vuint64m1_t vs2, size_t vl) {
@@ -747,7 +747,7 @@ vuint64m1_t test_vmacc_vx_u64m1(vuint64m1_t vd, uint64_t rs1, vuint64m1_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vmacc_vv_u64m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vmacc.nxv2i64.nxv2i64.i64(<vscale x 2 x i64> [[VD:%.*]], <vscale x 2 x i64> [[VS1:%.*]], <vscale x 2 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vmacc.nxv2i64.nxv2i64.i64(<vscale x 2 x i64> [[VD:%.*]], <vscale x 2 x i64> [[VS1:%.*]], <vscale x 2 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
 vuint64m2_t test_vmacc_vv_u64m2(vuint64m2_t vd, vuint64m2_t vs1, vuint64m2_t vs2, size_t vl) {
@@ -756,7 +756,7 @@ vuint64m2_t test_vmacc_vv_u64m2(vuint64m2_t vd, vuint64m2_t vs1, vuint64m2_t vs2
 
 // CHECK-RV64-LABEL: @test_vmacc_vx_u64m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vmacc.nxv2i64.i64.i64(<vscale x 2 x i64> [[VD:%.*]], i64 [[RS1:%.*]], <vscale x 2 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vmacc.nxv2i64.i64.i64(<vscale x 2 x i64> [[VD:%.*]], i64 [[RS1:%.*]], <vscale x 2 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
 vuint64m2_t test_vmacc_vx_u64m2(vuint64m2_t vd, uint64_t rs1, vuint64m2_t vs2, size_t vl) {
@@ -765,7 +765,7 @@ vuint64m2_t test_vmacc_vx_u64m2(vuint64m2_t vd, uint64_t rs1, vuint64m2_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vmacc_vv_u64m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vmacc.nxv4i64.nxv4i64.i64(<vscale x 4 x i64> [[VD:%.*]], <vscale x 4 x i64> [[VS1:%.*]], <vscale x 4 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vmacc.nxv4i64.nxv4i64.i64(<vscale x 4 x i64> [[VD:%.*]], <vscale x 4 x i64> [[VS1:%.*]], <vscale x 4 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
 //
 vuint64m4_t test_vmacc_vv_u64m4(vuint64m4_t vd, vuint64m4_t vs1, vuint64m4_t vs2, size_t vl) {
@@ -774,7 +774,7 @@ vuint64m4_t test_vmacc_vv_u64m4(vuint64m4_t vd, vuint64m4_t vs1, vuint64m4_t vs2
 
 // CHECK-RV64-LABEL: @test_vmacc_vx_u64m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vmacc.nxv4i64.i64.i64(<vscale x 4 x i64> [[VD:%.*]], i64 [[RS1:%.*]], <vscale x 4 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vmacc.nxv4i64.i64.i64(<vscale x 4 x i64> [[VD:%.*]], i64 [[RS1:%.*]], <vscale x 4 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
 //
 vuint64m4_t test_vmacc_vx_u64m4(vuint64m4_t vd, uint64_t rs1, vuint64m4_t vs2, size_t vl) {
@@ -783,7 +783,7 @@ vuint64m4_t test_vmacc_vx_u64m4(vuint64m4_t vd, uint64_t rs1, vuint64m4_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vmacc_vv_u64m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vmacc.nxv8i64.nxv8i64.i64(<vscale x 8 x i64> [[VD:%.*]], <vscale x 8 x i64> [[VS1:%.*]], <vscale x 8 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vmacc.nxv8i64.nxv8i64.i64(<vscale x 8 x i64> [[VD:%.*]], <vscale x 8 x i64> [[VS1:%.*]], <vscale x 8 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
 //
 vuint64m8_t test_vmacc_vv_u64m8(vuint64m8_t vd, vuint64m8_t vs1, vuint64m8_t vs2, size_t vl) {
@@ -792,7 +792,7 @@ vuint64m8_t test_vmacc_vv_u64m8(vuint64m8_t vd, vuint64m8_t vs1, vuint64m8_t vs2
 
 // CHECK-RV64-LABEL: @test_vmacc_vx_u64m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vmacc.nxv8i64.i64.i64(<vscale x 8 x i64> [[VD:%.*]], i64 [[RS1:%.*]], <vscale x 8 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vmacc.nxv8i64.i64.i64(<vscale x 8 x i64> [[VD:%.*]], i64 [[RS1:%.*]], <vscale x 8 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
 //
 vuint64m8_t test_vmacc_vx_u64m8(vuint64m8_t vd, uint64_t rs1, vuint64m8_t vs2, size_t vl) {

diff  --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vmadd.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vmadd.c
index 92d272121ffa5..68cfa4d3a2094 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vmadd.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vmadd.c
@@ -9,7 +9,7 @@
 
 // CHECK-RV64-LABEL: @test_vmadd_vv_i8mf8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vmadd.nxv1i8.nxv1i8.i64(<vscale x 1 x i8> [[VD:%.*]], <vscale x 1 x i8> [[VS1:%.*]], <vscale x 1 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vmadd.nxv1i8.nxv1i8.i64(<vscale x 1 x i8> [[VD:%.*]], <vscale x 1 x i8> [[VS1:%.*]], <vscale x 1 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i8> [[TMP0]]
 //
 vint8mf8_t test_vmadd_vv_i8mf8(vint8mf8_t vd, vint8mf8_t vs1, vint8mf8_t vs2, size_t vl) {
@@ -18,7 +18,7 @@ vint8mf8_t test_vmadd_vv_i8mf8(vint8mf8_t vd, vint8mf8_t vs1, vint8mf8_t vs2, si
 
 // CHECK-RV64-LABEL: @test_vmadd_vx_i8mf8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vmadd.nxv1i8.i8.i64(<vscale x 1 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 1 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vmadd.nxv1i8.i8.i64(<vscale x 1 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 1 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i8> [[TMP0]]
 //
 vint8mf8_t test_vmadd_vx_i8mf8(vint8mf8_t vd, int8_t rs1, vint8mf8_t vs2, size_t vl) {
@@ -27,7 +27,7 @@ vint8mf8_t test_vmadd_vx_i8mf8(vint8mf8_t vd, int8_t rs1, vint8mf8_t vs2, size_t
 
 // CHECK-RV64-LABEL: @test_vmadd_vv_i8mf4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vmadd.nxv2i8.nxv2i8.i64(<vscale x 2 x i8> [[VD:%.*]], <vscale x 2 x i8> [[VS1:%.*]], <vscale x 2 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vmadd.nxv2i8.nxv2i8.i64(<vscale x 2 x i8> [[VD:%.*]], <vscale x 2 x i8> [[VS1:%.*]], <vscale x 2 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i8> [[TMP0]]
 //
 vint8mf4_t test_vmadd_vv_i8mf4(vint8mf4_t vd, vint8mf4_t vs1, vint8mf4_t vs2, size_t vl) {
@@ -36,7 +36,7 @@ vint8mf4_t test_vmadd_vv_i8mf4(vint8mf4_t vd, vint8mf4_t vs1, vint8mf4_t vs2, si
 
 // CHECK-RV64-LABEL: @test_vmadd_vx_i8mf4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vmadd.nxv2i8.i8.i64(<vscale x 2 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 2 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vmadd.nxv2i8.i8.i64(<vscale x 2 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 2 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i8> [[TMP0]]
 //
 vint8mf4_t test_vmadd_vx_i8mf4(vint8mf4_t vd, int8_t rs1, vint8mf4_t vs2, size_t vl) {
@@ -45,7 +45,7 @@ vint8mf4_t test_vmadd_vx_i8mf4(vint8mf4_t vd, int8_t rs1, vint8mf4_t vs2, size_t
 
 // CHECK-RV64-LABEL: @test_vmadd_vv_i8mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vmadd.nxv4i8.nxv4i8.i64(<vscale x 4 x i8> [[VD:%.*]], <vscale x 4 x i8> [[VS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vmadd.nxv4i8.nxv4i8.i64(<vscale x 4 x i8> [[VD:%.*]], <vscale x 4 x i8> [[VS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i8> [[TMP0]]
 //
 vint8mf2_t test_vmadd_vv_i8mf2(vint8mf2_t vd, vint8mf2_t vs1, vint8mf2_t vs2, size_t vl) {
@@ -54,7 +54,7 @@ vint8mf2_t test_vmadd_vv_i8mf2(vint8mf2_t vd, vint8mf2_t vs1, vint8mf2_t vs2, si
 
 // CHECK-RV64-LABEL: @test_vmadd_vx_i8mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vmadd.nxv4i8.i8.i64(<vscale x 4 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vmadd.nxv4i8.i8.i64(<vscale x 4 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i8> [[TMP0]]
 //
 vint8mf2_t test_vmadd_vx_i8mf2(vint8mf2_t vd, int8_t rs1, vint8mf2_t vs2, size_t vl) {
@@ -63,7 +63,7 @@ vint8mf2_t test_vmadd_vx_i8mf2(vint8mf2_t vd, int8_t rs1, vint8mf2_t vs2, size_t
 
 // CHECK-RV64-LABEL: @test_vmadd_vv_i8m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vmadd.nxv8i8.nxv8i8.i64(<vscale x 8 x i8> [[VD:%.*]], <vscale x 8 x i8> [[VS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vmadd.nxv8i8.nxv8i8.i64(<vscale x 8 x i8> [[VD:%.*]], <vscale x 8 x i8> [[VS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i8> [[TMP0]]
 //
 vint8m1_t test_vmadd_vv_i8m1(vint8m1_t vd, vint8m1_t vs1, vint8m1_t vs2, size_t vl) {
@@ -72,7 +72,7 @@ vint8m1_t test_vmadd_vv_i8m1(vint8m1_t vd, vint8m1_t vs1, vint8m1_t vs2, size_t
 
 // CHECK-RV64-LABEL: @test_vmadd_vx_i8m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vmadd.nxv8i8.i8.i64(<vscale x 8 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vmadd.nxv8i8.i8.i64(<vscale x 8 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i8> [[TMP0]]
 //
 vint8m1_t test_vmadd_vx_i8m1(vint8m1_t vd, int8_t rs1, vint8m1_t vs2, size_t vl) {
@@ -81,7 +81,7 @@ vint8m1_t test_vmadd_vx_i8m1(vint8m1_t vd, int8_t rs1, vint8m1_t vs2, size_t vl)
 
 // CHECK-RV64-LABEL: @test_vmadd_vv_i8m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vmadd.nxv16i8.nxv16i8.i64(<vscale x 16 x i8> [[VD:%.*]], <vscale x 16 x i8> [[VS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vmadd.nxv16i8.nxv16i8.i64(<vscale x 16 x i8> [[VD:%.*]], <vscale x 16 x i8> [[VS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
 vint8m2_t test_vmadd_vv_i8m2(vint8m2_t vd, vint8m2_t vs1, vint8m2_t vs2, size_t vl) {
@@ -90,7 +90,7 @@ vint8m2_t test_vmadd_vv_i8m2(vint8m2_t vd, vint8m2_t vs1, vint8m2_t vs2, size_t
 
 // CHECK-RV64-LABEL: @test_vmadd_vx_i8m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vmadd.nxv16i8.i8.i64(<vscale x 16 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vmadd.nxv16i8.i8.i64(<vscale x 16 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
 vint8m2_t test_vmadd_vx_i8m2(vint8m2_t vd, int8_t rs1, vint8m2_t vs2, size_t vl) {
@@ -99,7 +99,7 @@ vint8m2_t test_vmadd_vx_i8m2(vint8m2_t vd, int8_t rs1, vint8m2_t vs2, size_t vl)
 
 // CHECK-RV64-LABEL: @test_vmadd_vv_i8m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i8> @llvm.riscv.vmadd.nxv32i8.nxv32i8.i64(<vscale x 32 x i8> [[VD:%.*]], <vscale x 32 x i8> [[VS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i8> @llvm.riscv.vmadd.nxv32i8.nxv32i8.i64(<vscale x 32 x i8> [[VD:%.*]], <vscale x 32 x i8> [[VS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 32 x i8> [[TMP0]]
 //
 vint8m4_t test_vmadd_vv_i8m4(vint8m4_t vd, vint8m4_t vs1, vint8m4_t vs2, size_t vl) {
@@ -108,7 +108,7 @@ vint8m4_t test_vmadd_vv_i8m4(vint8m4_t vd, vint8m4_t vs1, vint8m4_t vs2, size_t
 
 // CHECK-RV64-LABEL: @test_vmadd_vx_i8m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i8> @llvm.riscv.vmadd.nxv32i8.i8.i64(<vscale x 32 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i8> @llvm.riscv.vmadd.nxv32i8.i8.i64(<vscale x 32 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 32 x i8> [[TMP0]]
 //
 vint8m4_t test_vmadd_vx_i8m4(vint8m4_t vd, int8_t rs1, vint8m4_t vs2, size_t vl) {
@@ -117,7 +117,7 @@ vint8m4_t test_vmadd_vx_i8m4(vint8m4_t vd, int8_t rs1, vint8m4_t vs2, size_t vl)
 
 // CHECK-RV64-LABEL: @test_vmadd_vv_i8m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 64 x i8> @llvm.riscv.vmadd.nxv64i8.nxv64i8.i64(<vscale x 64 x i8> [[VD:%.*]], <vscale x 64 x i8> [[VS1:%.*]], <vscale x 64 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 64 x i8> @llvm.riscv.vmadd.nxv64i8.nxv64i8.i64(<vscale x 64 x i8> [[VD:%.*]], <vscale x 64 x i8> [[VS1:%.*]], <vscale x 64 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 64 x i8> [[TMP0]]
 //
 vint8m8_t test_vmadd_vv_i8m8(vint8m8_t vd, vint8m8_t vs1, vint8m8_t vs2, size_t vl) {
@@ -126,7 +126,7 @@ vint8m8_t test_vmadd_vv_i8m8(vint8m8_t vd, vint8m8_t vs1, vint8m8_t vs2, size_t
 
 // CHECK-RV64-LABEL: @test_vmadd_vx_i8m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 64 x i8> @llvm.riscv.vmadd.nxv64i8.i8.i64(<vscale x 64 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 64 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 64 x i8> @llvm.riscv.vmadd.nxv64i8.i8.i64(<vscale x 64 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 64 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 64 x i8> [[TMP0]]
 //
 vint8m8_t test_vmadd_vx_i8m8(vint8m8_t vd, int8_t rs1, vint8m8_t vs2, size_t vl) {
@@ -135,7 +135,7 @@ vint8m8_t test_vmadd_vx_i8m8(vint8m8_t vd, int8_t rs1, vint8m8_t vs2, size_t vl)
 
 // CHECK-RV64-LABEL: @test_vmadd_vv_i16mf4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vmadd.nxv1i16.nxv1i16.i64(<vscale x 1 x i16> [[VD:%.*]], <vscale x 1 x i16> [[VS1:%.*]], <vscale x 1 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vmadd.nxv1i16.nxv1i16.i64(<vscale x 1 x i16> [[VD:%.*]], <vscale x 1 x i16> [[VS1:%.*]], <vscale x 1 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i16> [[TMP0]]
 //
 vint16mf4_t test_vmadd_vv_i16mf4(vint16mf4_t vd, vint16mf4_t vs1, vint16mf4_t vs2, size_t vl) {
@@ -144,7 +144,7 @@ vint16mf4_t test_vmadd_vv_i16mf4(vint16mf4_t vd, vint16mf4_t vs1, vint16mf4_t vs
 
 // CHECK-RV64-LABEL: @test_vmadd_vx_i16mf4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vmadd.nxv1i16.i16.i64(<vscale x 1 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 1 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vmadd.nxv1i16.i16.i64(<vscale x 1 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 1 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i16> [[TMP0]]
 //
 vint16mf4_t test_vmadd_vx_i16mf4(vint16mf4_t vd, int16_t rs1, vint16mf4_t vs2, size_t vl) {
@@ -153,7 +153,7 @@ vint16mf4_t test_vmadd_vx_i16mf4(vint16mf4_t vd, int16_t rs1, vint16mf4_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vmadd_vv_i16mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vmadd.nxv2i16.nxv2i16.i64(<vscale x 2 x i16> [[VD:%.*]], <vscale x 2 x i16> [[VS1:%.*]], <vscale x 2 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vmadd.nxv2i16.nxv2i16.i64(<vscale x 2 x i16> [[VD:%.*]], <vscale x 2 x i16> [[VS1:%.*]], <vscale x 2 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i16> [[TMP0]]
 //
 vint16mf2_t test_vmadd_vv_i16mf2(vint16mf2_t vd, vint16mf2_t vs1, vint16mf2_t vs2, size_t vl) {
@@ -162,7 +162,7 @@ vint16mf2_t test_vmadd_vv_i16mf2(vint16mf2_t vd, vint16mf2_t vs1, vint16mf2_t vs
 
 // CHECK-RV64-LABEL: @test_vmadd_vx_i16mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vmadd.nxv2i16.i16.i64(<vscale x 2 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 2 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vmadd.nxv2i16.i16.i64(<vscale x 2 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 2 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i16> [[TMP0]]
 //
 vint16mf2_t test_vmadd_vx_i16mf2(vint16mf2_t vd, int16_t rs1, vint16mf2_t vs2, size_t vl) {
@@ -171,7 +171,7 @@ vint16mf2_t test_vmadd_vx_i16mf2(vint16mf2_t vd, int16_t rs1, vint16mf2_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vmadd_vv_i16m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vmadd.nxv4i16.nxv4i16.i64(<vscale x 4 x i16> [[VD:%.*]], <vscale x 4 x i16> [[VS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vmadd.nxv4i16.nxv4i16.i64(<vscale x 4 x i16> [[VD:%.*]], <vscale x 4 x i16> [[VS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i16> [[TMP0]]
 //
 vint16m1_t test_vmadd_vv_i16m1(vint16m1_t vd, vint16m1_t vs1, vint16m1_t vs2, size_t vl) {
@@ -180,7 +180,7 @@ vint16m1_t test_vmadd_vv_i16m1(vint16m1_t vd, vint16m1_t vs1, vint16m1_t vs2, si
 
 // CHECK-RV64-LABEL: @test_vmadd_vx_i16m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vmadd.nxv4i16.i16.i64(<vscale x 4 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vmadd.nxv4i16.i16.i64(<vscale x 4 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i16> [[TMP0]]
 //
 vint16m1_t test_vmadd_vx_i16m1(vint16m1_t vd, int16_t rs1, vint16m1_t vs2, size_t vl) {
@@ -189,7 +189,7 @@ vint16m1_t test_vmadd_vx_i16m1(vint16m1_t vd, int16_t rs1, vint16m1_t vs2, size_
 
 // CHECK-RV64-LABEL: @test_vmadd_vv_i16m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vmadd.nxv8i16.nxv8i16.i64(<vscale x 8 x i16> [[VD:%.*]], <vscale x 8 x i16> [[VS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vmadd.nxv8i16.nxv8i16.i64(<vscale x 8 x i16> [[VD:%.*]], <vscale x 8 x i16> [[VS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
 vint16m2_t test_vmadd_vv_i16m2(vint16m2_t vd, vint16m2_t vs1, vint16m2_t vs2, size_t vl) {
@@ -198,7 +198,7 @@ vint16m2_t test_vmadd_vv_i16m2(vint16m2_t vd, vint16m2_t vs1, vint16m2_t vs2, si
 
 // CHECK-RV64-LABEL: @test_vmadd_vx_i16m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vmadd.nxv8i16.i16.i64(<vscale x 8 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vmadd.nxv8i16.i16.i64(<vscale x 8 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
 vint16m2_t test_vmadd_vx_i16m2(vint16m2_t vd, int16_t rs1, vint16m2_t vs2, size_t vl) {
@@ -207,7 +207,7 @@ vint16m2_t test_vmadd_vx_i16m2(vint16m2_t vd, int16_t rs1, vint16m2_t vs2, size_
 
 // CHECK-RV64-LABEL: @test_vmadd_vv_i16m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vmadd.nxv16i16.nxv16i16.i64(<vscale x 16 x i16> [[VD:%.*]], <vscale x 16 x i16> [[VS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vmadd.nxv16i16.nxv16i16.i64(<vscale x 16 x i16> [[VD:%.*]], <vscale x 16 x i16> [[VS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i16> [[TMP0]]
 //
 vint16m4_t test_vmadd_vv_i16m4(vint16m4_t vd, vint16m4_t vs1, vint16m4_t vs2, size_t vl) {
@@ -216,7 +216,7 @@ vint16m4_t test_vmadd_vv_i16m4(vint16m4_t vd, vint16m4_t vs1, vint16m4_t vs2, si
 
 // CHECK-RV64-LABEL: @test_vmadd_vx_i16m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vmadd.nxv16i16.i16.i64(<vscale x 16 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vmadd.nxv16i16.i16.i64(<vscale x 16 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i16> [[TMP0]]
 //
 vint16m4_t test_vmadd_vx_i16m4(vint16m4_t vd, int16_t rs1, vint16m4_t vs2, size_t vl) {
@@ -225,7 +225,7 @@ vint16m4_t test_vmadd_vx_i16m4(vint16m4_t vd, int16_t rs1, vint16m4_t vs2, size_
 
 // CHECK-RV64-LABEL: @test_vmadd_vv_i16m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vmadd.nxv32i16.nxv32i16.i64(<vscale x 32 x i16> [[VD:%.*]], <vscale x 32 x i16> [[VS1:%.*]], <vscale x 32 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vmadd.nxv32i16.nxv32i16.i64(<vscale x 32 x i16> [[VD:%.*]], <vscale x 32 x i16> [[VS1:%.*]], <vscale x 32 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 32 x i16> [[TMP0]]
 //
 vint16m8_t test_vmadd_vv_i16m8(vint16m8_t vd, vint16m8_t vs1, vint16m8_t vs2, size_t vl) {
@@ -234,7 +234,7 @@ vint16m8_t test_vmadd_vv_i16m8(vint16m8_t vd, vint16m8_t vs1, vint16m8_t vs2, si
 
 // CHECK-RV64-LABEL: @test_vmadd_vx_i16m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vmadd.nxv32i16.i16.i64(<vscale x 32 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 32 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vmadd.nxv32i16.i16.i64(<vscale x 32 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 32 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 32 x i16> [[TMP0]]
 //
 vint16m8_t test_vmadd_vx_i16m8(vint16m8_t vd, int16_t rs1, vint16m8_t vs2, size_t vl) {
@@ -243,7 +243,7 @@ vint16m8_t test_vmadd_vx_i16m8(vint16m8_t vd, int16_t rs1, vint16m8_t vs2, size_
 
 // CHECK-RV64-LABEL: @test_vmadd_vv_i32mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vmadd.nxv1i32.nxv1i32.i64(<vscale x 1 x i32> [[VD:%.*]], <vscale x 1 x i32> [[VS1:%.*]], <vscale x 1 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vmadd.nxv1i32.nxv1i32.i64(<vscale x 1 x i32> [[VD:%.*]], <vscale x 1 x i32> [[VS1:%.*]], <vscale x 1 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i32> [[TMP0]]
 //
 vint32mf2_t test_vmadd_vv_i32mf2(vint32mf2_t vd, vint32mf2_t vs1, vint32mf2_t vs2, size_t vl) {
@@ -252,7 +252,7 @@ vint32mf2_t test_vmadd_vv_i32mf2(vint32mf2_t vd, vint32mf2_t vs1, vint32mf2_t vs
 
 // CHECK-RV64-LABEL: @test_vmadd_vx_i32mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vmadd.nxv1i32.i32.i64(<vscale x 1 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 1 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vmadd.nxv1i32.i32.i64(<vscale x 1 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 1 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i32> [[TMP0]]
 //
 vint32mf2_t test_vmadd_vx_i32mf2(vint32mf2_t vd, int32_t rs1, vint32mf2_t vs2, size_t vl) {
@@ -261,7 +261,7 @@ vint32mf2_t test_vmadd_vx_i32mf2(vint32mf2_t vd, int32_t rs1, vint32mf2_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vmadd_vv_i32m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vmadd.nxv2i32.nxv2i32.i64(<vscale x 2 x i32> [[VD:%.*]], <vscale x 2 x i32> [[VS1:%.*]], <vscale x 2 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vmadd.nxv2i32.nxv2i32.i64(<vscale x 2 x i32> [[VD:%.*]], <vscale x 2 x i32> [[VS1:%.*]], <vscale x 2 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i32> [[TMP0]]
 //
 vint32m1_t test_vmadd_vv_i32m1(vint32m1_t vd, vint32m1_t vs1, vint32m1_t vs2, size_t vl) {
@@ -270,7 +270,7 @@ vint32m1_t test_vmadd_vv_i32m1(vint32m1_t vd, vint32m1_t vs1, vint32m1_t vs2, si
 
 // CHECK-RV64-LABEL: @test_vmadd_vx_i32m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vmadd.nxv2i32.i32.i64(<vscale x 2 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 2 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vmadd.nxv2i32.i32.i64(<vscale x 2 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 2 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i32> [[TMP0]]
 //
 vint32m1_t test_vmadd_vx_i32m1(vint32m1_t vd, int32_t rs1, vint32m1_t vs2, size_t vl) {
@@ -279,7 +279,7 @@ vint32m1_t test_vmadd_vx_i32m1(vint32m1_t vd, int32_t rs1, vint32m1_t vs2, size_
 
 // CHECK-RV64-LABEL: @test_vmadd_vv_i32m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vmadd.nxv4i32.nxv4i32.i64(<vscale x 4 x i32> [[VD:%.*]], <vscale x 4 x i32> [[VS1:%.*]], <vscale x 4 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vmadd.nxv4i32.nxv4i32.i64(<vscale x 4 x i32> [[VD:%.*]], <vscale x 4 x i32> [[VS1:%.*]], <vscale x 4 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
 vint32m2_t test_vmadd_vv_i32m2(vint32m2_t vd, vint32m2_t vs1, vint32m2_t vs2, size_t vl) {
@@ -288,7 +288,7 @@ vint32m2_t test_vmadd_vv_i32m2(vint32m2_t vd, vint32m2_t vs1, vint32m2_t vs2, si
 
 // CHECK-RV64-LABEL: @test_vmadd_vx_i32m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vmadd.nxv4i32.i32.i64(<vscale x 4 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 4 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vmadd.nxv4i32.i32.i64(<vscale x 4 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 4 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
 vint32m2_t test_vmadd_vx_i32m2(vint32m2_t vd, int32_t rs1, vint32m2_t vs2, size_t vl) {
@@ -297,7 +297,7 @@ vint32m2_t test_vmadd_vx_i32m2(vint32m2_t vd, int32_t rs1, vint32m2_t vs2, size_
 
 // CHECK-RV64-LABEL: @test_vmadd_vv_i32m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vmadd.nxv8i32.nxv8i32.i64(<vscale x 8 x i32> [[VD:%.*]], <vscale x 8 x i32> [[VS1:%.*]], <vscale x 8 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vmadd.nxv8i32.nxv8i32.i64(<vscale x 8 x i32> [[VD:%.*]], <vscale x 8 x i32> [[VS1:%.*]], <vscale x 8 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i32> [[TMP0]]
 //
 vint32m4_t test_vmadd_vv_i32m4(vint32m4_t vd, vint32m4_t vs1, vint32m4_t vs2, size_t vl) {
@@ -306,7 +306,7 @@ vint32m4_t test_vmadd_vv_i32m4(vint32m4_t vd, vint32m4_t vs1, vint32m4_t vs2, si
 
 // CHECK-RV64-LABEL: @test_vmadd_vx_i32m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vmadd.nxv8i32.i32.i64(<vscale x 8 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 8 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vmadd.nxv8i32.i32.i64(<vscale x 8 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 8 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i32> [[TMP0]]
 //
 vint32m4_t test_vmadd_vx_i32m4(vint32m4_t vd, int32_t rs1, vint32m4_t vs2, size_t vl) {
@@ -315,7 +315,7 @@ vint32m4_t test_vmadd_vx_i32m4(vint32m4_t vd, int32_t rs1, vint32m4_t vs2, size_
 
 // CHECK-RV64-LABEL: @test_vmadd_vv_i32m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vmadd.nxv16i32.nxv16i32.i64(<vscale x 16 x i32> [[VD:%.*]], <vscale x 16 x i32> [[VS1:%.*]], <vscale x 16 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vmadd.nxv16i32.nxv16i32.i64(<vscale x 16 x i32> [[VD:%.*]], <vscale x 16 x i32> [[VS1:%.*]], <vscale x 16 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i32> [[TMP0]]
 //
 vint32m8_t test_vmadd_vv_i32m8(vint32m8_t vd, vint32m8_t vs1, vint32m8_t vs2, size_t vl) {
@@ -324,7 +324,7 @@ vint32m8_t test_vmadd_vv_i32m8(vint32m8_t vd, vint32m8_t vs1, vint32m8_t vs2, si
 
 // CHECK-RV64-LABEL: @test_vmadd_vx_i32m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vmadd.nxv16i32.i32.i64(<vscale x 16 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 16 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vmadd.nxv16i32.i32.i64(<vscale x 16 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 16 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i32> [[TMP0]]
 //
 vint32m8_t test_vmadd_vx_i32m8(vint32m8_t vd, int32_t rs1, vint32m8_t vs2, size_t vl) {
@@ -333,7 +333,7 @@ vint32m8_t test_vmadd_vx_i32m8(vint32m8_t vd, int32_t rs1, vint32m8_t vs2, size_
 
 // CHECK-RV64-LABEL: @test_vmadd_vv_i64m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vmadd.nxv1i64.nxv1i64.i64(<vscale x 1 x i64> [[VD:%.*]], <vscale x 1 x i64> [[VS1:%.*]], <vscale x 1 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vmadd.nxv1i64.nxv1i64.i64(<vscale x 1 x i64> [[VD:%.*]], <vscale x 1 x i64> [[VS1:%.*]], <vscale x 1 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
 //
 vint64m1_t test_vmadd_vv_i64m1(vint64m1_t vd, vint64m1_t vs1, vint64m1_t vs2, size_t vl) {
@@ -342,7 +342,7 @@ vint64m1_t test_vmadd_vv_i64m1(vint64m1_t vd, vint64m1_t vs1, vint64m1_t vs2, si
 
 // CHECK-RV64-LABEL: @test_vmadd_vx_i64m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vmadd.nxv1i64.i64.i64(<vscale x 1 x i64> [[VD:%.*]], i64 [[RS1:%.*]], <vscale x 1 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vmadd.nxv1i64.i64.i64(<vscale x 1 x i64> [[VD:%.*]], i64 [[RS1:%.*]], <vscale x 1 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
 //
 vint64m1_t test_vmadd_vx_i64m1(vint64m1_t vd, int64_t rs1, vint64m1_t vs2, size_t vl) {
@@ -351,7 +351,7 @@ vint64m1_t test_vmadd_vx_i64m1(vint64m1_t vd, int64_t rs1, vint64m1_t vs2, size_
 
 // CHECK-RV64-LABEL: @test_vmadd_vv_i64m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vmadd.nxv2i64.nxv2i64.i64(<vscale x 2 x i64> [[VD:%.*]], <vscale x 2 x i64> [[VS1:%.*]], <vscale x 2 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vmadd.nxv2i64.nxv2i64.i64(<vscale x 2 x i64> [[VD:%.*]], <vscale x 2 x i64> [[VS1:%.*]], <vscale x 2 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
 vint64m2_t test_vmadd_vv_i64m2(vint64m2_t vd, vint64m2_t vs1, vint64m2_t vs2, size_t vl) {
@@ -360,7 +360,7 @@ vint64m2_t test_vmadd_vv_i64m2(vint64m2_t vd, vint64m2_t vs1, vint64m2_t vs2, si
 
 // CHECK-RV64-LABEL: @test_vmadd_vx_i64m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vmadd.nxv2i64.i64.i64(<vscale x 2 x i64> [[VD:%.*]], i64 [[RS1:%.*]], <vscale x 2 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vmadd.nxv2i64.i64.i64(<vscale x 2 x i64> [[VD:%.*]], i64 [[RS1:%.*]], <vscale x 2 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
 vint64m2_t test_vmadd_vx_i64m2(vint64m2_t vd, int64_t rs1, vint64m2_t vs2, size_t vl) {
@@ -369,7 +369,7 @@ vint64m2_t test_vmadd_vx_i64m2(vint64m2_t vd, int64_t rs1, vint64m2_t vs2, size_
 
 // CHECK-RV64-LABEL: @test_vmadd_vv_i64m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vmadd.nxv4i64.nxv4i64.i64(<vscale x 4 x i64> [[VD:%.*]], <vscale x 4 x i64> [[VS1:%.*]], <vscale x 4 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vmadd.nxv4i64.nxv4i64.i64(<vscale x 4 x i64> [[VD:%.*]], <vscale x 4 x i64> [[VS1:%.*]], <vscale x 4 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
 //
 vint64m4_t test_vmadd_vv_i64m4(vint64m4_t vd, vint64m4_t vs1, vint64m4_t vs2, size_t vl) {
@@ -378,7 +378,7 @@ vint64m4_t test_vmadd_vv_i64m4(vint64m4_t vd, vint64m4_t vs1, vint64m4_t vs2, si
 
 // CHECK-RV64-LABEL: @test_vmadd_vx_i64m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vmadd.nxv4i64.i64.i64(<vscale x 4 x i64> [[VD:%.*]], i64 [[RS1:%.*]], <vscale x 4 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vmadd.nxv4i64.i64.i64(<vscale x 4 x i64> [[VD:%.*]], i64 [[RS1:%.*]], <vscale x 4 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
 //
 vint64m4_t test_vmadd_vx_i64m4(vint64m4_t vd, int64_t rs1, vint64m4_t vs2, size_t vl) {
@@ -387,7 +387,7 @@ vint64m4_t test_vmadd_vx_i64m4(vint64m4_t vd, int64_t rs1, vint64m4_t vs2, size_
 
 // CHECK-RV64-LABEL: @test_vmadd_vv_i64m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vmadd.nxv8i64.nxv8i64.i64(<vscale x 8 x i64> [[VD:%.*]], <vscale x 8 x i64> [[VS1:%.*]], <vscale x 8 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vmadd.nxv8i64.nxv8i64.i64(<vscale x 8 x i64> [[VD:%.*]], <vscale x 8 x i64> [[VS1:%.*]], <vscale x 8 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
 //
 vint64m8_t test_vmadd_vv_i64m8(vint64m8_t vd, vint64m8_t vs1, vint64m8_t vs2, size_t vl) {
@@ -396,7 +396,7 @@ vint64m8_t test_vmadd_vv_i64m8(vint64m8_t vd, vint64m8_t vs1, vint64m8_t vs2, si
 
 // CHECK-RV64-LABEL: @test_vmadd_vx_i64m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vmadd.nxv8i64.i64.i64(<vscale x 8 x i64> [[VD:%.*]], i64 [[RS1:%.*]], <vscale x 8 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vmadd.nxv8i64.i64.i64(<vscale x 8 x i64> [[VD:%.*]], i64 [[RS1:%.*]], <vscale x 8 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
 //
 vint64m8_t test_vmadd_vx_i64m8(vint64m8_t vd, int64_t rs1, vint64m8_t vs2, size_t vl) {
@@ -405,7 +405,7 @@ vint64m8_t test_vmadd_vx_i64m8(vint64m8_t vd, int64_t rs1, vint64m8_t vs2, size_
 
 // CHECK-RV64-LABEL: @test_vmadd_vv_u8mf8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vmadd.nxv1i8.nxv1i8.i64(<vscale x 1 x i8> [[VD:%.*]], <vscale x 1 x i8> [[VS1:%.*]], <vscale x 1 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vmadd.nxv1i8.nxv1i8.i64(<vscale x 1 x i8> [[VD:%.*]], <vscale x 1 x i8> [[VS1:%.*]], <vscale x 1 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i8> [[TMP0]]
 //
 vuint8mf8_t test_vmadd_vv_u8mf8(vuint8mf8_t vd, vuint8mf8_t vs1, vuint8mf8_t vs2, size_t vl) {
@@ -414,7 +414,7 @@ vuint8mf8_t test_vmadd_vv_u8mf8(vuint8mf8_t vd, vuint8mf8_t vs1, vuint8mf8_t vs2
 
 // CHECK-RV64-LABEL: @test_vmadd_vx_u8mf8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vmadd.nxv1i8.i8.i64(<vscale x 1 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 1 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vmadd.nxv1i8.i8.i64(<vscale x 1 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 1 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i8> [[TMP0]]
 //
 vuint8mf8_t test_vmadd_vx_u8mf8(vuint8mf8_t vd, uint8_t rs1, vuint8mf8_t vs2, size_t vl) {
@@ -423,7 +423,7 @@ vuint8mf8_t test_vmadd_vx_u8mf8(vuint8mf8_t vd, uint8_t rs1, vuint8mf8_t vs2, si
 
 // CHECK-RV64-LABEL: @test_vmadd_vv_u8mf4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vmadd.nxv2i8.nxv2i8.i64(<vscale x 2 x i8> [[VD:%.*]], <vscale x 2 x i8> [[VS1:%.*]], <vscale x 2 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vmadd.nxv2i8.nxv2i8.i64(<vscale x 2 x i8> [[VD:%.*]], <vscale x 2 x i8> [[VS1:%.*]], <vscale x 2 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i8> [[TMP0]]
 //
 vuint8mf4_t test_vmadd_vv_u8mf4(vuint8mf4_t vd, vuint8mf4_t vs1, vuint8mf4_t vs2, size_t vl) {
@@ -432,7 +432,7 @@ vuint8mf4_t test_vmadd_vv_u8mf4(vuint8mf4_t vd, vuint8mf4_t vs1, vuint8mf4_t vs2
 
 // CHECK-RV64-LABEL: @test_vmadd_vx_u8mf4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vmadd.nxv2i8.i8.i64(<vscale x 2 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 2 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vmadd.nxv2i8.i8.i64(<vscale x 2 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 2 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i8> [[TMP0]]
 //
 vuint8mf4_t test_vmadd_vx_u8mf4(vuint8mf4_t vd, uint8_t rs1, vuint8mf4_t vs2, size_t vl) {
@@ -441,7 +441,7 @@ vuint8mf4_t test_vmadd_vx_u8mf4(vuint8mf4_t vd, uint8_t rs1, vuint8mf4_t vs2, si
 
 // CHECK-RV64-LABEL: @test_vmadd_vv_u8mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vmadd.nxv4i8.nxv4i8.i64(<vscale x 4 x i8> [[VD:%.*]], <vscale x 4 x i8> [[VS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vmadd.nxv4i8.nxv4i8.i64(<vscale x 4 x i8> [[VD:%.*]], <vscale x 4 x i8> [[VS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i8> [[TMP0]]
 //
 vuint8mf2_t test_vmadd_vv_u8mf2(vuint8mf2_t vd, vuint8mf2_t vs1, vuint8mf2_t vs2, size_t vl) {
@@ -450,7 +450,7 @@ vuint8mf2_t test_vmadd_vv_u8mf2(vuint8mf2_t vd, vuint8mf2_t vs1, vuint8mf2_t vs2
 
 // CHECK-RV64-LABEL: @test_vmadd_vx_u8mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vmadd.nxv4i8.i8.i64(<vscale x 4 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vmadd.nxv4i8.i8.i64(<vscale x 4 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i8> [[TMP0]]
 //
 vuint8mf2_t test_vmadd_vx_u8mf2(vuint8mf2_t vd, uint8_t rs1, vuint8mf2_t vs2, size_t vl) {
@@ -459,7 +459,7 @@ vuint8mf2_t test_vmadd_vx_u8mf2(vuint8mf2_t vd, uint8_t rs1, vuint8mf2_t vs2, si
 
 // CHECK-RV64-LABEL: @test_vmadd_vv_u8m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vmadd.nxv8i8.nxv8i8.i64(<vscale x 8 x i8> [[VD:%.*]], <vscale x 8 x i8> [[VS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vmadd.nxv8i8.nxv8i8.i64(<vscale x 8 x i8> [[VD:%.*]], <vscale x 8 x i8> [[VS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i8> [[TMP0]]
 //
 vuint8m1_t test_vmadd_vv_u8m1(vuint8m1_t vd, vuint8m1_t vs1, vuint8m1_t vs2, size_t vl) {
@@ -468,7 +468,7 @@ vuint8m1_t test_vmadd_vv_u8m1(vuint8m1_t vd, vuint8m1_t vs1, vuint8m1_t vs2, siz
 
 // CHECK-RV64-LABEL: @test_vmadd_vx_u8m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vmadd.nxv8i8.i8.i64(<vscale x 8 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vmadd.nxv8i8.i8.i64(<vscale x 8 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i8> [[TMP0]]
 //
 vuint8m1_t test_vmadd_vx_u8m1(vuint8m1_t vd, uint8_t rs1, vuint8m1_t vs2, size_t vl) {
@@ -477,7 +477,7 @@ vuint8m1_t test_vmadd_vx_u8m1(vuint8m1_t vd, uint8_t rs1, vuint8m1_t vs2, size_t
 
 // CHECK-RV64-LABEL: @test_vmadd_vv_u8m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vmadd.nxv16i8.nxv16i8.i64(<vscale x 16 x i8> [[VD:%.*]], <vscale x 16 x i8> [[VS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vmadd.nxv16i8.nxv16i8.i64(<vscale x 16 x i8> [[VD:%.*]], <vscale x 16 x i8> [[VS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
 vuint8m2_t test_vmadd_vv_u8m2(vuint8m2_t vd, vuint8m2_t vs1, vuint8m2_t vs2, size_t vl) {
@@ -486,7 +486,7 @@ vuint8m2_t test_vmadd_vv_u8m2(vuint8m2_t vd, vuint8m2_t vs1, vuint8m2_t vs2, siz
 
 // CHECK-RV64-LABEL: @test_vmadd_vx_u8m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vmadd.nxv16i8.i8.i64(<vscale x 16 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vmadd.nxv16i8.i8.i64(<vscale x 16 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
 vuint8m2_t test_vmadd_vx_u8m2(vuint8m2_t vd, uint8_t rs1, vuint8m2_t vs2, size_t vl) {
@@ -495,7 +495,7 @@ vuint8m2_t test_vmadd_vx_u8m2(vuint8m2_t vd, uint8_t rs1, vuint8m2_t vs2, size_t
 
 // CHECK-RV64-LABEL: @test_vmadd_vv_u8m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i8> @llvm.riscv.vmadd.nxv32i8.nxv32i8.i64(<vscale x 32 x i8> [[VD:%.*]], <vscale x 32 x i8> [[VS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i8> @llvm.riscv.vmadd.nxv32i8.nxv32i8.i64(<vscale x 32 x i8> [[VD:%.*]], <vscale x 32 x i8> [[VS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 32 x i8> [[TMP0]]
 //
 vuint8m4_t test_vmadd_vv_u8m4(vuint8m4_t vd, vuint8m4_t vs1, vuint8m4_t vs2, size_t vl) {
@@ -504,7 +504,7 @@ vuint8m4_t test_vmadd_vv_u8m4(vuint8m4_t vd, vuint8m4_t vs1, vuint8m4_t vs2, siz
 
 // CHECK-RV64-LABEL: @test_vmadd_vx_u8m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i8> @llvm.riscv.vmadd.nxv32i8.i8.i64(<vscale x 32 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i8> @llvm.riscv.vmadd.nxv32i8.i8.i64(<vscale x 32 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 32 x i8> [[TMP0]]
 //
 vuint8m4_t test_vmadd_vx_u8m4(vuint8m4_t vd, uint8_t rs1, vuint8m4_t vs2, size_t vl) {
@@ -513,7 +513,7 @@ vuint8m4_t test_vmadd_vx_u8m4(vuint8m4_t vd, uint8_t rs1, vuint8m4_t vs2, size_t
 
 // CHECK-RV64-LABEL: @test_vmadd_vv_u8m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 64 x i8> @llvm.riscv.vmadd.nxv64i8.nxv64i8.i64(<vscale x 64 x i8> [[VD:%.*]], <vscale x 64 x i8> [[VS1:%.*]], <vscale x 64 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 64 x i8> @llvm.riscv.vmadd.nxv64i8.nxv64i8.i64(<vscale x 64 x i8> [[VD:%.*]], <vscale x 64 x i8> [[VS1:%.*]], <vscale x 64 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 64 x i8> [[TMP0]]
 //
 vuint8m8_t test_vmadd_vv_u8m8(vuint8m8_t vd, vuint8m8_t vs1, vuint8m8_t vs2, size_t vl) {
@@ -522,7 +522,7 @@ vuint8m8_t test_vmadd_vv_u8m8(vuint8m8_t vd, vuint8m8_t vs1, vuint8m8_t vs2, siz
 
 // CHECK-RV64-LABEL: @test_vmadd_vx_u8m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 64 x i8> @llvm.riscv.vmadd.nxv64i8.i8.i64(<vscale x 64 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 64 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 64 x i8> @llvm.riscv.vmadd.nxv64i8.i8.i64(<vscale x 64 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 64 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 64 x i8> [[TMP0]]
 //
 vuint8m8_t test_vmadd_vx_u8m8(vuint8m8_t vd, uint8_t rs1, vuint8m8_t vs2, size_t vl) {
@@ -531,7 +531,7 @@ vuint8m8_t test_vmadd_vx_u8m8(vuint8m8_t vd, uint8_t rs1, vuint8m8_t vs2, size_t
 
 // CHECK-RV64-LABEL: @test_vmadd_vv_u16mf4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vmadd.nxv1i16.nxv1i16.i64(<vscale x 1 x i16> [[VD:%.*]], <vscale x 1 x i16> [[VS1:%.*]], <vscale x 1 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vmadd.nxv1i16.nxv1i16.i64(<vscale x 1 x i16> [[VD:%.*]], <vscale x 1 x i16> [[VS1:%.*]], <vscale x 1 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i16> [[TMP0]]
 //
 vuint16mf4_t test_vmadd_vv_u16mf4(vuint16mf4_t vd, vuint16mf4_t vs1, vuint16mf4_t vs2, size_t vl) {
@@ -540,7 +540,7 @@ vuint16mf4_t test_vmadd_vv_u16mf4(vuint16mf4_t vd, vuint16mf4_t vs1, vuint16mf4_
 
 // CHECK-RV64-LABEL: @test_vmadd_vx_u16mf4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vmadd.nxv1i16.i16.i64(<vscale x 1 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 1 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vmadd.nxv1i16.i16.i64(<vscale x 1 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 1 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i16> [[TMP0]]
 //
 vuint16mf4_t test_vmadd_vx_u16mf4(vuint16mf4_t vd, uint16_t rs1, vuint16mf4_t vs2, size_t vl) {
@@ -549,7 +549,7 @@ vuint16mf4_t test_vmadd_vx_u16mf4(vuint16mf4_t vd, uint16_t rs1, vuint16mf4_t vs
 
 // CHECK-RV64-LABEL: @test_vmadd_vv_u16mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vmadd.nxv2i16.nxv2i16.i64(<vscale x 2 x i16> [[VD:%.*]], <vscale x 2 x i16> [[VS1:%.*]], <vscale x 2 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vmadd.nxv2i16.nxv2i16.i64(<vscale x 2 x i16> [[VD:%.*]], <vscale x 2 x i16> [[VS1:%.*]], <vscale x 2 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i16> [[TMP0]]
 //
 vuint16mf2_t test_vmadd_vv_u16mf2(vuint16mf2_t vd, vuint16mf2_t vs1, vuint16mf2_t vs2, size_t vl) {
@@ -558,7 +558,7 @@ vuint16mf2_t test_vmadd_vv_u16mf2(vuint16mf2_t vd, vuint16mf2_t vs1, vuint16mf2_
 
 // CHECK-RV64-LABEL: @test_vmadd_vx_u16mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vmadd.nxv2i16.i16.i64(<vscale x 2 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 2 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vmadd.nxv2i16.i16.i64(<vscale x 2 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 2 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i16> [[TMP0]]
 //
 vuint16mf2_t test_vmadd_vx_u16mf2(vuint16mf2_t vd, uint16_t rs1, vuint16mf2_t vs2, size_t vl) {
@@ -567,7 +567,7 @@ vuint16mf2_t test_vmadd_vx_u16mf2(vuint16mf2_t vd, uint16_t rs1, vuint16mf2_t vs
 
 // CHECK-RV64-LABEL: @test_vmadd_vv_u16m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vmadd.nxv4i16.nxv4i16.i64(<vscale x 4 x i16> [[VD:%.*]], <vscale x 4 x i16> [[VS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vmadd.nxv4i16.nxv4i16.i64(<vscale x 4 x i16> [[VD:%.*]], <vscale x 4 x i16> [[VS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i16> [[TMP0]]
 //
 vuint16m1_t test_vmadd_vv_u16m1(vuint16m1_t vd, vuint16m1_t vs1, vuint16m1_t vs2, size_t vl) {
@@ -576,7 +576,7 @@ vuint16m1_t test_vmadd_vv_u16m1(vuint16m1_t vd, vuint16m1_t vs1, vuint16m1_t vs2
 
 // CHECK-RV64-LABEL: @test_vmadd_vx_u16m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vmadd.nxv4i16.i16.i64(<vscale x 4 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vmadd.nxv4i16.i16.i64(<vscale x 4 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i16> [[TMP0]]
 //
 vuint16m1_t test_vmadd_vx_u16m1(vuint16m1_t vd, uint16_t rs1, vuint16m1_t vs2, size_t vl) {
@@ -585,7 +585,7 @@ vuint16m1_t test_vmadd_vx_u16m1(vuint16m1_t vd, uint16_t rs1, vuint16m1_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vmadd_vv_u16m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vmadd.nxv8i16.nxv8i16.i64(<vscale x 8 x i16> [[VD:%.*]], <vscale x 8 x i16> [[VS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vmadd.nxv8i16.nxv8i16.i64(<vscale x 8 x i16> [[VD:%.*]], <vscale x 8 x i16> [[VS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
 vuint16m2_t test_vmadd_vv_u16m2(vuint16m2_t vd, vuint16m2_t vs1, vuint16m2_t vs2, size_t vl) {
@@ -594,7 +594,7 @@ vuint16m2_t test_vmadd_vv_u16m2(vuint16m2_t vd, vuint16m2_t vs1, vuint16m2_t vs2
 
 // CHECK-RV64-LABEL: @test_vmadd_vx_u16m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vmadd.nxv8i16.i16.i64(<vscale x 8 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vmadd.nxv8i16.i16.i64(<vscale x 8 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
 vuint16m2_t test_vmadd_vx_u16m2(vuint16m2_t vd, uint16_t rs1, vuint16m2_t vs2, size_t vl) {
@@ -603,7 +603,7 @@ vuint16m2_t test_vmadd_vx_u16m2(vuint16m2_t vd, uint16_t rs1, vuint16m2_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vmadd_vv_u16m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vmadd.nxv16i16.nxv16i16.i64(<vscale x 16 x i16> [[VD:%.*]], <vscale x 16 x i16> [[VS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vmadd.nxv16i16.nxv16i16.i64(<vscale x 16 x i16> [[VD:%.*]], <vscale x 16 x i16> [[VS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i16> [[TMP0]]
 //
 vuint16m4_t test_vmadd_vv_u16m4(vuint16m4_t vd, vuint16m4_t vs1, vuint16m4_t vs2, size_t vl) {
@@ -612,7 +612,7 @@ vuint16m4_t test_vmadd_vv_u16m4(vuint16m4_t vd, vuint16m4_t vs1, vuint16m4_t vs2
 
 // CHECK-RV64-LABEL: @test_vmadd_vx_u16m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vmadd.nxv16i16.i16.i64(<vscale x 16 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vmadd.nxv16i16.i16.i64(<vscale x 16 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i16> [[TMP0]]
 //
 vuint16m4_t test_vmadd_vx_u16m4(vuint16m4_t vd, uint16_t rs1, vuint16m4_t vs2, size_t vl) {
@@ -621,7 +621,7 @@ vuint16m4_t test_vmadd_vx_u16m4(vuint16m4_t vd, uint16_t rs1, vuint16m4_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vmadd_vv_u16m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vmadd.nxv32i16.nxv32i16.i64(<vscale x 32 x i16> [[VD:%.*]], <vscale x 32 x i16> [[VS1:%.*]], <vscale x 32 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vmadd.nxv32i16.nxv32i16.i64(<vscale x 32 x i16> [[VD:%.*]], <vscale x 32 x i16> [[VS1:%.*]], <vscale x 32 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 32 x i16> [[TMP0]]
 //
 vuint16m8_t test_vmadd_vv_u16m8(vuint16m8_t vd, vuint16m8_t vs1, vuint16m8_t vs2, size_t vl) {
@@ -630,7 +630,7 @@ vuint16m8_t test_vmadd_vv_u16m8(vuint16m8_t vd, vuint16m8_t vs1, vuint16m8_t vs2
 
 // CHECK-RV64-LABEL: @test_vmadd_vx_u16m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vmadd.nxv32i16.i16.i64(<vscale x 32 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 32 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vmadd.nxv32i16.i16.i64(<vscale x 32 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 32 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 32 x i16> [[TMP0]]
 //
 vuint16m8_t test_vmadd_vx_u16m8(vuint16m8_t vd, uint16_t rs1, vuint16m8_t vs2, size_t vl) {
@@ -639,7 +639,7 @@ vuint16m8_t test_vmadd_vx_u16m8(vuint16m8_t vd, uint16_t rs1, vuint16m8_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vmadd_vv_u32mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vmadd.nxv1i32.nxv1i32.i64(<vscale x 1 x i32> [[VD:%.*]], <vscale x 1 x i32> [[VS1:%.*]], <vscale x 1 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vmadd.nxv1i32.nxv1i32.i64(<vscale x 1 x i32> [[VD:%.*]], <vscale x 1 x i32> [[VS1:%.*]], <vscale x 1 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i32> [[TMP0]]
 //
 vuint32mf2_t test_vmadd_vv_u32mf2(vuint32mf2_t vd, vuint32mf2_t vs1, vuint32mf2_t vs2, size_t vl) {
@@ -648,7 +648,7 @@ vuint32mf2_t test_vmadd_vv_u32mf2(vuint32mf2_t vd, vuint32mf2_t vs1, vuint32mf2_
 
 // CHECK-RV64-LABEL: @test_vmadd_vx_u32mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vmadd.nxv1i32.i32.i64(<vscale x 1 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 1 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vmadd.nxv1i32.i32.i64(<vscale x 1 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 1 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i32> [[TMP0]]
 //
 vuint32mf2_t test_vmadd_vx_u32mf2(vuint32mf2_t vd, uint32_t rs1, vuint32mf2_t vs2, size_t vl) {
@@ -657,7 +657,7 @@ vuint32mf2_t test_vmadd_vx_u32mf2(vuint32mf2_t vd, uint32_t rs1, vuint32mf2_t vs
 
 // CHECK-RV64-LABEL: @test_vmadd_vv_u32m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vmadd.nxv2i32.nxv2i32.i64(<vscale x 2 x i32> [[VD:%.*]], <vscale x 2 x i32> [[VS1:%.*]], <vscale x 2 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vmadd.nxv2i32.nxv2i32.i64(<vscale x 2 x i32> [[VD:%.*]], <vscale x 2 x i32> [[VS1:%.*]], <vscale x 2 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i32> [[TMP0]]
 //
 vuint32m1_t test_vmadd_vv_u32m1(vuint32m1_t vd, vuint32m1_t vs1, vuint32m1_t vs2, size_t vl) {
@@ -666,7 +666,7 @@ vuint32m1_t test_vmadd_vv_u32m1(vuint32m1_t vd, vuint32m1_t vs1, vuint32m1_t vs2
 
 // CHECK-RV64-LABEL: @test_vmadd_vx_u32m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vmadd.nxv2i32.i32.i64(<vscale x 2 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 2 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vmadd.nxv2i32.i32.i64(<vscale x 2 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 2 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i32> [[TMP0]]
 //
 vuint32m1_t test_vmadd_vx_u32m1(vuint32m1_t vd, uint32_t rs1, vuint32m1_t vs2, size_t vl) {
@@ -675,7 +675,7 @@ vuint32m1_t test_vmadd_vx_u32m1(vuint32m1_t vd, uint32_t rs1, vuint32m1_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vmadd_vv_u32m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vmadd.nxv4i32.nxv4i32.i64(<vscale x 4 x i32> [[VD:%.*]], <vscale x 4 x i32> [[VS1:%.*]], <vscale x 4 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vmadd.nxv4i32.nxv4i32.i64(<vscale x 4 x i32> [[VD:%.*]], <vscale x 4 x i32> [[VS1:%.*]], <vscale x 4 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
 vuint32m2_t test_vmadd_vv_u32m2(vuint32m2_t vd, vuint32m2_t vs1, vuint32m2_t vs2, size_t vl) {
@@ -684,7 +684,7 @@ vuint32m2_t test_vmadd_vv_u32m2(vuint32m2_t vd, vuint32m2_t vs1, vuint32m2_t vs2
 
 // CHECK-RV64-LABEL: @test_vmadd_vx_u32m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vmadd.nxv4i32.i32.i64(<vscale x 4 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 4 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vmadd.nxv4i32.i32.i64(<vscale x 4 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 4 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
 vuint32m2_t test_vmadd_vx_u32m2(vuint32m2_t vd, uint32_t rs1, vuint32m2_t vs2, size_t vl) {
@@ -693,7 +693,7 @@ vuint32m2_t test_vmadd_vx_u32m2(vuint32m2_t vd, uint32_t rs1, vuint32m2_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vmadd_vv_u32m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vmadd.nxv8i32.nxv8i32.i64(<vscale x 8 x i32> [[VD:%.*]], <vscale x 8 x i32> [[VS1:%.*]], <vscale x 8 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vmadd.nxv8i32.nxv8i32.i64(<vscale x 8 x i32> [[VD:%.*]], <vscale x 8 x i32> [[VS1:%.*]], <vscale x 8 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i32> [[TMP0]]
 //
 vuint32m4_t test_vmadd_vv_u32m4(vuint32m4_t vd, vuint32m4_t vs1, vuint32m4_t vs2, size_t vl) {
@@ -702,7 +702,7 @@ vuint32m4_t test_vmadd_vv_u32m4(vuint32m4_t vd, vuint32m4_t vs1, vuint32m4_t vs2
 
 // CHECK-RV64-LABEL: @test_vmadd_vx_u32m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vmadd.nxv8i32.i32.i64(<vscale x 8 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 8 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vmadd.nxv8i32.i32.i64(<vscale x 8 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 8 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i32> [[TMP0]]
 //
 vuint32m4_t test_vmadd_vx_u32m4(vuint32m4_t vd, uint32_t rs1, vuint32m4_t vs2, size_t vl) {
@@ -711,7 +711,7 @@ vuint32m4_t test_vmadd_vx_u32m4(vuint32m4_t vd, uint32_t rs1, vuint32m4_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vmadd_vv_u32m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vmadd.nxv16i32.nxv16i32.i64(<vscale x 16 x i32> [[VD:%.*]], <vscale x 16 x i32> [[VS1:%.*]], <vscale x 16 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vmadd.nxv16i32.nxv16i32.i64(<vscale x 16 x i32> [[VD:%.*]], <vscale x 16 x i32> [[VS1:%.*]], <vscale x 16 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i32> [[TMP0]]
 //
 vuint32m8_t test_vmadd_vv_u32m8(vuint32m8_t vd, vuint32m8_t vs1, vuint32m8_t vs2, size_t vl) {
@@ -720,7 +720,7 @@ vuint32m8_t test_vmadd_vv_u32m8(vuint32m8_t vd, vuint32m8_t vs1, vuint32m8_t vs2
 
 // CHECK-RV64-LABEL: @test_vmadd_vx_u32m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vmadd.nxv16i32.i32.i64(<vscale x 16 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 16 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vmadd.nxv16i32.i32.i64(<vscale x 16 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 16 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i32> [[TMP0]]
 //
 vuint32m8_t test_vmadd_vx_u32m8(vuint32m8_t vd, uint32_t rs1, vuint32m8_t vs2, size_t vl) {
@@ -729,7 +729,7 @@ vuint32m8_t test_vmadd_vx_u32m8(vuint32m8_t vd, uint32_t rs1, vuint32m8_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vmadd_vv_u64m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vmadd.nxv1i64.nxv1i64.i64(<vscale x 1 x i64> [[VD:%.*]], <vscale x 1 x i64> [[VS1:%.*]], <vscale x 1 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vmadd.nxv1i64.nxv1i64.i64(<vscale x 1 x i64> [[VD:%.*]], <vscale x 1 x i64> [[VS1:%.*]], <vscale x 1 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
 //
 vuint64m1_t test_vmadd_vv_u64m1(vuint64m1_t vd, vuint64m1_t vs1, vuint64m1_t vs2, size_t vl) {
@@ -738,7 +738,7 @@ vuint64m1_t test_vmadd_vv_u64m1(vuint64m1_t vd, vuint64m1_t vs1, vuint64m1_t vs2
 
 // CHECK-RV64-LABEL: @test_vmadd_vx_u64m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vmadd.nxv1i64.i64.i64(<vscale x 1 x i64> [[VD:%.*]], i64 [[RS1:%.*]], <vscale x 1 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vmadd.nxv1i64.i64.i64(<vscale x 1 x i64> [[VD:%.*]], i64 [[RS1:%.*]], <vscale x 1 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
 //
 vuint64m1_t test_vmadd_vx_u64m1(vuint64m1_t vd, uint64_t rs1, vuint64m1_t vs2, size_t vl) {
@@ -747,7 +747,7 @@ vuint64m1_t test_vmadd_vx_u64m1(vuint64m1_t vd, uint64_t rs1, vuint64m1_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vmadd_vv_u64m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vmadd.nxv2i64.nxv2i64.i64(<vscale x 2 x i64> [[VD:%.*]], <vscale x 2 x i64> [[VS1:%.*]], <vscale x 2 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vmadd.nxv2i64.nxv2i64.i64(<vscale x 2 x i64> [[VD:%.*]], <vscale x 2 x i64> [[VS1:%.*]], <vscale x 2 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
 vuint64m2_t test_vmadd_vv_u64m2(vuint64m2_t vd, vuint64m2_t vs1, vuint64m2_t vs2, size_t vl) {
@@ -756,7 +756,7 @@ vuint64m2_t test_vmadd_vv_u64m2(vuint64m2_t vd, vuint64m2_t vs1, vuint64m2_t vs2
 
 // CHECK-RV64-LABEL: @test_vmadd_vx_u64m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vmadd.nxv2i64.i64.i64(<vscale x 2 x i64> [[VD:%.*]], i64 [[RS1:%.*]], <vscale x 2 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vmadd.nxv2i64.i64.i64(<vscale x 2 x i64> [[VD:%.*]], i64 [[RS1:%.*]], <vscale x 2 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
 vuint64m2_t test_vmadd_vx_u64m2(vuint64m2_t vd, uint64_t rs1, vuint64m2_t vs2, size_t vl) {
@@ -765,7 +765,7 @@ vuint64m2_t test_vmadd_vx_u64m2(vuint64m2_t vd, uint64_t rs1, vuint64m2_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vmadd_vv_u64m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vmadd.nxv4i64.nxv4i64.i64(<vscale x 4 x i64> [[VD:%.*]], <vscale x 4 x i64> [[VS1:%.*]], <vscale x 4 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vmadd.nxv4i64.nxv4i64.i64(<vscale x 4 x i64> [[VD:%.*]], <vscale x 4 x i64> [[VS1:%.*]], <vscale x 4 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
 //
 vuint64m4_t test_vmadd_vv_u64m4(vuint64m4_t vd, vuint64m4_t vs1, vuint64m4_t vs2, size_t vl) {
@@ -774,7 +774,7 @@ vuint64m4_t test_vmadd_vv_u64m4(vuint64m4_t vd, vuint64m4_t vs1, vuint64m4_t vs2
 
 // CHECK-RV64-LABEL: @test_vmadd_vx_u64m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vmadd.nxv4i64.i64.i64(<vscale x 4 x i64> [[VD:%.*]], i64 [[RS1:%.*]], <vscale x 4 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vmadd.nxv4i64.i64.i64(<vscale x 4 x i64> [[VD:%.*]], i64 [[RS1:%.*]], <vscale x 4 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
 //
 vuint64m4_t test_vmadd_vx_u64m4(vuint64m4_t vd, uint64_t rs1, vuint64m4_t vs2, size_t vl) {
@@ -783,7 +783,7 @@ vuint64m4_t test_vmadd_vx_u64m4(vuint64m4_t vd, uint64_t rs1, vuint64m4_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vmadd_vv_u64m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vmadd.nxv8i64.nxv8i64.i64(<vscale x 8 x i64> [[VD:%.*]], <vscale x 8 x i64> [[VS1:%.*]], <vscale x 8 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vmadd.nxv8i64.nxv8i64.i64(<vscale x 8 x i64> [[VD:%.*]], <vscale x 8 x i64> [[VS1:%.*]], <vscale x 8 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
 //
 vuint64m8_t test_vmadd_vv_u64m8(vuint64m8_t vd, vuint64m8_t vs1, vuint64m8_t vs2, size_t vl) {
@@ -792,7 +792,7 @@ vuint64m8_t test_vmadd_vv_u64m8(vuint64m8_t vd, vuint64m8_t vs1, vuint64m8_t vs2
 
 // CHECK-RV64-LABEL: @test_vmadd_vx_u64m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vmadd.nxv8i64.i64.i64(<vscale x 8 x i64> [[VD:%.*]], i64 [[RS1:%.*]], <vscale x 8 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vmadd.nxv8i64.i64.i64(<vscale x 8 x i64> [[VD:%.*]], i64 [[RS1:%.*]], <vscale x 8 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
 //
 vuint64m8_t test_vmadd_vx_u64m8(vuint64m8_t vd, uint64_t rs1, vuint64m8_t vs2, size_t vl) {

diff  --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vnmsac.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vnmsac.c
index 17e84017be333..ddc6a60903c82 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vnmsac.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vnmsac.c
@@ -9,7 +9,7 @@
 
 // CHECK-RV64-LABEL: @test_vnmsac_vv_i8mf8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vnmsac.nxv1i8.nxv1i8.i64(<vscale x 1 x i8> [[VD:%.*]], <vscale x 1 x i8> [[VS1:%.*]], <vscale x 1 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vnmsac.nxv1i8.nxv1i8.i64(<vscale x 1 x i8> [[VD:%.*]], <vscale x 1 x i8> [[VS1:%.*]], <vscale x 1 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i8> [[TMP0]]
 //
 vint8mf8_t test_vnmsac_vv_i8mf8(vint8mf8_t vd, vint8mf8_t vs1, vint8mf8_t vs2, size_t vl) {
@@ -18,7 +18,7 @@ vint8mf8_t test_vnmsac_vv_i8mf8(vint8mf8_t vd, vint8mf8_t vs1, vint8mf8_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vnmsac_vx_i8mf8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vnmsac.nxv1i8.i8.i64(<vscale x 1 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 1 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vnmsac.nxv1i8.i8.i64(<vscale x 1 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 1 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i8> [[TMP0]]
 //
 vint8mf8_t test_vnmsac_vx_i8mf8(vint8mf8_t vd, int8_t rs1, vint8mf8_t vs2, size_t vl) {
@@ -27,7 +27,7 @@ vint8mf8_t test_vnmsac_vx_i8mf8(vint8mf8_t vd, int8_t rs1, vint8mf8_t vs2, size_
 
 // CHECK-RV64-LABEL: @test_vnmsac_vv_i8mf4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vnmsac.nxv2i8.nxv2i8.i64(<vscale x 2 x i8> [[VD:%.*]], <vscale x 2 x i8> [[VS1:%.*]], <vscale x 2 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vnmsac.nxv2i8.nxv2i8.i64(<vscale x 2 x i8> [[VD:%.*]], <vscale x 2 x i8> [[VS1:%.*]], <vscale x 2 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i8> [[TMP0]]
 //
 vint8mf4_t test_vnmsac_vv_i8mf4(vint8mf4_t vd, vint8mf4_t vs1, vint8mf4_t vs2, size_t vl) {
@@ -36,7 +36,7 @@ vint8mf4_t test_vnmsac_vv_i8mf4(vint8mf4_t vd, vint8mf4_t vs1, vint8mf4_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vnmsac_vx_i8mf4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vnmsac.nxv2i8.i8.i64(<vscale x 2 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 2 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vnmsac.nxv2i8.i8.i64(<vscale x 2 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 2 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i8> [[TMP0]]
 //
 vint8mf4_t test_vnmsac_vx_i8mf4(vint8mf4_t vd, int8_t rs1, vint8mf4_t vs2, size_t vl) {
@@ -45,7 +45,7 @@ vint8mf4_t test_vnmsac_vx_i8mf4(vint8mf4_t vd, int8_t rs1, vint8mf4_t vs2, size_
 
 // CHECK-RV64-LABEL: @test_vnmsac_vv_i8mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vnmsac.nxv4i8.nxv4i8.i64(<vscale x 4 x i8> [[VD:%.*]], <vscale x 4 x i8> [[VS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vnmsac.nxv4i8.nxv4i8.i64(<vscale x 4 x i8> [[VD:%.*]], <vscale x 4 x i8> [[VS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i8> [[TMP0]]
 //
 vint8mf2_t test_vnmsac_vv_i8mf2(vint8mf2_t vd, vint8mf2_t vs1, vint8mf2_t vs2, size_t vl) {
@@ -54,7 +54,7 @@ vint8mf2_t test_vnmsac_vv_i8mf2(vint8mf2_t vd, vint8mf2_t vs1, vint8mf2_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vnmsac_vx_i8mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vnmsac.nxv4i8.i8.i64(<vscale x 4 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vnmsac.nxv4i8.i8.i64(<vscale x 4 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i8> [[TMP0]]
 //
 vint8mf2_t test_vnmsac_vx_i8mf2(vint8mf2_t vd, int8_t rs1, vint8mf2_t vs2, size_t vl) {
@@ -63,7 +63,7 @@ vint8mf2_t test_vnmsac_vx_i8mf2(vint8mf2_t vd, int8_t rs1, vint8mf2_t vs2, size_
 
 // CHECK-RV64-LABEL: @test_vnmsac_vv_i8m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vnmsac.nxv8i8.nxv8i8.i64(<vscale x 8 x i8> [[VD:%.*]], <vscale x 8 x i8> [[VS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vnmsac.nxv8i8.nxv8i8.i64(<vscale x 8 x i8> [[VD:%.*]], <vscale x 8 x i8> [[VS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i8> [[TMP0]]
 //
 vint8m1_t test_vnmsac_vv_i8m1(vint8m1_t vd, vint8m1_t vs1, vint8m1_t vs2, size_t vl) {
@@ -72,7 +72,7 @@ vint8m1_t test_vnmsac_vv_i8m1(vint8m1_t vd, vint8m1_t vs1, vint8m1_t vs2, size_t
 
 // CHECK-RV64-LABEL: @test_vnmsac_vx_i8m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vnmsac.nxv8i8.i8.i64(<vscale x 8 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vnmsac.nxv8i8.i8.i64(<vscale x 8 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i8> [[TMP0]]
 //
 vint8m1_t test_vnmsac_vx_i8m1(vint8m1_t vd, int8_t rs1, vint8m1_t vs2, size_t vl) {
@@ -81,7 +81,7 @@ vint8m1_t test_vnmsac_vx_i8m1(vint8m1_t vd, int8_t rs1, vint8m1_t vs2, size_t vl
 
 // CHECK-RV64-LABEL: @test_vnmsac_vv_i8m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vnmsac.nxv16i8.nxv16i8.i64(<vscale x 16 x i8> [[VD:%.*]], <vscale x 16 x i8> [[VS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vnmsac.nxv16i8.nxv16i8.i64(<vscale x 16 x i8> [[VD:%.*]], <vscale x 16 x i8> [[VS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
 vint8m2_t test_vnmsac_vv_i8m2(vint8m2_t vd, vint8m2_t vs1, vint8m2_t vs2, size_t vl) {
@@ -90,7 +90,7 @@ vint8m2_t test_vnmsac_vv_i8m2(vint8m2_t vd, vint8m2_t vs1, vint8m2_t vs2, size_t
 
 // CHECK-RV64-LABEL: @test_vnmsac_vx_i8m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vnmsac.nxv16i8.i8.i64(<vscale x 16 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vnmsac.nxv16i8.i8.i64(<vscale x 16 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
 vint8m2_t test_vnmsac_vx_i8m2(vint8m2_t vd, int8_t rs1, vint8m2_t vs2, size_t vl) {
@@ -99,7 +99,7 @@ vint8m2_t test_vnmsac_vx_i8m2(vint8m2_t vd, int8_t rs1, vint8m2_t vs2, size_t vl
 
 // CHECK-RV64-LABEL: @test_vnmsac_vv_i8m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i8> @llvm.riscv.vnmsac.nxv32i8.nxv32i8.i64(<vscale x 32 x i8> [[VD:%.*]], <vscale x 32 x i8> [[VS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i8> @llvm.riscv.vnmsac.nxv32i8.nxv32i8.i64(<vscale x 32 x i8> [[VD:%.*]], <vscale x 32 x i8> [[VS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 32 x i8> [[TMP0]]
 //
 vint8m4_t test_vnmsac_vv_i8m4(vint8m4_t vd, vint8m4_t vs1, vint8m4_t vs2, size_t vl) {
@@ -108,7 +108,7 @@ vint8m4_t test_vnmsac_vv_i8m4(vint8m4_t vd, vint8m4_t vs1, vint8m4_t vs2, size_t
 
 // CHECK-RV64-LABEL: @test_vnmsac_vx_i8m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i8> @llvm.riscv.vnmsac.nxv32i8.i8.i64(<vscale x 32 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i8> @llvm.riscv.vnmsac.nxv32i8.i8.i64(<vscale x 32 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 32 x i8> [[TMP0]]
 //
 vint8m4_t test_vnmsac_vx_i8m4(vint8m4_t vd, int8_t rs1, vint8m4_t vs2, size_t vl) {
@@ -117,7 +117,7 @@ vint8m4_t test_vnmsac_vx_i8m4(vint8m4_t vd, int8_t rs1, vint8m4_t vs2, size_t vl
 
 // CHECK-RV64-LABEL: @test_vnmsac_vv_i8m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 64 x i8> @llvm.riscv.vnmsac.nxv64i8.nxv64i8.i64(<vscale x 64 x i8> [[VD:%.*]], <vscale x 64 x i8> [[VS1:%.*]], <vscale x 64 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 64 x i8> @llvm.riscv.vnmsac.nxv64i8.nxv64i8.i64(<vscale x 64 x i8> [[VD:%.*]], <vscale x 64 x i8> [[VS1:%.*]], <vscale x 64 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 64 x i8> [[TMP0]]
 //
 vint8m8_t test_vnmsac_vv_i8m8(vint8m8_t vd, vint8m8_t vs1, vint8m8_t vs2, size_t vl) {
@@ -126,7 +126,7 @@ vint8m8_t test_vnmsac_vv_i8m8(vint8m8_t vd, vint8m8_t vs1, vint8m8_t vs2, size_t
 
 // CHECK-RV64-LABEL: @test_vnmsac_vx_i8m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 64 x i8> @llvm.riscv.vnmsac.nxv64i8.i8.i64(<vscale x 64 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 64 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 64 x i8> @llvm.riscv.vnmsac.nxv64i8.i8.i64(<vscale x 64 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 64 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 64 x i8> [[TMP0]]
 //
 vint8m8_t test_vnmsac_vx_i8m8(vint8m8_t vd, int8_t rs1, vint8m8_t vs2, size_t vl) {
@@ -135,7 +135,7 @@ vint8m8_t test_vnmsac_vx_i8m8(vint8m8_t vd, int8_t rs1, vint8m8_t vs2, size_t vl
 
 // CHECK-RV64-LABEL: @test_vnmsac_vv_i16mf4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vnmsac.nxv1i16.nxv1i16.i64(<vscale x 1 x i16> [[VD:%.*]], <vscale x 1 x i16> [[VS1:%.*]], <vscale x 1 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vnmsac.nxv1i16.nxv1i16.i64(<vscale x 1 x i16> [[VD:%.*]], <vscale x 1 x i16> [[VS1:%.*]], <vscale x 1 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i16> [[TMP0]]
 //
 vint16mf4_t test_vnmsac_vv_i16mf4(vint16mf4_t vd, vint16mf4_t vs1, vint16mf4_t vs2, size_t vl) {
@@ -144,7 +144,7 @@ vint16mf4_t test_vnmsac_vv_i16mf4(vint16mf4_t vd, vint16mf4_t vs1, vint16mf4_t v
 
 // CHECK-RV64-LABEL: @test_vnmsac_vx_i16mf4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vnmsac.nxv1i16.i16.i64(<vscale x 1 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 1 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vnmsac.nxv1i16.i16.i64(<vscale x 1 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 1 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i16> [[TMP0]]
 //
 vint16mf4_t test_vnmsac_vx_i16mf4(vint16mf4_t vd, int16_t rs1, vint16mf4_t vs2, size_t vl) {
@@ -153,7 +153,7 @@ vint16mf4_t test_vnmsac_vx_i16mf4(vint16mf4_t vd, int16_t rs1, vint16mf4_t vs2,
 
 // CHECK-RV64-LABEL: @test_vnmsac_vv_i16mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vnmsac.nxv2i16.nxv2i16.i64(<vscale x 2 x i16> [[VD:%.*]], <vscale x 2 x i16> [[VS1:%.*]], <vscale x 2 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vnmsac.nxv2i16.nxv2i16.i64(<vscale x 2 x i16> [[VD:%.*]], <vscale x 2 x i16> [[VS1:%.*]], <vscale x 2 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i16> [[TMP0]]
 //
 vint16mf2_t test_vnmsac_vv_i16mf2(vint16mf2_t vd, vint16mf2_t vs1, vint16mf2_t vs2, size_t vl) {
@@ -162,7 +162,7 @@ vint16mf2_t test_vnmsac_vv_i16mf2(vint16mf2_t vd, vint16mf2_t vs1, vint16mf2_t v
 
 // CHECK-RV64-LABEL: @test_vnmsac_vx_i16mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vnmsac.nxv2i16.i16.i64(<vscale x 2 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 2 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vnmsac.nxv2i16.i16.i64(<vscale x 2 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 2 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i16> [[TMP0]]
 //
 vint16mf2_t test_vnmsac_vx_i16mf2(vint16mf2_t vd, int16_t rs1, vint16mf2_t vs2, size_t vl) {
@@ -171,7 +171,7 @@ vint16mf2_t test_vnmsac_vx_i16mf2(vint16mf2_t vd, int16_t rs1, vint16mf2_t vs2,
 
 // CHECK-RV64-LABEL: @test_vnmsac_vv_i16m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vnmsac.nxv4i16.nxv4i16.i64(<vscale x 4 x i16> [[VD:%.*]], <vscale x 4 x i16> [[VS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vnmsac.nxv4i16.nxv4i16.i64(<vscale x 4 x i16> [[VD:%.*]], <vscale x 4 x i16> [[VS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i16> [[TMP0]]
 //
 vint16m1_t test_vnmsac_vv_i16m1(vint16m1_t vd, vint16m1_t vs1, vint16m1_t vs2, size_t vl) {
@@ -180,7 +180,7 @@ vint16m1_t test_vnmsac_vv_i16m1(vint16m1_t vd, vint16m1_t vs1, vint16m1_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vnmsac_vx_i16m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vnmsac.nxv4i16.i16.i64(<vscale x 4 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vnmsac.nxv4i16.i16.i64(<vscale x 4 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i16> [[TMP0]]
 //
 vint16m1_t test_vnmsac_vx_i16m1(vint16m1_t vd, int16_t rs1, vint16m1_t vs2, size_t vl) {
@@ -189,7 +189,7 @@ vint16m1_t test_vnmsac_vx_i16m1(vint16m1_t vd, int16_t rs1, vint16m1_t vs2, size
 
 // CHECK-RV64-LABEL: @test_vnmsac_vv_i16m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vnmsac.nxv8i16.nxv8i16.i64(<vscale x 8 x i16> [[VD:%.*]], <vscale x 8 x i16> [[VS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vnmsac.nxv8i16.nxv8i16.i64(<vscale x 8 x i16> [[VD:%.*]], <vscale x 8 x i16> [[VS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
 vint16m2_t test_vnmsac_vv_i16m2(vint16m2_t vd, vint16m2_t vs1, vint16m2_t vs2, size_t vl) {
@@ -198,7 +198,7 @@ vint16m2_t test_vnmsac_vv_i16m2(vint16m2_t vd, vint16m2_t vs1, vint16m2_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vnmsac_vx_i16m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vnmsac.nxv8i16.i16.i64(<vscale x 8 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vnmsac.nxv8i16.i16.i64(<vscale x 8 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
 vint16m2_t test_vnmsac_vx_i16m2(vint16m2_t vd, int16_t rs1, vint16m2_t vs2, size_t vl) {
@@ -207,7 +207,7 @@ vint16m2_t test_vnmsac_vx_i16m2(vint16m2_t vd, int16_t rs1, vint16m2_t vs2, size
 
 // CHECK-RV64-LABEL: @test_vnmsac_vv_i16m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vnmsac.nxv16i16.nxv16i16.i64(<vscale x 16 x i16> [[VD:%.*]], <vscale x 16 x i16> [[VS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vnmsac.nxv16i16.nxv16i16.i64(<vscale x 16 x i16> [[VD:%.*]], <vscale x 16 x i16> [[VS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i16> [[TMP0]]
 //
 vint16m4_t test_vnmsac_vv_i16m4(vint16m4_t vd, vint16m4_t vs1, vint16m4_t vs2, size_t vl) {
@@ -216,7 +216,7 @@ vint16m4_t test_vnmsac_vv_i16m4(vint16m4_t vd, vint16m4_t vs1, vint16m4_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vnmsac_vx_i16m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vnmsac.nxv16i16.i16.i64(<vscale x 16 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vnmsac.nxv16i16.i16.i64(<vscale x 16 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i16> [[TMP0]]
 //
 vint16m4_t test_vnmsac_vx_i16m4(vint16m4_t vd, int16_t rs1, vint16m4_t vs2, size_t vl) {
@@ -225,7 +225,7 @@ vint16m4_t test_vnmsac_vx_i16m4(vint16m4_t vd, int16_t rs1, vint16m4_t vs2, size
 
 // CHECK-RV64-LABEL: @test_vnmsac_vv_i16m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vnmsac.nxv32i16.nxv32i16.i64(<vscale x 32 x i16> [[VD:%.*]], <vscale x 32 x i16> [[VS1:%.*]], <vscale x 32 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vnmsac.nxv32i16.nxv32i16.i64(<vscale x 32 x i16> [[VD:%.*]], <vscale x 32 x i16> [[VS1:%.*]], <vscale x 32 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 32 x i16> [[TMP0]]
 //
 vint16m8_t test_vnmsac_vv_i16m8(vint16m8_t vd, vint16m8_t vs1, vint16m8_t vs2, size_t vl) {
@@ -234,7 +234,7 @@ vint16m8_t test_vnmsac_vv_i16m8(vint16m8_t vd, vint16m8_t vs1, vint16m8_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vnmsac_vx_i16m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vnmsac.nxv32i16.i16.i64(<vscale x 32 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 32 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vnmsac.nxv32i16.i16.i64(<vscale x 32 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 32 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 32 x i16> [[TMP0]]
 //
 vint16m8_t test_vnmsac_vx_i16m8(vint16m8_t vd, int16_t rs1, vint16m8_t vs2, size_t vl) {
@@ -243,7 +243,7 @@ vint16m8_t test_vnmsac_vx_i16m8(vint16m8_t vd, int16_t rs1, vint16m8_t vs2, size
 
 // CHECK-RV64-LABEL: @test_vnmsac_vv_i32mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vnmsac.nxv1i32.nxv1i32.i64(<vscale x 1 x i32> [[VD:%.*]], <vscale x 1 x i32> [[VS1:%.*]], <vscale x 1 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vnmsac.nxv1i32.nxv1i32.i64(<vscale x 1 x i32> [[VD:%.*]], <vscale x 1 x i32> [[VS1:%.*]], <vscale x 1 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i32> [[TMP0]]
 //
 vint32mf2_t test_vnmsac_vv_i32mf2(vint32mf2_t vd, vint32mf2_t vs1, vint32mf2_t vs2, size_t vl) {
@@ -252,7 +252,7 @@ vint32mf2_t test_vnmsac_vv_i32mf2(vint32mf2_t vd, vint32mf2_t vs1, vint32mf2_t v
 
 // CHECK-RV64-LABEL: @test_vnmsac_vx_i32mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vnmsac.nxv1i32.i32.i64(<vscale x 1 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 1 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vnmsac.nxv1i32.i32.i64(<vscale x 1 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 1 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i32> [[TMP0]]
 //
 vint32mf2_t test_vnmsac_vx_i32mf2(vint32mf2_t vd, int32_t rs1, vint32mf2_t vs2, size_t vl) {
@@ -261,7 +261,7 @@ vint32mf2_t test_vnmsac_vx_i32mf2(vint32mf2_t vd, int32_t rs1, vint32mf2_t vs2,
 
 // CHECK-RV64-LABEL: @test_vnmsac_vv_i32m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vnmsac.nxv2i32.nxv2i32.i64(<vscale x 2 x i32> [[VD:%.*]], <vscale x 2 x i32> [[VS1:%.*]], <vscale x 2 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vnmsac.nxv2i32.nxv2i32.i64(<vscale x 2 x i32> [[VD:%.*]], <vscale x 2 x i32> [[VS1:%.*]], <vscale x 2 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i32> [[TMP0]]
 //
 vint32m1_t test_vnmsac_vv_i32m1(vint32m1_t vd, vint32m1_t vs1, vint32m1_t vs2, size_t vl) {
@@ -270,7 +270,7 @@ vint32m1_t test_vnmsac_vv_i32m1(vint32m1_t vd, vint32m1_t vs1, vint32m1_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vnmsac_vx_i32m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vnmsac.nxv2i32.i32.i64(<vscale x 2 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 2 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vnmsac.nxv2i32.i32.i64(<vscale x 2 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 2 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i32> [[TMP0]]
 //
 vint32m1_t test_vnmsac_vx_i32m1(vint32m1_t vd, int32_t rs1, vint32m1_t vs2, size_t vl) {
@@ -279,7 +279,7 @@ vint32m1_t test_vnmsac_vx_i32m1(vint32m1_t vd, int32_t rs1, vint32m1_t vs2, size
 
 // CHECK-RV64-LABEL: @test_vnmsac_vv_i32m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vnmsac.nxv4i32.nxv4i32.i64(<vscale x 4 x i32> [[VD:%.*]], <vscale x 4 x i32> [[VS1:%.*]], <vscale x 4 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vnmsac.nxv4i32.nxv4i32.i64(<vscale x 4 x i32> [[VD:%.*]], <vscale x 4 x i32> [[VS1:%.*]], <vscale x 4 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
 vint32m2_t test_vnmsac_vv_i32m2(vint32m2_t vd, vint32m2_t vs1, vint32m2_t vs2, size_t vl) {
@@ -288,7 +288,7 @@ vint32m2_t test_vnmsac_vv_i32m2(vint32m2_t vd, vint32m2_t vs1, vint32m2_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vnmsac_vx_i32m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vnmsac.nxv4i32.i32.i64(<vscale x 4 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 4 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vnmsac.nxv4i32.i32.i64(<vscale x 4 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 4 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
 vint32m2_t test_vnmsac_vx_i32m2(vint32m2_t vd, int32_t rs1, vint32m2_t vs2, size_t vl) {
@@ -297,7 +297,7 @@ vint32m2_t test_vnmsac_vx_i32m2(vint32m2_t vd, int32_t rs1, vint32m2_t vs2, size
 
 // CHECK-RV64-LABEL: @test_vnmsac_vv_i32m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vnmsac.nxv8i32.nxv8i32.i64(<vscale x 8 x i32> [[VD:%.*]], <vscale x 8 x i32> [[VS1:%.*]], <vscale x 8 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vnmsac.nxv8i32.nxv8i32.i64(<vscale x 8 x i32> [[VD:%.*]], <vscale x 8 x i32> [[VS1:%.*]], <vscale x 8 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i32> [[TMP0]]
 //
 vint32m4_t test_vnmsac_vv_i32m4(vint32m4_t vd, vint32m4_t vs1, vint32m4_t vs2, size_t vl) {
@@ -306,7 +306,7 @@ vint32m4_t test_vnmsac_vv_i32m4(vint32m4_t vd, vint32m4_t vs1, vint32m4_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vnmsac_vx_i32m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vnmsac.nxv8i32.i32.i64(<vscale x 8 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 8 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vnmsac.nxv8i32.i32.i64(<vscale x 8 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 8 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i32> [[TMP0]]
 //
 vint32m4_t test_vnmsac_vx_i32m4(vint32m4_t vd, int32_t rs1, vint32m4_t vs2, size_t vl) {
@@ -315,7 +315,7 @@ vint32m4_t test_vnmsac_vx_i32m4(vint32m4_t vd, int32_t rs1, vint32m4_t vs2, size
 
 // CHECK-RV64-LABEL: @test_vnmsac_vv_i32m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vnmsac.nxv16i32.nxv16i32.i64(<vscale x 16 x i32> [[VD:%.*]], <vscale x 16 x i32> [[VS1:%.*]], <vscale x 16 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vnmsac.nxv16i32.nxv16i32.i64(<vscale x 16 x i32> [[VD:%.*]], <vscale x 16 x i32> [[VS1:%.*]], <vscale x 16 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i32> [[TMP0]]
 //
 vint32m8_t test_vnmsac_vv_i32m8(vint32m8_t vd, vint32m8_t vs1, vint32m8_t vs2, size_t vl) {
@@ -324,7 +324,7 @@ vint32m8_t test_vnmsac_vv_i32m8(vint32m8_t vd, vint32m8_t vs1, vint32m8_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vnmsac_vx_i32m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vnmsac.nxv16i32.i32.i64(<vscale x 16 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 16 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vnmsac.nxv16i32.i32.i64(<vscale x 16 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 16 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i32> [[TMP0]]
 //
 vint32m8_t test_vnmsac_vx_i32m8(vint32m8_t vd, int32_t rs1, vint32m8_t vs2, size_t vl) {
@@ -333,7 +333,7 @@ vint32m8_t test_vnmsac_vx_i32m8(vint32m8_t vd, int32_t rs1, vint32m8_t vs2, size
 
 // CHECK-RV64-LABEL: @test_vnmsac_vv_i64m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vnmsac.nxv1i64.nxv1i64.i64(<vscale x 1 x i64> [[VD:%.*]], <vscale x 1 x i64> [[VS1:%.*]], <vscale x 1 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vnmsac.nxv1i64.nxv1i64.i64(<vscale x 1 x i64> [[VD:%.*]], <vscale x 1 x i64> [[VS1:%.*]], <vscale x 1 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
 //
 vint64m1_t test_vnmsac_vv_i64m1(vint64m1_t vd, vint64m1_t vs1, vint64m1_t vs2, size_t vl) {
@@ -342,7 +342,7 @@ vint64m1_t test_vnmsac_vv_i64m1(vint64m1_t vd, vint64m1_t vs1, vint64m1_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vnmsac_vx_i64m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vnmsac.nxv1i64.i64.i64(<vscale x 1 x i64> [[VD:%.*]], i64 [[RS1:%.*]], <vscale x 1 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vnmsac.nxv1i64.i64.i64(<vscale x 1 x i64> [[VD:%.*]], i64 [[RS1:%.*]], <vscale x 1 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
 //
 vint64m1_t test_vnmsac_vx_i64m1(vint64m1_t vd, int64_t rs1, vint64m1_t vs2, size_t vl) {
@@ -351,7 +351,7 @@ vint64m1_t test_vnmsac_vx_i64m1(vint64m1_t vd, int64_t rs1, vint64m1_t vs2, size
 
 // CHECK-RV64-LABEL: @test_vnmsac_vv_i64m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vnmsac.nxv2i64.nxv2i64.i64(<vscale x 2 x i64> [[VD:%.*]], <vscale x 2 x i64> [[VS1:%.*]], <vscale x 2 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vnmsac.nxv2i64.nxv2i64.i64(<vscale x 2 x i64> [[VD:%.*]], <vscale x 2 x i64> [[VS1:%.*]], <vscale x 2 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
 vint64m2_t test_vnmsac_vv_i64m2(vint64m2_t vd, vint64m2_t vs1, vint64m2_t vs2, size_t vl) {
@@ -360,7 +360,7 @@ vint64m2_t test_vnmsac_vv_i64m2(vint64m2_t vd, vint64m2_t vs1, vint64m2_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vnmsac_vx_i64m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vnmsac.nxv2i64.i64.i64(<vscale x 2 x i64> [[VD:%.*]], i64 [[RS1:%.*]], <vscale x 2 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vnmsac.nxv2i64.i64.i64(<vscale x 2 x i64> [[VD:%.*]], i64 [[RS1:%.*]], <vscale x 2 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
 vint64m2_t test_vnmsac_vx_i64m2(vint64m2_t vd, int64_t rs1, vint64m2_t vs2, size_t vl) {
@@ -369,7 +369,7 @@ vint64m2_t test_vnmsac_vx_i64m2(vint64m2_t vd, int64_t rs1, vint64m2_t vs2, size
 
 // CHECK-RV64-LABEL: @test_vnmsac_vv_i64m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vnmsac.nxv4i64.nxv4i64.i64(<vscale x 4 x i64> [[VD:%.*]], <vscale x 4 x i64> [[VS1:%.*]], <vscale x 4 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vnmsac.nxv4i64.nxv4i64.i64(<vscale x 4 x i64> [[VD:%.*]], <vscale x 4 x i64> [[VS1:%.*]], <vscale x 4 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
 //
 vint64m4_t test_vnmsac_vv_i64m4(vint64m4_t vd, vint64m4_t vs1, vint64m4_t vs2, size_t vl) {
@@ -378,7 +378,7 @@ vint64m4_t test_vnmsac_vv_i64m4(vint64m4_t vd, vint64m4_t vs1, vint64m4_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vnmsac_vx_i64m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vnmsac.nxv4i64.i64.i64(<vscale x 4 x i64> [[VD:%.*]], i64 [[RS1:%.*]], <vscale x 4 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vnmsac.nxv4i64.i64.i64(<vscale x 4 x i64> [[VD:%.*]], i64 [[RS1:%.*]], <vscale x 4 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
 //
 vint64m4_t test_vnmsac_vx_i64m4(vint64m4_t vd, int64_t rs1, vint64m4_t vs2, size_t vl) {
@@ -387,7 +387,7 @@ vint64m4_t test_vnmsac_vx_i64m4(vint64m4_t vd, int64_t rs1, vint64m4_t vs2, size
 
 // CHECK-RV64-LABEL: @test_vnmsac_vv_i64m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vnmsac.nxv8i64.nxv8i64.i64(<vscale x 8 x i64> [[VD:%.*]], <vscale x 8 x i64> [[VS1:%.*]], <vscale x 8 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vnmsac.nxv8i64.nxv8i64.i64(<vscale x 8 x i64> [[VD:%.*]], <vscale x 8 x i64> [[VS1:%.*]], <vscale x 8 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
 //
 vint64m8_t test_vnmsac_vv_i64m8(vint64m8_t vd, vint64m8_t vs1, vint64m8_t vs2, size_t vl) {
@@ -396,7 +396,7 @@ vint64m8_t test_vnmsac_vv_i64m8(vint64m8_t vd, vint64m8_t vs1, vint64m8_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vnmsac_vx_i64m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vnmsac.nxv8i64.i64.i64(<vscale x 8 x i64> [[VD:%.*]], i64 [[RS1:%.*]], <vscale x 8 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vnmsac.nxv8i64.i64.i64(<vscale x 8 x i64> [[VD:%.*]], i64 [[RS1:%.*]], <vscale x 8 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
 //
 vint64m8_t test_vnmsac_vx_i64m8(vint64m8_t vd, int64_t rs1, vint64m8_t vs2, size_t vl) {
@@ -405,7 +405,7 @@ vint64m8_t test_vnmsac_vx_i64m8(vint64m8_t vd, int64_t rs1, vint64m8_t vs2, size
 
 // CHECK-RV64-LABEL: @test_vnmsac_vv_u8mf8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vnmsac.nxv1i8.nxv1i8.i64(<vscale x 1 x i8> [[VD:%.*]], <vscale x 1 x i8> [[VS1:%.*]], <vscale x 1 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vnmsac.nxv1i8.nxv1i8.i64(<vscale x 1 x i8> [[VD:%.*]], <vscale x 1 x i8> [[VS1:%.*]], <vscale x 1 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i8> [[TMP0]]
 //
 vuint8mf8_t test_vnmsac_vv_u8mf8(vuint8mf8_t vd, vuint8mf8_t vs1, vuint8mf8_t vs2, size_t vl) {
@@ -414,7 +414,7 @@ vuint8mf8_t test_vnmsac_vv_u8mf8(vuint8mf8_t vd, vuint8mf8_t vs1, vuint8mf8_t vs
 
 // CHECK-RV64-LABEL: @test_vnmsac_vx_u8mf8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vnmsac.nxv1i8.i8.i64(<vscale x 1 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 1 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vnmsac.nxv1i8.i8.i64(<vscale x 1 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 1 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i8> [[TMP0]]
 //
 vuint8mf8_t test_vnmsac_vx_u8mf8(vuint8mf8_t vd, uint8_t rs1, vuint8mf8_t vs2, size_t vl) {
@@ -423,7 +423,7 @@ vuint8mf8_t test_vnmsac_vx_u8mf8(vuint8mf8_t vd, uint8_t rs1, vuint8mf8_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vnmsac_vv_u8mf4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vnmsac.nxv2i8.nxv2i8.i64(<vscale x 2 x i8> [[VD:%.*]], <vscale x 2 x i8> [[VS1:%.*]], <vscale x 2 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vnmsac.nxv2i8.nxv2i8.i64(<vscale x 2 x i8> [[VD:%.*]], <vscale x 2 x i8> [[VS1:%.*]], <vscale x 2 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i8> [[TMP0]]
 //
 vuint8mf4_t test_vnmsac_vv_u8mf4(vuint8mf4_t vd, vuint8mf4_t vs1, vuint8mf4_t vs2, size_t vl) {
@@ -432,7 +432,7 @@ vuint8mf4_t test_vnmsac_vv_u8mf4(vuint8mf4_t vd, vuint8mf4_t vs1, vuint8mf4_t vs
 
 // CHECK-RV64-LABEL: @test_vnmsac_vx_u8mf4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vnmsac.nxv2i8.i8.i64(<vscale x 2 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 2 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vnmsac.nxv2i8.i8.i64(<vscale x 2 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 2 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i8> [[TMP0]]
 //
 vuint8mf4_t test_vnmsac_vx_u8mf4(vuint8mf4_t vd, uint8_t rs1, vuint8mf4_t vs2, size_t vl) {
@@ -441,7 +441,7 @@ vuint8mf4_t test_vnmsac_vx_u8mf4(vuint8mf4_t vd, uint8_t rs1, vuint8mf4_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vnmsac_vv_u8mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vnmsac.nxv4i8.nxv4i8.i64(<vscale x 4 x i8> [[VD:%.*]], <vscale x 4 x i8> [[VS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vnmsac.nxv4i8.nxv4i8.i64(<vscale x 4 x i8> [[VD:%.*]], <vscale x 4 x i8> [[VS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i8> [[TMP0]]
 //
 vuint8mf2_t test_vnmsac_vv_u8mf2(vuint8mf2_t vd, vuint8mf2_t vs1, vuint8mf2_t vs2, size_t vl) {
@@ -450,7 +450,7 @@ vuint8mf2_t test_vnmsac_vv_u8mf2(vuint8mf2_t vd, vuint8mf2_t vs1, vuint8mf2_t vs
 
 // CHECK-RV64-LABEL: @test_vnmsac_vx_u8mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vnmsac.nxv4i8.i8.i64(<vscale x 4 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vnmsac.nxv4i8.i8.i64(<vscale x 4 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i8> [[TMP0]]
 //
 vuint8mf2_t test_vnmsac_vx_u8mf2(vuint8mf2_t vd, uint8_t rs1, vuint8mf2_t vs2, size_t vl) {
@@ -459,7 +459,7 @@ vuint8mf2_t test_vnmsac_vx_u8mf2(vuint8mf2_t vd, uint8_t rs1, vuint8mf2_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vnmsac_vv_u8m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vnmsac.nxv8i8.nxv8i8.i64(<vscale x 8 x i8> [[VD:%.*]], <vscale x 8 x i8> [[VS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vnmsac.nxv8i8.nxv8i8.i64(<vscale x 8 x i8> [[VD:%.*]], <vscale x 8 x i8> [[VS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i8> [[TMP0]]
 //
 vuint8m1_t test_vnmsac_vv_u8m1(vuint8m1_t vd, vuint8m1_t vs1, vuint8m1_t vs2, size_t vl) {
@@ -468,7 +468,7 @@ vuint8m1_t test_vnmsac_vv_u8m1(vuint8m1_t vd, vuint8m1_t vs1, vuint8m1_t vs2, si
 
 // CHECK-RV64-LABEL: @test_vnmsac_vx_u8m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vnmsac.nxv8i8.i8.i64(<vscale x 8 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vnmsac.nxv8i8.i8.i64(<vscale x 8 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i8> [[TMP0]]
 //
 vuint8m1_t test_vnmsac_vx_u8m1(vuint8m1_t vd, uint8_t rs1, vuint8m1_t vs2, size_t vl) {
@@ -477,7 +477,7 @@ vuint8m1_t test_vnmsac_vx_u8m1(vuint8m1_t vd, uint8_t rs1, vuint8m1_t vs2, size_
 
 // CHECK-RV64-LABEL: @test_vnmsac_vv_u8m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vnmsac.nxv16i8.nxv16i8.i64(<vscale x 16 x i8> [[VD:%.*]], <vscale x 16 x i8> [[VS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vnmsac.nxv16i8.nxv16i8.i64(<vscale x 16 x i8> [[VD:%.*]], <vscale x 16 x i8> [[VS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
 vuint8m2_t test_vnmsac_vv_u8m2(vuint8m2_t vd, vuint8m2_t vs1, vuint8m2_t vs2, size_t vl) {
@@ -486,7 +486,7 @@ vuint8m2_t test_vnmsac_vv_u8m2(vuint8m2_t vd, vuint8m2_t vs1, vuint8m2_t vs2, si
 
 // CHECK-RV64-LABEL: @test_vnmsac_vx_u8m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vnmsac.nxv16i8.i8.i64(<vscale x 16 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vnmsac.nxv16i8.i8.i64(<vscale x 16 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
 vuint8m2_t test_vnmsac_vx_u8m2(vuint8m2_t vd, uint8_t rs1, vuint8m2_t vs2, size_t vl) {
@@ -495,7 +495,7 @@ vuint8m2_t test_vnmsac_vx_u8m2(vuint8m2_t vd, uint8_t rs1, vuint8m2_t vs2, size_
 
 // CHECK-RV64-LABEL: @test_vnmsac_vv_u8m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i8> @llvm.riscv.vnmsac.nxv32i8.nxv32i8.i64(<vscale x 32 x i8> [[VD:%.*]], <vscale x 32 x i8> [[VS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i8> @llvm.riscv.vnmsac.nxv32i8.nxv32i8.i64(<vscale x 32 x i8> [[VD:%.*]], <vscale x 32 x i8> [[VS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 32 x i8> [[TMP0]]
 //
 vuint8m4_t test_vnmsac_vv_u8m4(vuint8m4_t vd, vuint8m4_t vs1, vuint8m4_t vs2, size_t vl) {
@@ -504,7 +504,7 @@ vuint8m4_t test_vnmsac_vv_u8m4(vuint8m4_t vd, vuint8m4_t vs1, vuint8m4_t vs2, si
 
 // CHECK-RV64-LABEL: @test_vnmsac_vx_u8m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i8> @llvm.riscv.vnmsac.nxv32i8.i8.i64(<vscale x 32 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i8> @llvm.riscv.vnmsac.nxv32i8.i8.i64(<vscale x 32 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 32 x i8> [[TMP0]]
 //
 vuint8m4_t test_vnmsac_vx_u8m4(vuint8m4_t vd, uint8_t rs1, vuint8m4_t vs2, size_t vl) {
@@ -513,7 +513,7 @@ vuint8m4_t test_vnmsac_vx_u8m4(vuint8m4_t vd, uint8_t rs1, vuint8m4_t vs2, size_
 
 // CHECK-RV64-LABEL: @test_vnmsac_vv_u8m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 64 x i8> @llvm.riscv.vnmsac.nxv64i8.nxv64i8.i64(<vscale x 64 x i8> [[VD:%.*]], <vscale x 64 x i8> [[VS1:%.*]], <vscale x 64 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 64 x i8> @llvm.riscv.vnmsac.nxv64i8.nxv64i8.i64(<vscale x 64 x i8> [[VD:%.*]], <vscale x 64 x i8> [[VS1:%.*]], <vscale x 64 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 64 x i8> [[TMP0]]
 //
 vuint8m8_t test_vnmsac_vv_u8m8(vuint8m8_t vd, vuint8m8_t vs1, vuint8m8_t vs2, size_t vl) {
@@ -522,7 +522,7 @@ vuint8m8_t test_vnmsac_vv_u8m8(vuint8m8_t vd, vuint8m8_t vs1, vuint8m8_t vs2, si
 
 // CHECK-RV64-LABEL: @test_vnmsac_vx_u8m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 64 x i8> @llvm.riscv.vnmsac.nxv64i8.i8.i64(<vscale x 64 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 64 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 64 x i8> @llvm.riscv.vnmsac.nxv64i8.i8.i64(<vscale x 64 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 64 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 64 x i8> [[TMP0]]
 //
 vuint8m8_t test_vnmsac_vx_u8m8(vuint8m8_t vd, uint8_t rs1, vuint8m8_t vs2, size_t vl) {
@@ -531,7 +531,7 @@ vuint8m8_t test_vnmsac_vx_u8m8(vuint8m8_t vd, uint8_t rs1, vuint8m8_t vs2, size_
 
 // CHECK-RV64-LABEL: @test_vnmsac_vv_u16mf4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vnmsac.nxv1i16.nxv1i16.i64(<vscale x 1 x i16> [[VD:%.*]], <vscale x 1 x i16> [[VS1:%.*]], <vscale x 1 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vnmsac.nxv1i16.nxv1i16.i64(<vscale x 1 x i16> [[VD:%.*]], <vscale x 1 x i16> [[VS1:%.*]], <vscale x 1 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i16> [[TMP0]]
 //
 vuint16mf4_t test_vnmsac_vv_u16mf4(vuint16mf4_t vd, vuint16mf4_t vs1, vuint16mf4_t vs2, size_t vl) {
@@ -540,7 +540,7 @@ vuint16mf4_t test_vnmsac_vv_u16mf4(vuint16mf4_t vd, vuint16mf4_t vs1, vuint16mf4
 
 // CHECK-RV64-LABEL: @test_vnmsac_vx_u16mf4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vnmsac.nxv1i16.i16.i64(<vscale x 1 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 1 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vnmsac.nxv1i16.i16.i64(<vscale x 1 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 1 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i16> [[TMP0]]
 //
 vuint16mf4_t test_vnmsac_vx_u16mf4(vuint16mf4_t vd, uint16_t rs1, vuint16mf4_t vs2, size_t vl) {
@@ -549,7 +549,7 @@ vuint16mf4_t test_vnmsac_vx_u16mf4(vuint16mf4_t vd, uint16_t rs1, vuint16mf4_t v
 
 // CHECK-RV64-LABEL: @test_vnmsac_vv_u16mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vnmsac.nxv2i16.nxv2i16.i64(<vscale x 2 x i16> [[VD:%.*]], <vscale x 2 x i16> [[VS1:%.*]], <vscale x 2 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vnmsac.nxv2i16.nxv2i16.i64(<vscale x 2 x i16> [[VD:%.*]], <vscale x 2 x i16> [[VS1:%.*]], <vscale x 2 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i16> [[TMP0]]
 //
 vuint16mf2_t test_vnmsac_vv_u16mf2(vuint16mf2_t vd, vuint16mf2_t vs1, vuint16mf2_t vs2, size_t vl) {
@@ -558,7 +558,7 @@ vuint16mf2_t test_vnmsac_vv_u16mf2(vuint16mf2_t vd, vuint16mf2_t vs1, vuint16mf2
 
 // CHECK-RV64-LABEL: @test_vnmsac_vx_u16mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vnmsac.nxv2i16.i16.i64(<vscale x 2 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 2 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vnmsac.nxv2i16.i16.i64(<vscale x 2 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 2 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i16> [[TMP0]]
 //
 vuint16mf2_t test_vnmsac_vx_u16mf2(vuint16mf2_t vd, uint16_t rs1, vuint16mf2_t vs2, size_t vl) {
@@ -567,7 +567,7 @@ vuint16mf2_t test_vnmsac_vx_u16mf2(vuint16mf2_t vd, uint16_t rs1, vuint16mf2_t v
 
 // CHECK-RV64-LABEL: @test_vnmsac_vv_u16m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vnmsac.nxv4i16.nxv4i16.i64(<vscale x 4 x i16> [[VD:%.*]], <vscale x 4 x i16> [[VS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vnmsac.nxv4i16.nxv4i16.i64(<vscale x 4 x i16> [[VD:%.*]], <vscale x 4 x i16> [[VS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i16> [[TMP0]]
 //
 vuint16m1_t test_vnmsac_vv_u16m1(vuint16m1_t vd, vuint16m1_t vs1, vuint16m1_t vs2, size_t vl) {
@@ -576,7 +576,7 @@ vuint16m1_t test_vnmsac_vv_u16m1(vuint16m1_t vd, vuint16m1_t vs1, vuint16m1_t vs
 
 // CHECK-RV64-LABEL: @test_vnmsac_vx_u16m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vnmsac.nxv4i16.i16.i64(<vscale x 4 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vnmsac.nxv4i16.i16.i64(<vscale x 4 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i16> [[TMP0]]
 //
 vuint16m1_t test_vnmsac_vx_u16m1(vuint16m1_t vd, uint16_t rs1, vuint16m1_t vs2, size_t vl) {
@@ -585,7 +585,7 @@ vuint16m1_t test_vnmsac_vx_u16m1(vuint16m1_t vd, uint16_t rs1, vuint16m1_t vs2,
 
 // CHECK-RV64-LABEL: @test_vnmsac_vv_u16m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vnmsac.nxv8i16.nxv8i16.i64(<vscale x 8 x i16> [[VD:%.*]], <vscale x 8 x i16> [[VS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vnmsac.nxv8i16.nxv8i16.i64(<vscale x 8 x i16> [[VD:%.*]], <vscale x 8 x i16> [[VS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
 vuint16m2_t test_vnmsac_vv_u16m2(vuint16m2_t vd, vuint16m2_t vs1, vuint16m2_t vs2, size_t vl) {
@@ -594,7 +594,7 @@ vuint16m2_t test_vnmsac_vv_u16m2(vuint16m2_t vd, vuint16m2_t vs1, vuint16m2_t vs
 
 // CHECK-RV64-LABEL: @test_vnmsac_vx_u16m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vnmsac.nxv8i16.i16.i64(<vscale x 8 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vnmsac.nxv8i16.i16.i64(<vscale x 8 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
 vuint16m2_t test_vnmsac_vx_u16m2(vuint16m2_t vd, uint16_t rs1, vuint16m2_t vs2, size_t vl) {
@@ -603,7 +603,7 @@ vuint16m2_t test_vnmsac_vx_u16m2(vuint16m2_t vd, uint16_t rs1, vuint16m2_t vs2,
 
 // CHECK-RV64-LABEL: @test_vnmsac_vv_u16m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vnmsac.nxv16i16.nxv16i16.i64(<vscale x 16 x i16> [[VD:%.*]], <vscale x 16 x i16> [[VS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vnmsac.nxv16i16.nxv16i16.i64(<vscale x 16 x i16> [[VD:%.*]], <vscale x 16 x i16> [[VS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i16> [[TMP0]]
 //
 vuint16m4_t test_vnmsac_vv_u16m4(vuint16m4_t vd, vuint16m4_t vs1, vuint16m4_t vs2, size_t vl) {
@@ -612,7 +612,7 @@ vuint16m4_t test_vnmsac_vv_u16m4(vuint16m4_t vd, vuint16m4_t vs1, vuint16m4_t vs
 
 // CHECK-RV64-LABEL: @test_vnmsac_vx_u16m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vnmsac.nxv16i16.i16.i64(<vscale x 16 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vnmsac.nxv16i16.i16.i64(<vscale x 16 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i16> [[TMP0]]
 //
 vuint16m4_t test_vnmsac_vx_u16m4(vuint16m4_t vd, uint16_t rs1, vuint16m4_t vs2, size_t vl) {
@@ -621,7 +621,7 @@ vuint16m4_t test_vnmsac_vx_u16m4(vuint16m4_t vd, uint16_t rs1, vuint16m4_t vs2,
 
 // CHECK-RV64-LABEL: @test_vnmsac_vv_u16m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vnmsac.nxv32i16.nxv32i16.i64(<vscale x 32 x i16> [[VD:%.*]], <vscale x 32 x i16> [[VS1:%.*]], <vscale x 32 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vnmsac.nxv32i16.nxv32i16.i64(<vscale x 32 x i16> [[VD:%.*]], <vscale x 32 x i16> [[VS1:%.*]], <vscale x 32 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 32 x i16> [[TMP0]]
 //
 vuint16m8_t test_vnmsac_vv_u16m8(vuint16m8_t vd, vuint16m8_t vs1, vuint16m8_t vs2, size_t vl) {
@@ -630,7 +630,7 @@ vuint16m8_t test_vnmsac_vv_u16m8(vuint16m8_t vd, vuint16m8_t vs1, vuint16m8_t vs
 
 // CHECK-RV64-LABEL: @test_vnmsac_vx_u16m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vnmsac.nxv32i16.i16.i64(<vscale x 32 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 32 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vnmsac.nxv32i16.i16.i64(<vscale x 32 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 32 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 32 x i16> [[TMP0]]
 //
 vuint16m8_t test_vnmsac_vx_u16m8(vuint16m8_t vd, uint16_t rs1, vuint16m8_t vs2, size_t vl) {
@@ -639,7 +639,7 @@ vuint16m8_t test_vnmsac_vx_u16m8(vuint16m8_t vd, uint16_t rs1, vuint16m8_t vs2,
 
 // CHECK-RV64-LABEL: @test_vnmsac_vv_u32mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vnmsac.nxv1i32.nxv1i32.i64(<vscale x 1 x i32> [[VD:%.*]], <vscale x 1 x i32> [[VS1:%.*]], <vscale x 1 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vnmsac.nxv1i32.nxv1i32.i64(<vscale x 1 x i32> [[VD:%.*]], <vscale x 1 x i32> [[VS1:%.*]], <vscale x 1 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i32> [[TMP0]]
 //
 vuint32mf2_t test_vnmsac_vv_u32mf2(vuint32mf2_t vd, vuint32mf2_t vs1, vuint32mf2_t vs2, size_t vl) {
@@ -648,7 +648,7 @@ vuint32mf2_t test_vnmsac_vv_u32mf2(vuint32mf2_t vd, vuint32mf2_t vs1, vuint32mf2
 
 // CHECK-RV64-LABEL: @test_vnmsac_vx_u32mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vnmsac.nxv1i32.i32.i64(<vscale x 1 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 1 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vnmsac.nxv1i32.i32.i64(<vscale x 1 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 1 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i32> [[TMP0]]
 //
 vuint32mf2_t test_vnmsac_vx_u32mf2(vuint32mf2_t vd, uint32_t rs1, vuint32mf2_t vs2, size_t vl) {
@@ -657,7 +657,7 @@ vuint32mf2_t test_vnmsac_vx_u32mf2(vuint32mf2_t vd, uint32_t rs1, vuint32mf2_t v
 
 // CHECK-RV64-LABEL: @test_vnmsac_vv_u32m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vnmsac.nxv2i32.nxv2i32.i64(<vscale x 2 x i32> [[VD:%.*]], <vscale x 2 x i32> [[VS1:%.*]], <vscale x 2 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vnmsac.nxv2i32.nxv2i32.i64(<vscale x 2 x i32> [[VD:%.*]], <vscale x 2 x i32> [[VS1:%.*]], <vscale x 2 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i32> [[TMP0]]
 //
 vuint32m1_t test_vnmsac_vv_u32m1(vuint32m1_t vd, vuint32m1_t vs1, vuint32m1_t vs2, size_t vl) {
@@ -666,7 +666,7 @@ vuint32m1_t test_vnmsac_vv_u32m1(vuint32m1_t vd, vuint32m1_t vs1, vuint32m1_t vs
 
 // CHECK-RV64-LABEL: @test_vnmsac_vx_u32m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vnmsac.nxv2i32.i32.i64(<vscale x 2 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 2 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vnmsac.nxv2i32.i32.i64(<vscale x 2 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 2 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i32> [[TMP0]]
 //
 vuint32m1_t test_vnmsac_vx_u32m1(vuint32m1_t vd, uint32_t rs1, vuint32m1_t vs2, size_t vl) {
@@ -675,7 +675,7 @@ vuint32m1_t test_vnmsac_vx_u32m1(vuint32m1_t vd, uint32_t rs1, vuint32m1_t vs2,
 
 // CHECK-RV64-LABEL: @test_vnmsac_vv_u32m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vnmsac.nxv4i32.nxv4i32.i64(<vscale x 4 x i32> [[VD:%.*]], <vscale x 4 x i32> [[VS1:%.*]], <vscale x 4 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vnmsac.nxv4i32.nxv4i32.i64(<vscale x 4 x i32> [[VD:%.*]], <vscale x 4 x i32> [[VS1:%.*]], <vscale x 4 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
 vuint32m2_t test_vnmsac_vv_u32m2(vuint32m2_t vd, vuint32m2_t vs1, vuint32m2_t vs2, size_t vl) {
@@ -684,7 +684,7 @@ vuint32m2_t test_vnmsac_vv_u32m2(vuint32m2_t vd, vuint32m2_t vs1, vuint32m2_t vs
 
 // CHECK-RV64-LABEL: @test_vnmsac_vx_u32m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vnmsac.nxv4i32.i32.i64(<vscale x 4 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 4 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vnmsac.nxv4i32.i32.i64(<vscale x 4 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 4 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
 vuint32m2_t test_vnmsac_vx_u32m2(vuint32m2_t vd, uint32_t rs1, vuint32m2_t vs2, size_t vl) {
@@ -693,7 +693,7 @@ vuint32m2_t test_vnmsac_vx_u32m2(vuint32m2_t vd, uint32_t rs1, vuint32m2_t vs2,
 
 // CHECK-RV64-LABEL: @test_vnmsac_vv_u32m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vnmsac.nxv8i32.nxv8i32.i64(<vscale x 8 x i32> [[VD:%.*]], <vscale x 8 x i32> [[VS1:%.*]], <vscale x 8 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vnmsac.nxv8i32.nxv8i32.i64(<vscale x 8 x i32> [[VD:%.*]], <vscale x 8 x i32> [[VS1:%.*]], <vscale x 8 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i32> [[TMP0]]
 //
 vuint32m4_t test_vnmsac_vv_u32m4(vuint32m4_t vd, vuint32m4_t vs1, vuint32m4_t vs2, size_t vl) {
@@ -702,7 +702,7 @@ vuint32m4_t test_vnmsac_vv_u32m4(vuint32m4_t vd, vuint32m4_t vs1, vuint32m4_t vs
 
 // CHECK-RV64-LABEL: @test_vnmsac_vx_u32m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vnmsac.nxv8i32.i32.i64(<vscale x 8 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 8 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vnmsac.nxv8i32.i32.i64(<vscale x 8 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 8 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i32> [[TMP0]]
 //
 vuint32m4_t test_vnmsac_vx_u32m4(vuint32m4_t vd, uint32_t rs1, vuint32m4_t vs2, size_t vl) {
@@ -711,7 +711,7 @@ vuint32m4_t test_vnmsac_vx_u32m4(vuint32m4_t vd, uint32_t rs1, vuint32m4_t vs2,
 
 // CHECK-RV64-LABEL: @test_vnmsac_vv_u32m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vnmsac.nxv16i32.nxv16i32.i64(<vscale x 16 x i32> [[VD:%.*]], <vscale x 16 x i32> [[VS1:%.*]], <vscale x 16 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vnmsac.nxv16i32.nxv16i32.i64(<vscale x 16 x i32> [[VD:%.*]], <vscale x 16 x i32> [[VS1:%.*]], <vscale x 16 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i32> [[TMP0]]
 //
 vuint32m8_t test_vnmsac_vv_u32m8(vuint32m8_t vd, vuint32m8_t vs1, vuint32m8_t vs2, size_t vl) {
@@ -720,7 +720,7 @@ vuint32m8_t test_vnmsac_vv_u32m8(vuint32m8_t vd, vuint32m8_t vs1, vuint32m8_t vs
 
 // CHECK-RV64-LABEL: @test_vnmsac_vx_u32m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vnmsac.nxv16i32.i32.i64(<vscale x 16 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 16 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vnmsac.nxv16i32.i32.i64(<vscale x 16 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 16 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i32> [[TMP0]]
 //
 vuint32m8_t test_vnmsac_vx_u32m8(vuint32m8_t vd, uint32_t rs1, vuint32m8_t vs2, size_t vl) {
@@ -729,7 +729,7 @@ vuint32m8_t test_vnmsac_vx_u32m8(vuint32m8_t vd, uint32_t rs1, vuint32m8_t vs2,
 
 // CHECK-RV64-LABEL: @test_vnmsac_vv_u64m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vnmsac.nxv1i64.nxv1i64.i64(<vscale x 1 x i64> [[VD:%.*]], <vscale x 1 x i64> [[VS1:%.*]], <vscale x 1 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vnmsac.nxv1i64.nxv1i64.i64(<vscale x 1 x i64> [[VD:%.*]], <vscale x 1 x i64> [[VS1:%.*]], <vscale x 1 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
 //
 vuint64m1_t test_vnmsac_vv_u64m1(vuint64m1_t vd, vuint64m1_t vs1, vuint64m1_t vs2, size_t vl) {
@@ -738,7 +738,7 @@ vuint64m1_t test_vnmsac_vv_u64m1(vuint64m1_t vd, vuint64m1_t vs1, vuint64m1_t vs
 
 // CHECK-RV64-LABEL: @test_vnmsac_vx_u64m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vnmsac.nxv1i64.i64.i64(<vscale x 1 x i64> [[VD:%.*]], i64 [[RS1:%.*]], <vscale x 1 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vnmsac.nxv1i64.i64.i64(<vscale x 1 x i64> [[VD:%.*]], i64 [[RS1:%.*]], <vscale x 1 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
 //
 vuint64m1_t test_vnmsac_vx_u64m1(vuint64m1_t vd, uint64_t rs1, vuint64m1_t vs2, size_t vl) {
@@ -747,7 +747,7 @@ vuint64m1_t test_vnmsac_vx_u64m1(vuint64m1_t vd, uint64_t rs1, vuint64m1_t vs2,
 
 // CHECK-RV64-LABEL: @test_vnmsac_vv_u64m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vnmsac.nxv2i64.nxv2i64.i64(<vscale x 2 x i64> [[VD:%.*]], <vscale x 2 x i64> [[VS1:%.*]], <vscale x 2 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vnmsac.nxv2i64.nxv2i64.i64(<vscale x 2 x i64> [[VD:%.*]], <vscale x 2 x i64> [[VS1:%.*]], <vscale x 2 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
 vuint64m2_t test_vnmsac_vv_u64m2(vuint64m2_t vd, vuint64m2_t vs1, vuint64m2_t vs2, size_t vl) {
@@ -756,7 +756,7 @@ vuint64m2_t test_vnmsac_vv_u64m2(vuint64m2_t vd, vuint64m2_t vs1, vuint64m2_t vs
 
 // CHECK-RV64-LABEL: @test_vnmsac_vx_u64m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vnmsac.nxv2i64.i64.i64(<vscale x 2 x i64> [[VD:%.*]], i64 [[RS1:%.*]], <vscale x 2 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vnmsac.nxv2i64.i64.i64(<vscale x 2 x i64> [[VD:%.*]], i64 [[RS1:%.*]], <vscale x 2 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
 vuint64m2_t test_vnmsac_vx_u64m2(vuint64m2_t vd, uint64_t rs1, vuint64m2_t vs2, size_t vl) {
@@ -765,7 +765,7 @@ vuint64m2_t test_vnmsac_vx_u64m2(vuint64m2_t vd, uint64_t rs1, vuint64m2_t vs2,
 
 // CHECK-RV64-LABEL: @test_vnmsac_vv_u64m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vnmsac.nxv4i64.nxv4i64.i64(<vscale x 4 x i64> [[VD:%.*]], <vscale x 4 x i64> [[VS1:%.*]], <vscale x 4 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vnmsac.nxv4i64.nxv4i64.i64(<vscale x 4 x i64> [[VD:%.*]], <vscale x 4 x i64> [[VS1:%.*]], <vscale x 4 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
 //
 vuint64m4_t test_vnmsac_vv_u64m4(vuint64m4_t vd, vuint64m4_t vs1, vuint64m4_t vs2, size_t vl) {
@@ -774,7 +774,7 @@ vuint64m4_t test_vnmsac_vv_u64m4(vuint64m4_t vd, vuint64m4_t vs1, vuint64m4_t vs
 
 // CHECK-RV64-LABEL: @test_vnmsac_vx_u64m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vnmsac.nxv4i64.i64.i64(<vscale x 4 x i64> [[VD:%.*]], i64 [[RS1:%.*]], <vscale x 4 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vnmsac.nxv4i64.i64.i64(<vscale x 4 x i64> [[VD:%.*]], i64 [[RS1:%.*]], <vscale x 4 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
 //
 vuint64m4_t test_vnmsac_vx_u64m4(vuint64m4_t vd, uint64_t rs1, vuint64m4_t vs2, size_t vl) {
@@ -783,7 +783,7 @@ vuint64m4_t test_vnmsac_vx_u64m4(vuint64m4_t vd, uint64_t rs1, vuint64m4_t vs2,
 
 // CHECK-RV64-LABEL: @test_vnmsac_vv_u64m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vnmsac.nxv8i64.nxv8i64.i64(<vscale x 8 x i64> [[VD:%.*]], <vscale x 8 x i64> [[VS1:%.*]], <vscale x 8 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vnmsac.nxv8i64.nxv8i64.i64(<vscale x 8 x i64> [[VD:%.*]], <vscale x 8 x i64> [[VS1:%.*]], <vscale x 8 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
 //
 vuint64m8_t test_vnmsac_vv_u64m8(vuint64m8_t vd, vuint64m8_t vs1, vuint64m8_t vs2, size_t vl) {
@@ -792,7 +792,7 @@ vuint64m8_t test_vnmsac_vv_u64m8(vuint64m8_t vd, vuint64m8_t vs1, vuint64m8_t vs
 
 // CHECK-RV64-LABEL: @test_vnmsac_vx_u64m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vnmsac.nxv8i64.i64.i64(<vscale x 8 x i64> [[VD:%.*]], i64 [[RS1:%.*]], <vscale x 8 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vnmsac.nxv8i64.i64.i64(<vscale x 8 x i64> [[VD:%.*]], i64 [[RS1:%.*]], <vscale x 8 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
 //
 vuint64m8_t test_vnmsac_vx_u64m8(vuint64m8_t vd, uint64_t rs1, vuint64m8_t vs2, size_t vl) {

diff  --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vnmsub.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vnmsub.c
index 5fe15e6ece2a2..27f7e4eaa804d 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vnmsub.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vnmsub.c
@@ -9,7 +9,7 @@
 
 // CHECK-RV64-LABEL: @test_vnmsub_vv_i8mf8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vnmsub.nxv1i8.nxv1i8.i64(<vscale x 1 x i8> [[VD:%.*]], <vscale x 1 x i8> [[VS1:%.*]], <vscale x 1 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vnmsub.nxv1i8.nxv1i8.i64(<vscale x 1 x i8> [[VD:%.*]], <vscale x 1 x i8> [[VS1:%.*]], <vscale x 1 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i8> [[TMP0]]
 //
 vint8mf8_t test_vnmsub_vv_i8mf8(vint8mf8_t vd, vint8mf8_t vs1, vint8mf8_t vs2, size_t vl) {
@@ -18,7 +18,7 @@ vint8mf8_t test_vnmsub_vv_i8mf8(vint8mf8_t vd, vint8mf8_t vs1, vint8mf8_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vnmsub_vx_i8mf8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vnmsub.nxv1i8.i8.i64(<vscale x 1 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 1 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vnmsub.nxv1i8.i8.i64(<vscale x 1 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 1 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i8> [[TMP0]]
 //
 vint8mf8_t test_vnmsub_vx_i8mf8(vint8mf8_t vd, int8_t rs1, vint8mf8_t vs2, size_t vl) {
@@ -27,7 +27,7 @@ vint8mf8_t test_vnmsub_vx_i8mf8(vint8mf8_t vd, int8_t rs1, vint8mf8_t vs2, size_
 
 // CHECK-RV64-LABEL: @test_vnmsub_vv_i8mf4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vnmsub.nxv2i8.nxv2i8.i64(<vscale x 2 x i8> [[VD:%.*]], <vscale x 2 x i8> [[VS1:%.*]], <vscale x 2 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vnmsub.nxv2i8.nxv2i8.i64(<vscale x 2 x i8> [[VD:%.*]], <vscale x 2 x i8> [[VS1:%.*]], <vscale x 2 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i8> [[TMP0]]
 //
 vint8mf4_t test_vnmsub_vv_i8mf4(vint8mf4_t vd, vint8mf4_t vs1, vint8mf4_t vs2, size_t vl) {
@@ -36,7 +36,7 @@ vint8mf4_t test_vnmsub_vv_i8mf4(vint8mf4_t vd, vint8mf4_t vs1, vint8mf4_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vnmsub_vx_i8mf4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vnmsub.nxv2i8.i8.i64(<vscale x 2 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 2 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vnmsub.nxv2i8.i8.i64(<vscale x 2 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 2 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i8> [[TMP0]]
 //
 vint8mf4_t test_vnmsub_vx_i8mf4(vint8mf4_t vd, int8_t rs1, vint8mf4_t vs2, size_t vl) {
@@ -45,7 +45,7 @@ vint8mf4_t test_vnmsub_vx_i8mf4(vint8mf4_t vd, int8_t rs1, vint8mf4_t vs2, size_
 
 // CHECK-RV64-LABEL: @test_vnmsub_vv_i8mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vnmsub.nxv4i8.nxv4i8.i64(<vscale x 4 x i8> [[VD:%.*]], <vscale x 4 x i8> [[VS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vnmsub.nxv4i8.nxv4i8.i64(<vscale x 4 x i8> [[VD:%.*]], <vscale x 4 x i8> [[VS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i8> [[TMP0]]
 //
 vint8mf2_t test_vnmsub_vv_i8mf2(vint8mf2_t vd, vint8mf2_t vs1, vint8mf2_t vs2, size_t vl) {
@@ -54,7 +54,7 @@ vint8mf2_t test_vnmsub_vv_i8mf2(vint8mf2_t vd, vint8mf2_t vs1, vint8mf2_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vnmsub_vx_i8mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vnmsub.nxv4i8.i8.i64(<vscale x 4 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vnmsub.nxv4i8.i8.i64(<vscale x 4 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i8> [[TMP0]]
 //
 vint8mf2_t test_vnmsub_vx_i8mf2(vint8mf2_t vd, int8_t rs1, vint8mf2_t vs2, size_t vl) {
@@ -63,7 +63,7 @@ vint8mf2_t test_vnmsub_vx_i8mf2(vint8mf2_t vd, int8_t rs1, vint8mf2_t vs2, size_
 
 // CHECK-RV64-LABEL: @test_vnmsub_vv_i8m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vnmsub.nxv8i8.nxv8i8.i64(<vscale x 8 x i8> [[VD:%.*]], <vscale x 8 x i8> [[VS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vnmsub.nxv8i8.nxv8i8.i64(<vscale x 8 x i8> [[VD:%.*]], <vscale x 8 x i8> [[VS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i8> [[TMP0]]
 //
 vint8m1_t test_vnmsub_vv_i8m1(vint8m1_t vd, vint8m1_t vs1, vint8m1_t vs2, size_t vl) {
@@ -72,7 +72,7 @@ vint8m1_t test_vnmsub_vv_i8m1(vint8m1_t vd, vint8m1_t vs1, vint8m1_t vs2, size_t
 
 // CHECK-RV64-LABEL: @test_vnmsub_vx_i8m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vnmsub.nxv8i8.i8.i64(<vscale x 8 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vnmsub.nxv8i8.i8.i64(<vscale x 8 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i8> [[TMP0]]
 //
 vint8m1_t test_vnmsub_vx_i8m1(vint8m1_t vd, int8_t rs1, vint8m1_t vs2, size_t vl) {
@@ -81,7 +81,7 @@ vint8m1_t test_vnmsub_vx_i8m1(vint8m1_t vd, int8_t rs1, vint8m1_t vs2, size_t vl
 
 // CHECK-RV64-LABEL: @test_vnmsub_vv_i8m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vnmsub.nxv16i8.nxv16i8.i64(<vscale x 16 x i8> [[VD:%.*]], <vscale x 16 x i8> [[VS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vnmsub.nxv16i8.nxv16i8.i64(<vscale x 16 x i8> [[VD:%.*]], <vscale x 16 x i8> [[VS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
 vint8m2_t test_vnmsub_vv_i8m2(vint8m2_t vd, vint8m2_t vs1, vint8m2_t vs2, size_t vl) {
@@ -90,7 +90,7 @@ vint8m2_t test_vnmsub_vv_i8m2(vint8m2_t vd, vint8m2_t vs1, vint8m2_t vs2, size_t
 
 // CHECK-RV64-LABEL: @test_vnmsub_vx_i8m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vnmsub.nxv16i8.i8.i64(<vscale x 16 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vnmsub.nxv16i8.i8.i64(<vscale x 16 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
 vint8m2_t test_vnmsub_vx_i8m2(vint8m2_t vd, int8_t rs1, vint8m2_t vs2, size_t vl) {
@@ -99,7 +99,7 @@ vint8m2_t test_vnmsub_vx_i8m2(vint8m2_t vd, int8_t rs1, vint8m2_t vs2, size_t vl
 
 // CHECK-RV64-LABEL: @test_vnmsub_vv_i8m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i8> @llvm.riscv.vnmsub.nxv32i8.nxv32i8.i64(<vscale x 32 x i8> [[VD:%.*]], <vscale x 32 x i8> [[VS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i8> @llvm.riscv.vnmsub.nxv32i8.nxv32i8.i64(<vscale x 32 x i8> [[VD:%.*]], <vscale x 32 x i8> [[VS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 32 x i8> [[TMP0]]
 //
 vint8m4_t test_vnmsub_vv_i8m4(vint8m4_t vd, vint8m4_t vs1, vint8m4_t vs2, size_t vl) {
@@ -108,7 +108,7 @@ vint8m4_t test_vnmsub_vv_i8m4(vint8m4_t vd, vint8m4_t vs1, vint8m4_t vs2, size_t
 
 // CHECK-RV64-LABEL: @test_vnmsub_vx_i8m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i8> @llvm.riscv.vnmsub.nxv32i8.i8.i64(<vscale x 32 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i8> @llvm.riscv.vnmsub.nxv32i8.i8.i64(<vscale x 32 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 32 x i8> [[TMP0]]
 //
 vint8m4_t test_vnmsub_vx_i8m4(vint8m4_t vd, int8_t rs1, vint8m4_t vs2, size_t vl) {
@@ -117,7 +117,7 @@ vint8m4_t test_vnmsub_vx_i8m4(vint8m4_t vd, int8_t rs1, vint8m4_t vs2, size_t vl
 
 // CHECK-RV64-LABEL: @test_vnmsub_vv_i8m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 64 x i8> @llvm.riscv.vnmsub.nxv64i8.nxv64i8.i64(<vscale x 64 x i8> [[VD:%.*]], <vscale x 64 x i8> [[VS1:%.*]], <vscale x 64 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 64 x i8> @llvm.riscv.vnmsub.nxv64i8.nxv64i8.i64(<vscale x 64 x i8> [[VD:%.*]], <vscale x 64 x i8> [[VS1:%.*]], <vscale x 64 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 64 x i8> [[TMP0]]
 //
 vint8m8_t test_vnmsub_vv_i8m8(vint8m8_t vd, vint8m8_t vs1, vint8m8_t vs2, size_t vl) {
@@ -126,7 +126,7 @@ vint8m8_t test_vnmsub_vv_i8m8(vint8m8_t vd, vint8m8_t vs1, vint8m8_t vs2, size_t
 
 // CHECK-RV64-LABEL: @test_vnmsub_vx_i8m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 64 x i8> @llvm.riscv.vnmsub.nxv64i8.i8.i64(<vscale x 64 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 64 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 64 x i8> @llvm.riscv.vnmsub.nxv64i8.i8.i64(<vscale x 64 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 64 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 64 x i8> [[TMP0]]
 //
 vint8m8_t test_vnmsub_vx_i8m8(vint8m8_t vd, int8_t rs1, vint8m8_t vs2, size_t vl) {
@@ -135,7 +135,7 @@ vint8m8_t test_vnmsub_vx_i8m8(vint8m8_t vd, int8_t rs1, vint8m8_t vs2, size_t vl
 
 // CHECK-RV64-LABEL: @test_vnmsub_vv_i16mf4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vnmsub.nxv1i16.nxv1i16.i64(<vscale x 1 x i16> [[VD:%.*]], <vscale x 1 x i16> [[VS1:%.*]], <vscale x 1 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vnmsub.nxv1i16.nxv1i16.i64(<vscale x 1 x i16> [[VD:%.*]], <vscale x 1 x i16> [[VS1:%.*]], <vscale x 1 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i16> [[TMP0]]
 //
 vint16mf4_t test_vnmsub_vv_i16mf4(vint16mf4_t vd, vint16mf4_t vs1, vint16mf4_t vs2, size_t vl) {
@@ -144,7 +144,7 @@ vint16mf4_t test_vnmsub_vv_i16mf4(vint16mf4_t vd, vint16mf4_t vs1, vint16mf4_t v
 
 // CHECK-RV64-LABEL: @test_vnmsub_vx_i16mf4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vnmsub.nxv1i16.i16.i64(<vscale x 1 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 1 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vnmsub.nxv1i16.i16.i64(<vscale x 1 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 1 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i16> [[TMP0]]
 //
 vint16mf4_t test_vnmsub_vx_i16mf4(vint16mf4_t vd, int16_t rs1, vint16mf4_t vs2, size_t vl) {
@@ -153,7 +153,7 @@ vint16mf4_t test_vnmsub_vx_i16mf4(vint16mf4_t vd, int16_t rs1, vint16mf4_t vs2,
 
 // CHECK-RV64-LABEL: @test_vnmsub_vv_i16mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vnmsub.nxv2i16.nxv2i16.i64(<vscale x 2 x i16> [[VD:%.*]], <vscale x 2 x i16> [[VS1:%.*]], <vscale x 2 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vnmsub.nxv2i16.nxv2i16.i64(<vscale x 2 x i16> [[VD:%.*]], <vscale x 2 x i16> [[VS1:%.*]], <vscale x 2 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i16> [[TMP0]]
 //
 vint16mf2_t test_vnmsub_vv_i16mf2(vint16mf2_t vd, vint16mf2_t vs1, vint16mf2_t vs2, size_t vl) {
@@ -162,7 +162,7 @@ vint16mf2_t test_vnmsub_vv_i16mf2(vint16mf2_t vd, vint16mf2_t vs1, vint16mf2_t v
 
 // CHECK-RV64-LABEL: @test_vnmsub_vx_i16mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vnmsub.nxv2i16.i16.i64(<vscale x 2 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 2 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vnmsub.nxv2i16.i16.i64(<vscale x 2 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 2 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i16> [[TMP0]]
 //
 vint16mf2_t test_vnmsub_vx_i16mf2(vint16mf2_t vd, int16_t rs1, vint16mf2_t vs2, size_t vl) {
@@ -171,7 +171,7 @@ vint16mf2_t test_vnmsub_vx_i16mf2(vint16mf2_t vd, int16_t rs1, vint16mf2_t vs2,
 
 // CHECK-RV64-LABEL: @test_vnmsub_vv_i16m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vnmsub.nxv4i16.nxv4i16.i64(<vscale x 4 x i16> [[VD:%.*]], <vscale x 4 x i16> [[VS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vnmsub.nxv4i16.nxv4i16.i64(<vscale x 4 x i16> [[VD:%.*]], <vscale x 4 x i16> [[VS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i16> [[TMP0]]
 //
 vint16m1_t test_vnmsub_vv_i16m1(vint16m1_t vd, vint16m1_t vs1, vint16m1_t vs2, size_t vl) {
@@ -180,7 +180,7 @@ vint16m1_t test_vnmsub_vv_i16m1(vint16m1_t vd, vint16m1_t vs1, vint16m1_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vnmsub_vx_i16m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vnmsub.nxv4i16.i16.i64(<vscale x 4 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vnmsub.nxv4i16.i16.i64(<vscale x 4 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i16> [[TMP0]]
 //
 vint16m1_t test_vnmsub_vx_i16m1(vint16m1_t vd, int16_t rs1, vint16m1_t vs2, size_t vl) {
@@ -189,7 +189,7 @@ vint16m1_t test_vnmsub_vx_i16m1(vint16m1_t vd, int16_t rs1, vint16m1_t vs2, size
 
 // CHECK-RV64-LABEL: @test_vnmsub_vv_i16m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vnmsub.nxv8i16.nxv8i16.i64(<vscale x 8 x i16> [[VD:%.*]], <vscale x 8 x i16> [[VS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vnmsub.nxv8i16.nxv8i16.i64(<vscale x 8 x i16> [[VD:%.*]], <vscale x 8 x i16> [[VS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
 vint16m2_t test_vnmsub_vv_i16m2(vint16m2_t vd, vint16m2_t vs1, vint16m2_t vs2, size_t vl) {
@@ -198,7 +198,7 @@ vint16m2_t test_vnmsub_vv_i16m2(vint16m2_t vd, vint16m2_t vs1, vint16m2_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vnmsub_vx_i16m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vnmsub.nxv8i16.i16.i64(<vscale x 8 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vnmsub.nxv8i16.i16.i64(<vscale x 8 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
 vint16m2_t test_vnmsub_vx_i16m2(vint16m2_t vd, int16_t rs1, vint16m2_t vs2, size_t vl) {
@@ -207,7 +207,7 @@ vint16m2_t test_vnmsub_vx_i16m2(vint16m2_t vd, int16_t rs1, vint16m2_t vs2, size
 
 // CHECK-RV64-LABEL: @test_vnmsub_vv_i16m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vnmsub.nxv16i16.nxv16i16.i64(<vscale x 16 x i16> [[VD:%.*]], <vscale x 16 x i16> [[VS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vnmsub.nxv16i16.nxv16i16.i64(<vscale x 16 x i16> [[VD:%.*]], <vscale x 16 x i16> [[VS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i16> [[TMP0]]
 //
 vint16m4_t test_vnmsub_vv_i16m4(vint16m4_t vd, vint16m4_t vs1, vint16m4_t vs2, size_t vl) {
@@ -216,7 +216,7 @@ vint16m4_t test_vnmsub_vv_i16m4(vint16m4_t vd, vint16m4_t vs1, vint16m4_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vnmsub_vx_i16m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vnmsub.nxv16i16.i16.i64(<vscale x 16 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vnmsub.nxv16i16.i16.i64(<vscale x 16 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i16> [[TMP0]]
 //
 vint16m4_t test_vnmsub_vx_i16m4(vint16m4_t vd, int16_t rs1, vint16m4_t vs2, size_t vl) {
@@ -225,7 +225,7 @@ vint16m4_t test_vnmsub_vx_i16m4(vint16m4_t vd, int16_t rs1, vint16m4_t vs2, size
 
 // CHECK-RV64-LABEL: @test_vnmsub_vv_i16m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vnmsub.nxv32i16.nxv32i16.i64(<vscale x 32 x i16> [[VD:%.*]], <vscale x 32 x i16> [[VS1:%.*]], <vscale x 32 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vnmsub.nxv32i16.nxv32i16.i64(<vscale x 32 x i16> [[VD:%.*]], <vscale x 32 x i16> [[VS1:%.*]], <vscale x 32 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 32 x i16> [[TMP0]]
 //
 vint16m8_t test_vnmsub_vv_i16m8(vint16m8_t vd, vint16m8_t vs1, vint16m8_t vs2, size_t vl) {
@@ -234,7 +234,7 @@ vint16m8_t test_vnmsub_vv_i16m8(vint16m8_t vd, vint16m8_t vs1, vint16m8_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vnmsub_vx_i16m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vnmsub.nxv32i16.i16.i64(<vscale x 32 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 32 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vnmsub.nxv32i16.i16.i64(<vscale x 32 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 32 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 32 x i16> [[TMP0]]
 //
 vint16m8_t test_vnmsub_vx_i16m8(vint16m8_t vd, int16_t rs1, vint16m8_t vs2, size_t vl) {
@@ -243,7 +243,7 @@ vint16m8_t test_vnmsub_vx_i16m8(vint16m8_t vd, int16_t rs1, vint16m8_t vs2, size
 
 // CHECK-RV64-LABEL: @test_vnmsub_vv_i32mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vnmsub.nxv1i32.nxv1i32.i64(<vscale x 1 x i32> [[VD:%.*]], <vscale x 1 x i32> [[VS1:%.*]], <vscale x 1 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vnmsub.nxv1i32.nxv1i32.i64(<vscale x 1 x i32> [[VD:%.*]], <vscale x 1 x i32> [[VS1:%.*]], <vscale x 1 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i32> [[TMP0]]
 //
 vint32mf2_t test_vnmsub_vv_i32mf2(vint32mf2_t vd, vint32mf2_t vs1, vint32mf2_t vs2, size_t vl) {
@@ -252,7 +252,7 @@ vint32mf2_t test_vnmsub_vv_i32mf2(vint32mf2_t vd, vint32mf2_t vs1, vint32mf2_t v
 
 // CHECK-RV64-LABEL: @test_vnmsub_vx_i32mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vnmsub.nxv1i32.i32.i64(<vscale x 1 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 1 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vnmsub.nxv1i32.i32.i64(<vscale x 1 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 1 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i32> [[TMP0]]
 //
 vint32mf2_t test_vnmsub_vx_i32mf2(vint32mf2_t vd, int32_t rs1, vint32mf2_t vs2, size_t vl) {
@@ -261,7 +261,7 @@ vint32mf2_t test_vnmsub_vx_i32mf2(vint32mf2_t vd, int32_t rs1, vint32mf2_t vs2,
 
 // CHECK-RV64-LABEL: @test_vnmsub_vv_i32m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vnmsub.nxv2i32.nxv2i32.i64(<vscale x 2 x i32> [[VD:%.*]], <vscale x 2 x i32> [[VS1:%.*]], <vscale x 2 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vnmsub.nxv2i32.nxv2i32.i64(<vscale x 2 x i32> [[VD:%.*]], <vscale x 2 x i32> [[VS1:%.*]], <vscale x 2 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i32> [[TMP0]]
 //
 vint32m1_t test_vnmsub_vv_i32m1(vint32m1_t vd, vint32m1_t vs1, vint32m1_t vs2, size_t vl) {
@@ -270,7 +270,7 @@ vint32m1_t test_vnmsub_vv_i32m1(vint32m1_t vd, vint32m1_t vs1, vint32m1_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vnmsub_vx_i32m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vnmsub.nxv2i32.i32.i64(<vscale x 2 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 2 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vnmsub.nxv2i32.i32.i64(<vscale x 2 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 2 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i32> [[TMP0]]
 //
 vint32m1_t test_vnmsub_vx_i32m1(vint32m1_t vd, int32_t rs1, vint32m1_t vs2, size_t vl) {
@@ -279,7 +279,7 @@ vint32m1_t test_vnmsub_vx_i32m1(vint32m1_t vd, int32_t rs1, vint32m1_t vs2, size
 
 // CHECK-RV64-LABEL: @test_vnmsub_vv_i32m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vnmsub.nxv4i32.nxv4i32.i64(<vscale x 4 x i32> [[VD:%.*]], <vscale x 4 x i32> [[VS1:%.*]], <vscale x 4 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vnmsub.nxv4i32.nxv4i32.i64(<vscale x 4 x i32> [[VD:%.*]], <vscale x 4 x i32> [[VS1:%.*]], <vscale x 4 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
 vint32m2_t test_vnmsub_vv_i32m2(vint32m2_t vd, vint32m2_t vs1, vint32m2_t vs2, size_t vl) {
@@ -288,7 +288,7 @@ vint32m2_t test_vnmsub_vv_i32m2(vint32m2_t vd, vint32m2_t vs1, vint32m2_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vnmsub_vx_i32m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vnmsub.nxv4i32.i32.i64(<vscale x 4 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 4 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vnmsub.nxv4i32.i32.i64(<vscale x 4 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 4 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
 vint32m2_t test_vnmsub_vx_i32m2(vint32m2_t vd, int32_t rs1, vint32m2_t vs2, size_t vl) {
@@ -297,7 +297,7 @@ vint32m2_t test_vnmsub_vx_i32m2(vint32m2_t vd, int32_t rs1, vint32m2_t vs2, size
 
 // CHECK-RV64-LABEL: @test_vnmsub_vv_i32m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vnmsub.nxv8i32.nxv8i32.i64(<vscale x 8 x i32> [[VD:%.*]], <vscale x 8 x i32> [[VS1:%.*]], <vscale x 8 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vnmsub.nxv8i32.nxv8i32.i64(<vscale x 8 x i32> [[VD:%.*]], <vscale x 8 x i32> [[VS1:%.*]], <vscale x 8 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i32> [[TMP0]]
 //
 vint32m4_t test_vnmsub_vv_i32m4(vint32m4_t vd, vint32m4_t vs1, vint32m4_t vs2, size_t vl) {
@@ -306,7 +306,7 @@ vint32m4_t test_vnmsub_vv_i32m4(vint32m4_t vd, vint32m4_t vs1, vint32m4_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vnmsub_vx_i32m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vnmsub.nxv8i32.i32.i64(<vscale x 8 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 8 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vnmsub.nxv8i32.i32.i64(<vscale x 8 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 8 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i32> [[TMP0]]
 //
 vint32m4_t test_vnmsub_vx_i32m4(vint32m4_t vd, int32_t rs1, vint32m4_t vs2, size_t vl) {
@@ -315,7 +315,7 @@ vint32m4_t test_vnmsub_vx_i32m4(vint32m4_t vd, int32_t rs1, vint32m4_t vs2, size
 
 // CHECK-RV64-LABEL: @test_vnmsub_vv_i32m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vnmsub.nxv16i32.nxv16i32.i64(<vscale x 16 x i32> [[VD:%.*]], <vscale x 16 x i32> [[VS1:%.*]], <vscale x 16 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vnmsub.nxv16i32.nxv16i32.i64(<vscale x 16 x i32> [[VD:%.*]], <vscale x 16 x i32> [[VS1:%.*]], <vscale x 16 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i32> [[TMP0]]
 //
 vint32m8_t test_vnmsub_vv_i32m8(vint32m8_t vd, vint32m8_t vs1, vint32m8_t vs2, size_t vl) {
@@ -324,7 +324,7 @@ vint32m8_t test_vnmsub_vv_i32m8(vint32m8_t vd, vint32m8_t vs1, vint32m8_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vnmsub_vx_i32m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vnmsub.nxv16i32.i32.i64(<vscale x 16 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 16 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vnmsub.nxv16i32.i32.i64(<vscale x 16 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 16 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i32> [[TMP0]]
 //
 vint32m8_t test_vnmsub_vx_i32m8(vint32m8_t vd, int32_t rs1, vint32m8_t vs2, size_t vl) {
@@ -333,7 +333,7 @@ vint32m8_t test_vnmsub_vx_i32m8(vint32m8_t vd, int32_t rs1, vint32m8_t vs2, size
 
 // CHECK-RV64-LABEL: @test_vnmsub_vv_i64m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vnmsub.nxv1i64.nxv1i64.i64(<vscale x 1 x i64> [[VD:%.*]], <vscale x 1 x i64> [[VS1:%.*]], <vscale x 1 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vnmsub.nxv1i64.nxv1i64.i64(<vscale x 1 x i64> [[VD:%.*]], <vscale x 1 x i64> [[VS1:%.*]], <vscale x 1 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
 //
 vint64m1_t test_vnmsub_vv_i64m1(vint64m1_t vd, vint64m1_t vs1, vint64m1_t vs2, size_t vl) {
@@ -342,7 +342,7 @@ vint64m1_t test_vnmsub_vv_i64m1(vint64m1_t vd, vint64m1_t vs1, vint64m1_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vnmsub_vx_i64m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vnmsub.nxv1i64.i64.i64(<vscale x 1 x i64> [[VD:%.*]], i64 [[RS1:%.*]], <vscale x 1 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vnmsub.nxv1i64.i64.i64(<vscale x 1 x i64> [[VD:%.*]], i64 [[RS1:%.*]], <vscale x 1 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
 //
 vint64m1_t test_vnmsub_vx_i64m1(vint64m1_t vd, int64_t rs1, vint64m1_t vs2, size_t vl) {
@@ -351,7 +351,7 @@ vint64m1_t test_vnmsub_vx_i64m1(vint64m1_t vd, int64_t rs1, vint64m1_t vs2, size
 
 // CHECK-RV64-LABEL: @test_vnmsub_vv_i64m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vnmsub.nxv2i64.nxv2i64.i64(<vscale x 2 x i64> [[VD:%.*]], <vscale x 2 x i64> [[VS1:%.*]], <vscale x 2 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vnmsub.nxv2i64.nxv2i64.i64(<vscale x 2 x i64> [[VD:%.*]], <vscale x 2 x i64> [[VS1:%.*]], <vscale x 2 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
 vint64m2_t test_vnmsub_vv_i64m2(vint64m2_t vd, vint64m2_t vs1, vint64m2_t vs2, size_t vl) {
@@ -360,7 +360,7 @@ vint64m2_t test_vnmsub_vv_i64m2(vint64m2_t vd, vint64m2_t vs1, vint64m2_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vnmsub_vx_i64m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vnmsub.nxv2i64.i64.i64(<vscale x 2 x i64> [[VD:%.*]], i64 [[RS1:%.*]], <vscale x 2 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vnmsub.nxv2i64.i64.i64(<vscale x 2 x i64> [[VD:%.*]], i64 [[RS1:%.*]], <vscale x 2 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
 vint64m2_t test_vnmsub_vx_i64m2(vint64m2_t vd, int64_t rs1, vint64m2_t vs2, size_t vl) {
@@ -369,7 +369,7 @@ vint64m2_t test_vnmsub_vx_i64m2(vint64m2_t vd, int64_t rs1, vint64m2_t vs2, size
 
 // CHECK-RV64-LABEL: @test_vnmsub_vv_i64m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vnmsub.nxv4i64.nxv4i64.i64(<vscale x 4 x i64> [[VD:%.*]], <vscale x 4 x i64> [[VS1:%.*]], <vscale x 4 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vnmsub.nxv4i64.nxv4i64.i64(<vscale x 4 x i64> [[VD:%.*]], <vscale x 4 x i64> [[VS1:%.*]], <vscale x 4 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
 //
 vint64m4_t test_vnmsub_vv_i64m4(vint64m4_t vd, vint64m4_t vs1, vint64m4_t vs2, size_t vl) {
@@ -378,7 +378,7 @@ vint64m4_t test_vnmsub_vv_i64m4(vint64m4_t vd, vint64m4_t vs1, vint64m4_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vnmsub_vx_i64m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vnmsub.nxv4i64.i64.i64(<vscale x 4 x i64> [[VD:%.*]], i64 [[RS1:%.*]], <vscale x 4 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vnmsub.nxv4i64.i64.i64(<vscale x 4 x i64> [[VD:%.*]], i64 [[RS1:%.*]], <vscale x 4 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
 //
 vint64m4_t test_vnmsub_vx_i64m4(vint64m4_t vd, int64_t rs1, vint64m4_t vs2, size_t vl) {
@@ -387,7 +387,7 @@ vint64m4_t test_vnmsub_vx_i64m4(vint64m4_t vd, int64_t rs1, vint64m4_t vs2, size
 
 // CHECK-RV64-LABEL: @test_vnmsub_vv_i64m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vnmsub.nxv8i64.nxv8i64.i64(<vscale x 8 x i64> [[VD:%.*]], <vscale x 8 x i64> [[VS1:%.*]], <vscale x 8 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vnmsub.nxv8i64.nxv8i64.i64(<vscale x 8 x i64> [[VD:%.*]], <vscale x 8 x i64> [[VS1:%.*]], <vscale x 8 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
 //
 vint64m8_t test_vnmsub_vv_i64m8(vint64m8_t vd, vint64m8_t vs1, vint64m8_t vs2, size_t vl) {
@@ -396,7 +396,7 @@ vint64m8_t test_vnmsub_vv_i64m8(vint64m8_t vd, vint64m8_t vs1, vint64m8_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vnmsub_vx_i64m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vnmsub.nxv8i64.i64.i64(<vscale x 8 x i64> [[VD:%.*]], i64 [[RS1:%.*]], <vscale x 8 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vnmsub.nxv8i64.i64.i64(<vscale x 8 x i64> [[VD:%.*]], i64 [[RS1:%.*]], <vscale x 8 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
 //
 vint64m8_t test_vnmsub_vx_i64m8(vint64m8_t vd, int64_t rs1, vint64m8_t vs2, size_t vl) {
@@ -405,7 +405,7 @@ vint64m8_t test_vnmsub_vx_i64m8(vint64m8_t vd, int64_t rs1, vint64m8_t vs2, size
 
 // CHECK-RV64-LABEL: @test_vnmsub_vv_u8mf8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vnmsub.nxv1i8.nxv1i8.i64(<vscale x 1 x i8> [[VD:%.*]], <vscale x 1 x i8> [[VS1:%.*]], <vscale x 1 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vnmsub.nxv1i8.nxv1i8.i64(<vscale x 1 x i8> [[VD:%.*]], <vscale x 1 x i8> [[VS1:%.*]], <vscale x 1 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i8> [[TMP0]]
 //
 vuint8mf8_t test_vnmsub_vv_u8mf8(vuint8mf8_t vd, vuint8mf8_t vs1, vuint8mf8_t vs2, size_t vl) {
@@ -414,7 +414,7 @@ vuint8mf8_t test_vnmsub_vv_u8mf8(vuint8mf8_t vd, vuint8mf8_t vs1, vuint8mf8_t vs
 
 // CHECK-RV64-LABEL: @test_vnmsub_vx_u8mf8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vnmsub.nxv1i8.i8.i64(<vscale x 1 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 1 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vnmsub.nxv1i8.i8.i64(<vscale x 1 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 1 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i8> [[TMP0]]
 //
 vuint8mf8_t test_vnmsub_vx_u8mf8(vuint8mf8_t vd, uint8_t rs1, vuint8mf8_t vs2, size_t vl) {
@@ -423,7 +423,7 @@ vuint8mf8_t test_vnmsub_vx_u8mf8(vuint8mf8_t vd, uint8_t rs1, vuint8mf8_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vnmsub_vv_u8mf4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vnmsub.nxv2i8.nxv2i8.i64(<vscale x 2 x i8> [[VD:%.*]], <vscale x 2 x i8> [[VS1:%.*]], <vscale x 2 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vnmsub.nxv2i8.nxv2i8.i64(<vscale x 2 x i8> [[VD:%.*]], <vscale x 2 x i8> [[VS1:%.*]], <vscale x 2 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i8> [[TMP0]]
 //
 vuint8mf4_t test_vnmsub_vv_u8mf4(vuint8mf4_t vd, vuint8mf4_t vs1, vuint8mf4_t vs2, size_t vl) {
@@ -432,7 +432,7 @@ vuint8mf4_t test_vnmsub_vv_u8mf4(vuint8mf4_t vd, vuint8mf4_t vs1, vuint8mf4_t vs
 
 // CHECK-RV64-LABEL: @test_vnmsub_vx_u8mf4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vnmsub.nxv2i8.i8.i64(<vscale x 2 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 2 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vnmsub.nxv2i8.i8.i64(<vscale x 2 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 2 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i8> [[TMP0]]
 //
 vuint8mf4_t test_vnmsub_vx_u8mf4(vuint8mf4_t vd, uint8_t rs1, vuint8mf4_t vs2, size_t vl) {
@@ -441,7 +441,7 @@ vuint8mf4_t test_vnmsub_vx_u8mf4(vuint8mf4_t vd, uint8_t rs1, vuint8mf4_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vnmsub_vv_u8mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vnmsub.nxv4i8.nxv4i8.i64(<vscale x 4 x i8> [[VD:%.*]], <vscale x 4 x i8> [[VS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vnmsub.nxv4i8.nxv4i8.i64(<vscale x 4 x i8> [[VD:%.*]], <vscale x 4 x i8> [[VS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i8> [[TMP0]]
 //
 vuint8mf2_t test_vnmsub_vv_u8mf2(vuint8mf2_t vd, vuint8mf2_t vs1, vuint8mf2_t vs2, size_t vl) {
@@ -450,7 +450,7 @@ vuint8mf2_t test_vnmsub_vv_u8mf2(vuint8mf2_t vd, vuint8mf2_t vs1, vuint8mf2_t vs
 
 // CHECK-RV64-LABEL: @test_vnmsub_vx_u8mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vnmsub.nxv4i8.i8.i64(<vscale x 4 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vnmsub.nxv4i8.i8.i64(<vscale x 4 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i8> [[TMP0]]
 //
 vuint8mf2_t test_vnmsub_vx_u8mf2(vuint8mf2_t vd, uint8_t rs1, vuint8mf2_t vs2, size_t vl) {
@@ -459,7 +459,7 @@ vuint8mf2_t test_vnmsub_vx_u8mf2(vuint8mf2_t vd, uint8_t rs1, vuint8mf2_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vnmsub_vv_u8m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vnmsub.nxv8i8.nxv8i8.i64(<vscale x 8 x i8> [[VD:%.*]], <vscale x 8 x i8> [[VS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vnmsub.nxv8i8.nxv8i8.i64(<vscale x 8 x i8> [[VD:%.*]], <vscale x 8 x i8> [[VS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i8> [[TMP0]]
 //
 vuint8m1_t test_vnmsub_vv_u8m1(vuint8m1_t vd, vuint8m1_t vs1, vuint8m1_t vs2, size_t vl) {
@@ -468,7 +468,7 @@ vuint8m1_t test_vnmsub_vv_u8m1(vuint8m1_t vd, vuint8m1_t vs1, vuint8m1_t vs2, si
 
 // CHECK-RV64-LABEL: @test_vnmsub_vx_u8m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vnmsub.nxv8i8.i8.i64(<vscale x 8 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vnmsub.nxv8i8.i8.i64(<vscale x 8 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i8> [[TMP0]]
 //
 vuint8m1_t test_vnmsub_vx_u8m1(vuint8m1_t vd, uint8_t rs1, vuint8m1_t vs2, size_t vl) {
@@ -477,7 +477,7 @@ vuint8m1_t test_vnmsub_vx_u8m1(vuint8m1_t vd, uint8_t rs1, vuint8m1_t vs2, size_
 
 // CHECK-RV64-LABEL: @test_vnmsub_vv_u8m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vnmsub.nxv16i8.nxv16i8.i64(<vscale x 16 x i8> [[VD:%.*]], <vscale x 16 x i8> [[VS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vnmsub.nxv16i8.nxv16i8.i64(<vscale x 16 x i8> [[VD:%.*]], <vscale x 16 x i8> [[VS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
 vuint8m2_t test_vnmsub_vv_u8m2(vuint8m2_t vd, vuint8m2_t vs1, vuint8m2_t vs2, size_t vl) {
@@ -486,7 +486,7 @@ vuint8m2_t test_vnmsub_vv_u8m2(vuint8m2_t vd, vuint8m2_t vs1, vuint8m2_t vs2, si
 
 // CHECK-RV64-LABEL: @test_vnmsub_vx_u8m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vnmsub.nxv16i8.i8.i64(<vscale x 16 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vnmsub.nxv16i8.i8.i64(<vscale x 16 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
 vuint8m2_t test_vnmsub_vx_u8m2(vuint8m2_t vd, uint8_t rs1, vuint8m2_t vs2, size_t vl) {
@@ -495,7 +495,7 @@ vuint8m2_t test_vnmsub_vx_u8m2(vuint8m2_t vd, uint8_t rs1, vuint8m2_t vs2, size_
 
 // CHECK-RV64-LABEL: @test_vnmsub_vv_u8m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i8> @llvm.riscv.vnmsub.nxv32i8.nxv32i8.i64(<vscale x 32 x i8> [[VD:%.*]], <vscale x 32 x i8> [[VS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i8> @llvm.riscv.vnmsub.nxv32i8.nxv32i8.i64(<vscale x 32 x i8> [[VD:%.*]], <vscale x 32 x i8> [[VS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 32 x i8> [[TMP0]]
 //
 vuint8m4_t test_vnmsub_vv_u8m4(vuint8m4_t vd, vuint8m4_t vs1, vuint8m4_t vs2, size_t vl) {
@@ -504,7 +504,7 @@ vuint8m4_t test_vnmsub_vv_u8m4(vuint8m4_t vd, vuint8m4_t vs1, vuint8m4_t vs2, si
 
 // CHECK-RV64-LABEL: @test_vnmsub_vx_u8m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i8> @llvm.riscv.vnmsub.nxv32i8.i8.i64(<vscale x 32 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i8> @llvm.riscv.vnmsub.nxv32i8.i8.i64(<vscale x 32 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 32 x i8> [[TMP0]]
 //
 vuint8m4_t test_vnmsub_vx_u8m4(vuint8m4_t vd, uint8_t rs1, vuint8m4_t vs2, size_t vl) {
@@ -513,7 +513,7 @@ vuint8m4_t test_vnmsub_vx_u8m4(vuint8m4_t vd, uint8_t rs1, vuint8m4_t vs2, size_
 
 // CHECK-RV64-LABEL: @test_vnmsub_vv_u8m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 64 x i8> @llvm.riscv.vnmsub.nxv64i8.nxv64i8.i64(<vscale x 64 x i8> [[VD:%.*]], <vscale x 64 x i8> [[VS1:%.*]], <vscale x 64 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 64 x i8> @llvm.riscv.vnmsub.nxv64i8.nxv64i8.i64(<vscale x 64 x i8> [[VD:%.*]], <vscale x 64 x i8> [[VS1:%.*]], <vscale x 64 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 64 x i8> [[TMP0]]
 //
 vuint8m8_t test_vnmsub_vv_u8m8(vuint8m8_t vd, vuint8m8_t vs1, vuint8m8_t vs2, size_t vl) {
@@ -522,7 +522,7 @@ vuint8m8_t test_vnmsub_vv_u8m8(vuint8m8_t vd, vuint8m8_t vs1, vuint8m8_t vs2, si
 
 // CHECK-RV64-LABEL: @test_vnmsub_vx_u8m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 64 x i8> @llvm.riscv.vnmsub.nxv64i8.i8.i64(<vscale x 64 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 64 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 64 x i8> @llvm.riscv.vnmsub.nxv64i8.i8.i64(<vscale x 64 x i8> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 64 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 64 x i8> [[TMP0]]
 //
 vuint8m8_t test_vnmsub_vx_u8m8(vuint8m8_t vd, uint8_t rs1, vuint8m8_t vs2, size_t vl) {
@@ -531,7 +531,7 @@ vuint8m8_t test_vnmsub_vx_u8m8(vuint8m8_t vd, uint8_t rs1, vuint8m8_t vs2, size_
 
 // CHECK-RV64-LABEL: @test_vnmsub_vv_u16mf4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vnmsub.nxv1i16.nxv1i16.i64(<vscale x 1 x i16> [[VD:%.*]], <vscale x 1 x i16> [[VS1:%.*]], <vscale x 1 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vnmsub.nxv1i16.nxv1i16.i64(<vscale x 1 x i16> [[VD:%.*]], <vscale x 1 x i16> [[VS1:%.*]], <vscale x 1 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i16> [[TMP0]]
 //
 vuint16mf4_t test_vnmsub_vv_u16mf4(vuint16mf4_t vd, vuint16mf4_t vs1, vuint16mf4_t vs2, size_t vl) {
@@ -540,7 +540,7 @@ vuint16mf4_t test_vnmsub_vv_u16mf4(vuint16mf4_t vd, vuint16mf4_t vs1, vuint16mf4
 
 // CHECK-RV64-LABEL: @test_vnmsub_vx_u16mf4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vnmsub.nxv1i16.i16.i64(<vscale x 1 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 1 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vnmsub.nxv1i16.i16.i64(<vscale x 1 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 1 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i16> [[TMP0]]
 //
 vuint16mf4_t test_vnmsub_vx_u16mf4(vuint16mf4_t vd, uint16_t rs1, vuint16mf4_t vs2, size_t vl) {
@@ -549,7 +549,7 @@ vuint16mf4_t test_vnmsub_vx_u16mf4(vuint16mf4_t vd, uint16_t rs1, vuint16mf4_t v
 
 // CHECK-RV64-LABEL: @test_vnmsub_vv_u16mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vnmsub.nxv2i16.nxv2i16.i64(<vscale x 2 x i16> [[VD:%.*]], <vscale x 2 x i16> [[VS1:%.*]], <vscale x 2 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vnmsub.nxv2i16.nxv2i16.i64(<vscale x 2 x i16> [[VD:%.*]], <vscale x 2 x i16> [[VS1:%.*]], <vscale x 2 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i16> [[TMP0]]
 //
 vuint16mf2_t test_vnmsub_vv_u16mf2(vuint16mf2_t vd, vuint16mf2_t vs1, vuint16mf2_t vs2, size_t vl) {
@@ -558,7 +558,7 @@ vuint16mf2_t test_vnmsub_vv_u16mf2(vuint16mf2_t vd, vuint16mf2_t vs1, vuint16mf2
 
 // CHECK-RV64-LABEL: @test_vnmsub_vx_u16mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vnmsub.nxv2i16.i16.i64(<vscale x 2 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 2 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vnmsub.nxv2i16.i16.i64(<vscale x 2 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 2 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i16> [[TMP0]]
 //
 vuint16mf2_t test_vnmsub_vx_u16mf2(vuint16mf2_t vd, uint16_t rs1, vuint16mf2_t vs2, size_t vl) {
@@ -567,7 +567,7 @@ vuint16mf2_t test_vnmsub_vx_u16mf2(vuint16mf2_t vd, uint16_t rs1, vuint16mf2_t v
 
 // CHECK-RV64-LABEL: @test_vnmsub_vv_u16m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vnmsub.nxv4i16.nxv4i16.i64(<vscale x 4 x i16> [[VD:%.*]], <vscale x 4 x i16> [[VS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vnmsub.nxv4i16.nxv4i16.i64(<vscale x 4 x i16> [[VD:%.*]], <vscale x 4 x i16> [[VS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i16> [[TMP0]]
 //
 vuint16m1_t test_vnmsub_vv_u16m1(vuint16m1_t vd, vuint16m1_t vs1, vuint16m1_t vs2, size_t vl) {
@@ -576,7 +576,7 @@ vuint16m1_t test_vnmsub_vv_u16m1(vuint16m1_t vd, vuint16m1_t vs1, vuint16m1_t vs
 
 // CHECK-RV64-LABEL: @test_vnmsub_vx_u16m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vnmsub.nxv4i16.i16.i64(<vscale x 4 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vnmsub.nxv4i16.i16.i64(<vscale x 4 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i16> [[TMP0]]
 //
 vuint16m1_t test_vnmsub_vx_u16m1(vuint16m1_t vd, uint16_t rs1, vuint16m1_t vs2, size_t vl) {
@@ -585,7 +585,7 @@ vuint16m1_t test_vnmsub_vx_u16m1(vuint16m1_t vd, uint16_t rs1, vuint16m1_t vs2,
 
 // CHECK-RV64-LABEL: @test_vnmsub_vv_u16m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vnmsub.nxv8i16.nxv8i16.i64(<vscale x 8 x i16> [[VD:%.*]], <vscale x 8 x i16> [[VS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vnmsub.nxv8i16.nxv8i16.i64(<vscale x 8 x i16> [[VD:%.*]], <vscale x 8 x i16> [[VS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
 vuint16m2_t test_vnmsub_vv_u16m2(vuint16m2_t vd, vuint16m2_t vs1, vuint16m2_t vs2, size_t vl) {
@@ -594,7 +594,7 @@ vuint16m2_t test_vnmsub_vv_u16m2(vuint16m2_t vd, vuint16m2_t vs1, vuint16m2_t vs
 
 // CHECK-RV64-LABEL: @test_vnmsub_vx_u16m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vnmsub.nxv8i16.i16.i64(<vscale x 8 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vnmsub.nxv8i16.i16.i64(<vscale x 8 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
 vuint16m2_t test_vnmsub_vx_u16m2(vuint16m2_t vd, uint16_t rs1, vuint16m2_t vs2, size_t vl) {
@@ -603,7 +603,7 @@ vuint16m2_t test_vnmsub_vx_u16m2(vuint16m2_t vd, uint16_t rs1, vuint16m2_t vs2,
 
 // CHECK-RV64-LABEL: @test_vnmsub_vv_u16m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vnmsub.nxv16i16.nxv16i16.i64(<vscale x 16 x i16> [[VD:%.*]], <vscale x 16 x i16> [[VS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vnmsub.nxv16i16.nxv16i16.i64(<vscale x 16 x i16> [[VD:%.*]], <vscale x 16 x i16> [[VS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i16> [[TMP0]]
 //
 vuint16m4_t test_vnmsub_vv_u16m4(vuint16m4_t vd, vuint16m4_t vs1, vuint16m4_t vs2, size_t vl) {
@@ -612,7 +612,7 @@ vuint16m4_t test_vnmsub_vv_u16m4(vuint16m4_t vd, vuint16m4_t vs1, vuint16m4_t vs
 
 // CHECK-RV64-LABEL: @test_vnmsub_vx_u16m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vnmsub.nxv16i16.i16.i64(<vscale x 16 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vnmsub.nxv16i16.i16.i64(<vscale x 16 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i16> [[TMP0]]
 //
 vuint16m4_t test_vnmsub_vx_u16m4(vuint16m4_t vd, uint16_t rs1, vuint16m4_t vs2, size_t vl) {
@@ -621,7 +621,7 @@ vuint16m4_t test_vnmsub_vx_u16m4(vuint16m4_t vd, uint16_t rs1, vuint16m4_t vs2,
 
 // CHECK-RV64-LABEL: @test_vnmsub_vv_u16m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vnmsub.nxv32i16.nxv32i16.i64(<vscale x 32 x i16> [[VD:%.*]], <vscale x 32 x i16> [[VS1:%.*]], <vscale x 32 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vnmsub.nxv32i16.nxv32i16.i64(<vscale x 32 x i16> [[VD:%.*]], <vscale x 32 x i16> [[VS1:%.*]], <vscale x 32 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 32 x i16> [[TMP0]]
 //
 vuint16m8_t test_vnmsub_vv_u16m8(vuint16m8_t vd, vuint16m8_t vs1, vuint16m8_t vs2, size_t vl) {
@@ -630,7 +630,7 @@ vuint16m8_t test_vnmsub_vv_u16m8(vuint16m8_t vd, vuint16m8_t vs1, vuint16m8_t vs
 
 // CHECK-RV64-LABEL: @test_vnmsub_vx_u16m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vnmsub.nxv32i16.i16.i64(<vscale x 32 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 32 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vnmsub.nxv32i16.i16.i64(<vscale x 32 x i16> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 32 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 32 x i16> [[TMP0]]
 //
 vuint16m8_t test_vnmsub_vx_u16m8(vuint16m8_t vd, uint16_t rs1, vuint16m8_t vs2, size_t vl) {
@@ -639,7 +639,7 @@ vuint16m8_t test_vnmsub_vx_u16m8(vuint16m8_t vd, uint16_t rs1, vuint16m8_t vs2,
 
 // CHECK-RV64-LABEL: @test_vnmsub_vv_u32mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vnmsub.nxv1i32.nxv1i32.i64(<vscale x 1 x i32> [[VD:%.*]], <vscale x 1 x i32> [[VS1:%.*]], <vscale x 1 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vnmsub.nxv1i32.nxv1i32.i64(<vscale x 1 x i32> [[VD:%.*]], <vscale x 1 x i32> [[VS1:%.*]], <vscale x 1 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i32> [[TMP0]]
 //
 vuint32mf2_t test_vnmsub_vv_u32mf2(vuint32mf2_t vd, vuint32mf2_t vs1, vuint32mf2_t vs2, size_t vl) {
@@ -648,7 +648,7 @@ vuint32mf2_t test_vnmsub_vv_u32mf2(vuint32mf2_t vd, vuint32mf2_t vs1, vuint32mf2
 
 // CHECK-RV64-LABEL: @test_vnmsub_vx_u32mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vnmsub.nxv1i32.i32.i64(<vscale x 1 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 1 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vnmsub.nxv1i32.i32.i64(<vscale x 1 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 1 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i32> [[TMP0]]
 //
 vuint32mf2_t test_vnmsub_vx_u32mf2(vuint32mf2_t vd, uint32_t rs1, vuint32mf2_t vs2, size_t vl) {
@@ -657,7 +657,7 @@ vuint32mf2_t test_vnmsub_vx_u32mf2(vuint32mf2_t vd, uint32_t rs1, vuint32mf2_t v
 
 // CHECK-RV64-LABEL: @test_vnmsub_vv_u32m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vnmsub.nxv2i32.nxv2i32.i64(<vscale x 2 x i32> [[VD:%.*]], <vscale x 2 x i32> [[VS1:%.*]], <vscale x 2 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vnmsub.nxv2i32.nxv2i32.i64(<vscale x 2 x i32> [[VD:%.*]], <vscale x 2 x i32> [[VS1:%.*]], <vscale x 2 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i32> [[TMP0]]
 //
 vuint32m1_t test_vnmsub_vv_u32m1(vuint32m1_t vd, vuint32m1_t vs1, vuint32m1_t vs2, size_t vl) {
@@ -666,7 +666,7 @@ vuint32m1_t test_vnmsub_vv_u32m1(vuint32m1_t vd, vuint32m1_t vs1, vuint32m1_t vs
 
 // CHECK-RV64-LABEL: @test_vnmsub_vx_u32m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vnmsub.nxv2i32.i32.i64(<vscale x 2 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 2 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vnmsub.nxv2i32.i32.i64(<vscale x 2 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 2 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i32> [[TMP0]]
 //
 vuint32m1_t test_vnmsub_vx_u32m1(vuint32m1_t vd, uint32_t rs1, vuint32m1_t vs2, size_t vl) {
@@ -675,7 +675,7 @@ vuint32m1_t test_vnmsub_vx_u32m1(vuint32m1_t vd, uint32_t rs1, vuint32m1_t vs2,
 
 // CHECK-RV64-LABEL: @test_vnmsub_vv_u32m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vnmsub.nxv4i32.nxv4i32.i64(<vscale x 4 x i32> [[VD:%.*]], <vscale x 4 x i32> [[VS1:%.*]], <vscale x 4 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vnmsub.nxv4i32.nxv4i32.i64(<vscale x 4 x i32> [[VD:%.*]], <vscale x 4 x i32> [[VS1:%.*]], <vscale x 4 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
 vuint32m2_t test_vnmsub_vv_u32m2(vuint32m2_t vd, vuint32m2_t vs1, vuint32m2_t vs2, size_t vl) {
@@ -684,7 +684,7 @@ vuint32m2_t test_vnmsub_vv_u32m2(vuint32m2_t vd, vuint32m2_t vs1, vuint32m2_t vs
 
 // CHECK-RV64-LABEL: @test_vnmsub_vx_u32m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vnmsub.nxv4i32.i32.i64(<vscale x 4 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 4 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vnmsub.nxv4i32.i32.i64(<vscale x 4 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 4 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
 vuint32m2_t test_vnmsub_vx_u32m2(vuint32m2_t vd, uint32_t rs1, vuint32m2_t vs2, size_t vl) {
@@ -693,7 +693,7 @@ vuint32m2_t test_vnmsub_vx_u32m2(vuint32m2_t vd, uint32_t rs1, vuint32m2_t vs2,
 
 // CHECK-RV64-LABEL: @test_vnmsub_vv_u32m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vnmsub.nxv8i32.nxv8i32.i64(<vscale x 8 x i32> [[VD:%.*]], <vscale x 8 x i32> [[VS1:%.*]], <vscale x 8 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vnmsub.nxv8i32.nxv8i32.i64(<vscale x 8 x i32> [[VD:%.*]], <vscale x 8 x i32> [[VS1:%.*]], <vscale x 8 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i32> [[TMP0]]
 //
 vuint32m4_t test_vnmsub_vv_u32m4(vuint32m4_t vd, vuint32m4_t vs1, vuint32m4_t vs2, size_t vl) {
@@ -702,7 +702,7 @@ vuint32m4_t test_vnmsub_vv_u32m4(vuint32m4_t vd, vuint32m4_t vs1, vuint32m4_t vs
 
 // CHECK-RV64-LABEL: @test_vnmsub_vx_u32m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vnmsub.nxv8i32.i32.i64(<vscale x 8 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 8 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vnmsub.nxv8i32.i32.i64(<vscale x 8 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 8 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i32> [[TMP0]]
 //
 vuint32m4_t test_vnmsub_vx_u32m4(vuint32m4_t vd, uint32_t rs1, vuint32m4_t vs2, size_t vl) {
@@ -711,7 +711,7 @@ vuint32m4_t test_vnmsub_vx_u32m4(vuint32m4_t vd, uint32_t rs1, vuint32m4_t vs2,
 
 // CHECK-RV64-LABEL: @test_vnmsub_vv_u32m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vnmsub.nxv16i32.nxv16i32.i64(<vscale x 16 x i32> [[VD:%.*]], <vscale x 16 x i32> [[VS1:%.*]], <vscale x 16 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vnmsub.nxv16i32.nxv16i32.i64(<vscale x 16 x i32> [[VD:%.*]], <vscale x 16 x i32> [[VS1:%.*]], <vscale x 16 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i32> [[TMP0]]
 //
 vuint32m8_t test_vnmsub_vv_u32m8(vuint32m8_t vd, vuint32m8_t vs1, vuint32m8_t vs2, size_t vl) {
@@ -720,7 +720,7 @@ vuint32m8_t test_vnmsub_vv_u32m8(vuint32m8_t vd, vuint32m8_t vs1, vuint32m8_t vs
 
 // CHECK-RV64-LABEL: @test_vnmsub_vx_u32m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vnmsub.nxv16i32.i32.i64(<vscale x 16 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 16 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vnmsub.nxv16i32.i32.i64(<vscale x 16 x i32> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 16 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i32> [[TMP0]]
 //
 vuint32m8_t test_vnmsub_vx_u32m8(vuint32m8_t vd, uint32_t rs1, vuint32m8_t vs2, size_t vl) {
@@ -729,7 +729,7 @@ vuint32m8_t test_vnmsub_vx_u32m8(vuint32m8_t vd, uint32_t rs1, vuint32m8_t vs2,
 
 // CHECK-RV64-LABEL: @test_vnmsub_vv_u64m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vnmsub.nxv1i64.nxv1i64.i64(<vscale x 1 x i64> [[VD:%.*]], <vscale x 1 x i64> [[VS1:%.*]], <vscale x 1 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vnmsub.nxv1i64.nxv1i64.i64(<vscale x 1 x i64> [[VD:%.*]], <vscale x 1 x i64> [[VS1:%.*]], <vscale x 1 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
 //
 vuint64m1_t test_vnmsub_vv_u64m1(vuint64m1_t vd, vuint64m1_t vs1, vuint64m1_t vs2, size_t vl) {
@@ -738,7 +738,7 @@ vuint64m1_t test_vnmsub_vv_u64m1(vuint64m1_t vd, vuint64m1_t vs1, vuint64m1_t vs
 
 // CHECK-RV64-LABEL: @test_vnmsub_vx_u64m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vnmsub.nxv1i64.i64.i64(<vscale x 1 x i64> [[VD:%.*]], i64 [[RS1:%.*]], <vscale x 1 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vnmsub.nxv1i64.i64.i64(<vscale x 1 x i64> [[VD:%.*]], i64 [[RS1:%.*]], <vscale x 1 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
 //
 vuint64m1_t test_vnmsub_vx_u64m1(vuint64m1_t vd, uint64_t rs1, vuint64m1_t vs2, size_t vl) {
@@ -747,7 +747,7 @@ vuint64m1_t test_vnmsub_vx_u64m1(vuint64m1_t vd, uint64_t rs1, vuint64m1_t vs2,
 
 // CHECK-RV64-LABEL: @test_vnmsub_vv_u64m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vnmsub.nxv2i64.nxv2i64.i64(<vscale x 2 x i64> [[VD:%.*]], <vscale x 2 x i64> [[VS1:%.*]], <vscale x 2 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vnmsub.nxv2i64.nxv2i64.i64(<vscale x 2 x i64> [[VD:%.*]], <vscale x 2 x i64> [[VS1:%.*]], <vscale x 2 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
 vuint64m2_t test_vnmsub_vv_u64m2(vuint64m2_t vd, vuint64m2_t vs1, vuint64m2_t vs2, size_t vl) {
@@ -756,7 +756,7 @@ vuint64m2_t test_vnmsub_vv_u64m2(vuint64m2_t vd, vuint64m2_t vs1, vuint64m2_t vs
 
 // CHECK-RV64-LABEL: @test_vnmsub_vx_u64m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vnmsub.nxv2i64.i64.i64(<vscale x 2 x i64> [[VD:%.*]], i64 [[RS1:%.*]], <vscale x 2 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vnmsub.nxv2i64.i64.i64(<vscale x 2 x i64> [[VD:%.*]], i64 [[RS1:%.*]], <vscale x 2 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
 vuint64m2_t test_vnmsub_vx_u64m2(vuint64m2_t vd, uint64_t rs1, vuint64m2_t vs2, size_t vl) {
@@ -765,7 +765,7 @@ vuint64m2_t test_vnmsub_vx_u64m2(vuint64m2_t vd, uint64_t rs1, vuint64m2_t vs2,
 
 // CHECK-RV64-LABEL: @test_vnmsub_vv_u64m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vnmsub.nxv4i64.nxv4i64.i64(<vscale x 4 x i64> [[VD:%.*]], <vscale x 4 x i64> [[VS1:%.*]], <vscale x 4 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vnmsub.nxv4i64.nxv4i64.i64(<vscale x 4 x i64> [[VD:%.*]], <vscale x 4 x i64> [[VS1:%.*]], <vscale x 4 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
 //
 vuint64m4_t test_vnmsub_vv_u64m4(vuint64m4_t vd, vuint64m4_t vs1, vuint64m4_t vs2, size_t vl) {
@@ -774,7 +774,7 @@ vuint64m4_t test_vnmsub_vv_u64m4(vuint64m4_t vd, vuint64m4_t vs1, vuint64m4_t vs
 
 // CHECK-RV64-LABEL: @test_vnmsub_vx_u64m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vnmsub.nxv4i64.i64.i64(<vscale x 4 x i64> [[VD:%.*]], i64 [[RS1:%.*]], <vscale x 4 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vnmsub.nxv4i64.i64.i64(<vscale x 4 x i64> [[VD:%.*]], i64 [[RS1:%.*]], <vscale x 4 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
 //
 vuint64m4_t test_vnmsub_vx_u64m4(vuint64m4_t vd, uint64_t rs1, vuint64m4_t vs2, size_t vl) {
@@ -783,7 +783,7 @@ vuint64m4_t test_vnmsub_vx_u64m4(vuint64m4_t vd, uint64_t rs1, vuint64m4_t vs2,
 
 // CHECK-RV64-LABEL: @test_vnmsub_vv_u64m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vnmsub.nxv8i64.nxv8i64.i64(<vscale x 8 x i64> [[VD:%.*]], <vscale x 8 x i64> [[VS1:%.*]], <vscale x 8 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vnmsub.nxv8i64.nxv8i64.i64(<vscale x 8 x i64> [[VD:%.*]], <vscale x 8 x i64> [[VS1:%.*]], <vscale x 8 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
 //
 vuint64m8_t test_vnmsub_vv_u64m8(vuint64m8_t vd, vuint64m8_t vs1, vuint64m8_t vs2, size_t vl) {
@@ -792,7 +792,7 @@ vuint64m8_t test_vnmsub_vv_u64m8(vuint64m8_t vd, vuint64m8_t vs1, vuint64m8_t vs
 
 // CHECK-RV64-LABEL: @test_vnmsub_vx_u64m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vnmsub.nxv8i64.i64.i64(<vscale x 8 x i64> [[VD:%.*]], i64 [[RS1:%.*]], <vscale x 8 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vnmsub.nxv8i64.i64.i64(<vscale x 8 x i64> [[VD:%.*]], i64 [[RS1:%.*]], <vscale x 8 x i64> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
 //
 vuint64m8_t test_vnmsub_vx_u64m8(vuint64m8_t vd, uint64_t rs1, vuint64m8_t vs2, size_t vl) {

diff  --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vwmacc.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vwmacc.c
index 598c50f929bfa..9fcec8a49ec49 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vwmacc.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vwmacc.c
@@ -9,7 +9,7 @@
 
 // CHECK-RV64-LABEL: @test_vwmacc_vv_i16mf4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vwmacc.nxv1i16.nxv1i8.nxv1i8.i64(<vscale x 1 x i16> [[VD:%.*]], <vscale x 1 x i8> [[VS1:%.*]], <vscale x 1 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vwmacc.nxv1i16.nxv1i8.nxv1i8.i64(<vscale x 1 x i16> [[VD:%.*]], <vscale x 1 x i8> [[VS1:%.*]], <vscale x 1 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i16> [[TMP0]]
 //
 vint16mf4_t test_vwmacc_vv_i16mf4(vint16mf4_t vd, vint8mf8_t vs1, vint8mf8_t vs2, size_t vl) {
@@ -18,7 +18,7 @@ vint16mf4_t test_vwmacc_vv_i16mf4(vint16mf4_t vd, vint8mf8_t vs1, vint8mf8_t vs2
 
 // CHECK-RV64-LABEL: @test_vwmacc_vx_i16mf4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vwmacc.nxv1i16.i8.nxv1i8.i64(<vscale x 1 x i16> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 1 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vwmacc.nxv1i16.i8.nxv1i8.i64(<vscale x 1 x i16> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 1 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i16> [[TMP0]]
 //
 vint16mf4_t test_vwmacc_vx_i16mf4(vint16mf4_t vd, int8_t rs1, vint8mf8_t vs2, size_t vl) {
@@ -27,7 +27,7 @@ vint16mf4_t test_vwmacc_vx_i16mf4(vint16mf4_t vd, int8_t rs1, vint8mf8_t vs2, si
 
 // CHECK-RV64-LABEL: @test_vwmacc_vv_i16mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vwmacc.nxv2i16.nxv2i8.nxv2i8.i64(<vscale x 2 x i16> [[VD:%.*]], <vscale x 2 x i8> [[VS1:%.*]], <vscale x 2 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vwmacc.nxv2i16.nxv2i8.nxv2i8.i64(<vscale x 2 x i16> [[VD:%.*]], <vscale x 2 x i8> [[VS1:%.*]], <vscale x 2 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i16> [[TMP0]]
 //
 vint16mf2_t test_vwmacc_vv_i16mf2(vint16mf2_t vd, vint8mf4_t vs1, vint8mf4_t vs2, size_t vl) {
@@ -36,7 +36,7 @@ vint16mf2_t test_vwmacc_vv_i16mf2(vint16mf2_t vd, vint8mf4_t vs1, vint8mf4_t vs2
 
 // CHECK-RV64-LABEL: @test_vwmacc_vx_i16mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vwmacc.nxv2i16.i8.nxv2i8.i64(<vscale x 2 x i16> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 2 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vwmacc.nxv2i16.i8.nxv2i8.i64(<vscale x 2 x i16> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 2 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i16> [[TMP0]]
 //
 vint16mf2_t test_vwmacc_vx_i16mf2(vint16mf2_t vd, int8_t rs1, vint8mf4_t vs2, size_t vl) {
@@ -45,7 +45,7 @@ vint16mf2_t test_vwmacc_vx_i16mf2(vint16mf2_t vd, int8_t rs1, vint8mf4_t vs2, si
 
 // CHECK-RV64-LABEL: @test_vwmacc_vv_i16m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vwmacc.nxv4i16.nxv4i8.nxv4i8.i64(<vscale x 4 x i16> [[VD:%.*]], <vscale x 4 x i8> [[VS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vwmacc.nxv4i16.nxv4i8.nxv4i8.i64(<vscale x 4 x i16> [[VD:%.*]], <vscale x 4 x i8> [[VS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i16> [[TMP0]]
 //
 vint16m1_t test_vwmacc_vv_i16m1(vint16m1_t vd, vint8mf2_t vs1, vint8mf2_t vs2, size_t vl) {
@@ -54,7 +54,7 @@ vint16m1_t test_vwmacc_vv_i16m1(vint16m1_t vd, vint8mf2_t vs1, vint8mf2_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vwmacc_vx_i16m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vwmacc.nxv4i16.i8.nxv4i8.i64(<vscale x 4 x i16> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vwmacc.nxv4i16.i8.nxv4i8.i64(<vscale x 4 x i16> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i16> [[TMP0]]
 //
 vint16m1_t test_vwmacc_vx_i16m1(vint16m1_t vd, int8_t rs1, vint8mf2_t vs2, size_t vl) {
@@ -63,7 +63,7 @@ vint16m1_t test_vwmacc_vx_i16m1(vint16m1_t vd, int8_t rs1, vint8mf2_t vs2, size_
 
 // CHECK-RV64-LABEL: @test_vwmacc_vv_i16m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vwmacc.nxv8i16.nxv8i8.nxv8i8.i64(<vscale x 8 x i16> [[VD:%.*]], <vscale x 8 x i8> [[VS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vwmacc.nxv8i16.nxv8i8.nxv8i8.i64(<vscale x 8 x i16> [[VD:%.*]], <vscale x 8 x i8> [[VS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
 vint16m2_t test_vwmacc_vv_i16m2(vint16m2_t vd, vint8m1_t vs1, vint8m1_t vs2, size_t vl) {
@@ -72,7 +72,7 @@ vint16m2_t test_vwmacc_vv_i16m2(vint16m2_t vd, vint8m1_t vs1, vint8m1_t vs2, siz
 
 // CHECK-RV64-LABEL: @test_vwmacc_vx_i16m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vwmacc.nxv8i16.i8.nxv8i8.i64(<vscale x 8 x i16> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vwmacc.nxv8i16.i8.nxv8i8.i64(<vscale x 8 x i16> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
 vint16m2_t test_vwmacc_vx_i16m2(vint16m2_t vd, int8_t rs1, vint8m1_t vs2, size_t vl) {
@@ -81,7 +81,7 @@ vint16m2_t test_vwmacc_vx_i16m2(vint16m2_t vd, int8_t rs1, vint8m1_t vs2, size_t
 
 // CHECK-RV64-LABEL: @test_vwmacc_vv_i16m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vwmacc.nxv16i16.nxv16i8.nxv16i8.i64(<vscale x 16 x i16> [[VD:%.*]], <vscale x 16 x i8> [[VS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vwmacc.nxv16i16.nxv16i8.nxv16i8.i64(<vscale x 16 x i16> [[VD:%.*]], <vscale x 16 x i8> [[VS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i16> [[TMP0]]
 //
 vint16m4_t test_vwmacc_vv_i16m4(vint16m4_t vd, vint8m2_t vs1, vint8m2_t vs2, size_t vl) {
@@ -90,7 +90,7 @@ vint16m4_t test_vwmacc_vv_i16m4(vint16m4_t vd, vint8m2_t vs1, vint8m2_t vs2, siz
 
 // CHECK-RV64-LABEL: @test_vwmacc_vx_i16m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vwmacc.nxv16i16.i8.nxv16i8.i64(<vscale x 16 x i16> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vwmacc.nxv16i16.i8.nxv16i8.i64(<vscale x 16 x i16> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i16> [[TMP0]]
 //
 vint16m4_t test_vwmacc_vx_i16m4(vint16m4_t vd, int8_t rs1, vint8m2_t vs2, size_t vl) {
@@ -99,7 +99,7 @@ vint16m4_t test_vwmacc_vx_i16m4(vint16m4_t vd, int8_t rs1, vint8m2_t vs2, size_t
 
 // CHECK-RV64-LABEL: @test_vwmacc_vv_i16m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vwmacc.nxv32i16.nxv32i8.nxv32i8.i64(<vscale x 32 x i16> [[VD:%.*]], <vscale x 32 x i8> [[VS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vwmacc.nxv32i16.nxv32i8.nxv32i8.i64(<vscale x 32 x i16> [[VD:%.*]], <vscale x 32 x i8> [[VS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 32 x i16> [[TMP0]]
 //
 vint16m8_t test_vwmacc_vv_i16m8(vint16m8_t vd, vint8m4_t vs1, vint8m4_t vs2, size_t vl) {
@@ -108,7 +108,7 @@ vint16m8_t test_vwmacc_vv_i16m8(vint16m8_t vd, vint8m4_t vs1, vint8m4_t vs2, siz
 
 // CHECK-RV64-LABEL: @test_vwmacc_vx_i16m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vwmacc.nxv32i16.i8.nxv32i8.i64(<vscale x 32 x i16> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vwmacc.nxv32i16.i8.nxv32i8.i64(<vscale x 32 x i16> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 32 x i16> [[TMP0]]
 //
 vint16m8_t test_vwmacc_vx_i16m8(vint16m8_t vd, int8_t rs1, vint8m4_t vs2, size_t vl) {
@@ -117,7 +117,7 @@ vint16m8_t test_vwmacc_vx_i16m8(vint16m8_t vd, int8_t rs1, vint8m4_t vs2, size_t
 
 // CHECK-RV64-LABEL: @test_vwmacc_vv_i32mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vwmacc.nxv1i32.nxv1i16.nxv1i16.i64(<vscale x 1 x i32> [[VD:%.*]], <vscale x 1 x i16> [[VS1:%.*]], <vscale x 1 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vwmacc.nxv1i32.nxv1i16.nxv1i16.i64(<vscale x 1 x i32> [[VD:%.*]], <vscale x 1 x i16> [[VS1:%.*]], <vscale x 1 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i32> [[TMP0]]
 //
 vint32mf2_t test_vwmacc_vv_i32mf2(vint32mf2_t vd, vint16mf4_t vs1, vint16mf4_t vs2, size_t vl) {
@@ -126,7 +126,7 @@ vint32mf2_t test_vwmacc_vv_i32mf2(vint32mf2_t vd, vint16mf4_t vs1, vint16mf4_t v
 
 // CHECK-RV64-LABEL: @test_vwmacc_vx_i32mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vwmacc.nxv1i32.i16.nxv1i16.i64(<vscale x 1 x i32> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 1 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vwmacc.nxv1i32.i16.nxv1i16.i64(<vscale x 1 x i32> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 1 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i32> [[TMP0]]
 //
 vint32mf2_t test_vwmacc_vx_i32mf2(vint32mf2_t vd, int16_t rs1, vint16mf4_t vs2, size_t vl) {
@@ -135,7 +135,7 @@ vint32mf2_t test_vwmacc_vx_i32mf2(vint32mf2_t vd, int16_t rs1, vint16mf4_t vs2,
 
 // CHECK-RV64-LABEL: @test_vwmacc_vv_i32m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vwmacc.nxv2i32.nxv2i16.nxv2i16.i64(<vscale x 2 x i32> [[VD:%.*]], <vscale x 2 x i16> [[VS1:%.*]], <vscale x 2 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vwmacc.nxv2i32.nxv2i16.nxv2i16.i64(<vscale x 2 x i32> [[VD:%.*]], <vscale x 2 x i16> [[VS1:%.*]], <vscale x 2 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i32> [[TMP0]]
 //
 vint32m1_t test_vwmacc_vv_i32m1(vint32m1_t vd, vint16mf2_t vs1, vint16mf2_t vs2, size_t vl) {
@@ -144,7 +144,7 @@ vint32m1_t test_vwmacc_vv_i32m1(vint32m1_t vd, vint16mf2_t vs1, vint16mf2_t vs2,
 
 // CHECK-RV64-LABEL: @test_vwmacc_vx_i32m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vwmacc.nxv2i32.i16.nxv2i16.i64(<vscale x 2 x i32> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 2 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vwmacc.nxv2i32.i16.nxv2i16.i64(<vscale x 2 x i32> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 2 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i32> [[TMP0]]
 //
 vint32m1_t test_vwmacc_vx_i32m1(vint32m1_t vd, int16_t rs1, vint16mf2_t vs2, size_t vl) {
@@ -153,7 +153,7 @@ vint32m1_t test_vwmacc_vx_i32m1(vint32m1_t vd, int16_t rs1, vint16mf2_t vs2, siz
 
 // CHECK-RV64-LABEL: @test_vwmacc_vv_i32m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vwmacc.nxv4i32.nxv4i16.nxv4i16.i64(<vscale x 4 x i32> [[VD:%.*]], <vscale x 4 x i16> [[VS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vwmacc.nxv4i32.nxv4i16.nxv4i16.i64(<vscale x 4 x i32> [[VD:%.*]], <vscale x 4 x i16> [[VS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
 vint32m2_t test_vwmacc_vv_i32m2(vint32m2_t vd, vint16m1_t vs1, vint16m1_t vs2, size_t vl) {
@@ -162,7 +162,7 @@ vint32m2_t test_vwmacc_vv_i32m2(vint32m2_t vd, vint16m1_t vs1, vint16m1_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vwmacc_vx_i32m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vwmacc.nxv4i32.i16.nxv4i16.i64(<vscale x 4 x i32> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vwmacc.nxv4i32.i16.nxv4i16.i64(<vscale x 4 x i32> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
 vint32m2_t test_vwmacc_vx_i32m2(vint32m2_t vd, int16_t rs1, vint16m1_t vs2, size_t vl) {
@@ -171,7 +171,7 @@ vint32m2_t test_vwmacc_vx_i32m2(vint32m2_t vd, int16_t rs1, vint16m1_t vs2, size
 
 // CHECK-RV64-LABEL: @test_vwmacc_vv_i32m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vwmacc.nxv8i32.nxv8i16.nxv8i16.i64(<vscale x 8 x i32> [[VD:%.*]], <vscale x 8 x i16> [[VS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vwmacc.nxv8i32.nxv8i16.nxv8i16.i64(<vscale x 8 x i32> [[VD:%.*]], <vscale x 8 x i16> [[VS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i32> [[TMP0]]
 //
 vint32m4_t test_vwmacc_vv_i32m4(vint32m4_t vd, vint16m2_t vs1, vint16m2_t vs2, size_t vl) {
@@ -180,7 +180,7 @@ vint32m4_t test_vwmacc_vv_i32m4(vint32m4_t vd, vint16m2_t vs1, vint16m2_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vwmacc_vx_i32m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vwmacc.nxv8i32.i16.nxv8i16.i64(<vscale x 8 x i32> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vwmacc.nxv8i32.i16.nxv8i16.i64(<vscale x 8 x i32> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i32> [[TMP0]]
 //
 vint32m4_t test_vwmacc_vx_i32m4(vint32m4_t vd, int16_t rs1, vint16m2_t vs2, size_t vl) {
@@ -189,7 +189,7 @@ vint32m4_t test_vwmacc_vx_i32m4(vint32m4_t vd, int16_t rs1, vint16m2_t vs2, size
 
 // CHECK-RV64-LABEL: @test_vwmacc_vv_i32m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vwmacc.nxv16i32.nxv16i16.nxv16i16.i64(<vscale x 16 x i32> [[VD:%.*]], <vscale x 16 x i16> [[VS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vwmacc.nxv16i32.nxv16i16.nxv16i16.i64(<vscale x 16 x i32> [[VD:%.*]], <vscale x 16 x i16> [[VS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i32> [[TMP0]]
 //
 vint32m8_t test_vwmacc_vv_i32m8(vint32m8_t vd, vint16m4_t vs1, vint16m4_t vs2, size_t vl) {
@@ -198,7 +198,7 @@ vint32m8_t test_vwmacc_vv_i32m8(vint32m8_t vd, vint16m4_t vs1, vint16m4_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vwmacc_vx_i32m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vwmacc.nxv16i32.i16.nxv16i16.i64(<vscale x 16 x i32> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vwmacc.nxv16i32.i16.nxv16i16.i64(<vscale x 16 x i32> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i32> [[TMP0]]
 //
 vint32m8_t test_vwmacc_vx_i32m8(vint32m8_t vd, int16_t rs1, vint16m4_t vs2, size_t vl) {
@@ -207,7 +207,7 @@ vint32m8_t test_vwmacc_vx_i32m8(vint32m8_t vd, int16_t rs1, vint16m4_t vs2, size
 
 // CHECK-RV64-LABEL: @test_vwmacc_vv_i64m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vwmacc.nxv1i64.nxv1i32.nxv1i32.i64(<vscale x 1 x i64> [[VD:%.*]], <vscale x 1 x i32> [[VS1:%.*]], <vscale x 1 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vwmacc.nxv1i64.nxv1i32.nxv1i32.i64(<vscale x 1 x i64> [[VD:%.*]], <vscale x 1 x i32> [[VS1:%.*]], <vscale x 1 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
 //
 vint64m1_t test_vwmacc_vv_i64m1(vint64m1_t vd, vint32mf2_t vs1, vint32mf2_t vs2, size_t vl) {
@@ -216,7 +216,7 @@ vint64m1_t test_vwmacc_vv_i64m1(vint64m1_t vd, vint32mf2_t vs1, vint32mf2_t vs2,
 
 // CHECK-RV64-LABEL: @test_vwmacc_vx_i64m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vwmacc.nxv1i64.i32.nxv1i32.i64(<vscale x 1 x i64> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 1 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vwmacc.nxv1i64.i32.nxv1i32.i64(<vscale x 1 x i64> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 1 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
 //
 vint64m1_t test_vwmacc_vx_i64m1(vint64m1_t vd, int32_t rs1, vint32mf2_t vs2, size_t vl) {
@@ -225,7 +225,7 @@ vint64m1_t test_vwmacc_vx_i64m1(vint64m1_t vd, int32_t rs1, vint32mf2_t vs2, siz
 
 // CHECK-RV64-LABEL: @test_vwmacc_vv_i64m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vwmacc.nxv2i64.nxv2i32.nxv2i32.i64(<vscale x 2 x i64> [[VD:%.*]], <vscale x 2 x i32> [[VS1:%.*]], <vscale x 2 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vwmacc.nxv2i64.nxv2i32.nxv2i32.i64(<vscale x 2 x i64> [[VD:%.*]], <vscale x 2 x i32> [[VS1:%.*]], <vscale x 2 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
 vint64m2_t test_vwmacc_vv_i64m2(vint64m2_t vd, vint32m1_t vs1, vint32m1_t vs2, size_t vl) {
@@ -234,7 +234,7 @@ vint64m2_t test_vwmacc_vv_i64m2(vint64m2_t vd, vint32m1_t vs1, vint32m1_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vwmacc_vx_i64m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vwmacc.nxv2i64.i32.nxv2i32.i64(<vscale x 2 x i64> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 2 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vwmacc.nxv2i64.i32.nxv2i32.i64(<vscale x 2 x i64> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 2 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
 vint64m2_t test_vwmacc_vx_i64m2(vint64m2_t vd, int32_t rs1, vint32m1_t vs2, size_t vl) {
@@ -243,7 +243,7 @@ vint64m2_t test_vwmacc_vx_i64m2(vint64m2_t vd, int32_t rs1, vint32m1_t vs2, size
 
 // CHECK-RV64-LABEL: @test_vwmacc_vv_i64m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vwmacc.nxv4i64.nxv4i32.nxv4i32.i64(<vscale x 4 x i64> [[VD:%.*]], <vscale x 4 x i32> [[VS1:%.*]], <vscale x 4 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vwmacc.nxv4i64.nxv4i32.nxv4i32.i64(<vscale x 4 x i64> [[VD:%.*]], <vscale x 4 x i32> [[VS1:%.*]], <vscale x 4 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
 //
 vint64m4_t test_vwmacc_vv_i64m4(vint64m4_t vd, vint32m2_t vs1, vint32m2_t vs2, size_t vl) {
@@ -252,7 +252,7 @@ vint64m4_t test_vwmacc_vv_i64m4(vint64m4_t vd, vint32m2_t vs1, vint32m2_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vwmacc_vx_i64m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vwmacc.nxv4i64.i32.nxv4i32.i64(<vscale x 4 x i64> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 4 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vwmacc.nxv4i64.i32.nxv4i32.i64(<vscale x 4 x i64> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 4 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
 //
 vint64m4_t test_vwmacc_vx_i64m4(vint64m4_t vd, int32_t rs1, vint32m2_t vs2, size_t vl) {
@@ -261,7 +261,7 @@ vint64m4_t test_vwmacc_vx_i64m4(vint64m4_t vd, int32_t rs1, vint32m2_t vs2, size
 
 // CHECK-RV64-LABEL: @test_vwmacc_vv_i64m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vwmacc.nxv8i64.nxv8i32.nxv8i32.i64(<vscale x 8 x i64> [[VD:%.*]], <vscale x 8 x i32> [[VS1:%.*]], <vscale x 8 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vwmacc.nxv8i64.nxv8i32.nxv8i32.i64(<vscale x 8 x i64> [[VD:%.*]], <vscale x 8 x i32> [[VS1:%.*]], <vscale x 8 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
 //
 vint64m8_t test_vwmacc_vv_i64m8(vint64m8_t vd, vint32m4_t vs1, vint32m4_t vs2, size_t vl) {
@@ -270,7 +270,7 @@ vint64m8_t test_vwmacc_vv_i64m8(vint64m8_t vd, vint32m4_t vs1, vint32m4_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vwmacc_vx_i64m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vwmacc.nxv8i64.i32.nxv8i32.i64(<vscale x 8 x i64> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 8 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vwmacc.nxv8i64.i32.nxv8i32.i64(<vscale x 8 x i64> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 8 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
 //
 vint64m8_t test_vwmacc_vx_i64m8(vint64m8_t vd, int32_t rs1, vint32m4_t vs2, size_t vl) {

diff  --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vwmaccsu.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vwmaccsu.c
index 6f92dfcf39e9e..a35b9ca237eb9 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vwmaccsu.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vwmaccsu.c
@@ -9,7 +9,7 @@
 
 // CHECK-RV64-LABEL: @test_vwmaccsu_vv_i16mf4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vwmaccsu.nxv1i16.nxv1i8.nxv1i8.i64(<vscale x 1 x i16> [[VD:%.*]], <vscale x 1 x i8> [[VS1:%.*]], <vscale x 1 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vwmaccsu.nxv1i16.nxv1i8.nxv1i8.i64(<vscale x 1 x i16> [[VD:%.*]], <vscale x 1 x i8> [[VS1:%.*]], <vscale x 1 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i16> [[TMP0]]
 //
 vint16mf4_t test_vwmaccsu_vv_i16mf4(vint16mf4_t vd, vint8mf8_t vs1, vuint8mf8_t vs2, size_t vl) {
@@ -18,7 +18,7 @@ vint16mf4_t test_vwmaccsu_vv_i16mf4(vint16mf4_t vd, vint8mf8_t vs1, vuint8mf8_t
 
 // CHECK-RV64-LABEL: @test_vwmaccsu_vx_i16mf4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vwmaccsu.nxv1i16.i8.nxv1i8.i64(<vscale x 1 x i16> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 1 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vwmaccsu.nxv1i16.i8.nxv1i8.i64(<vscale x 1 x i16> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 1 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i16> [[TMP0]]
 //
 vint16mf4_t test_vwmaccsu_vx_i16mf4(vint16mf4_t vd, int8_t rs1, vuint8mf8_t vs2, size_t vl) {
@@ -27,7 +27,7 @@ vint16mf4_t test_vwmaccsu_vx_i16mf4(vint16mf4_t vd, int8_t rs1, vuint8mf8_t vs2,
 
 // CHECK-RV64-LABEL: @test_vwmaccsu_vv_i16mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vwmaccsu.nxv2i16.nxv2i8.nxv2i8.i64(<vscale x 2 x i16> [[VD:%.*]], <vscale x 2 x i8> [[VS1:%.*]], <vscale x 2 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vwmaccsu.nxv2i16.nxv2i8.nxv2i8.i64(<vscale x 2 x i16> [[VD:%.*]], <vscale x 2 x i8> [[VS1:%.*]], <vscale x 2 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i16> [[TMP0]]
 //
 vint16mf2_t test_vwmaccsu_vv_i16mf2(vint16mf2_t vd, vint8mf4_t vs1, vuint8mf4_t vs2, size_t vl) {
@@ -36,7 +36,7 @@ vint16mf2_t test_vwmaccsu_vv_i16mf2(vint16mf2_t vd, vint8mf4_t vs1, vuint8mf4_t
 
 // CHECK-RV64-LABEL: @test_vwmaccsu_vx_i16mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vwmaccsu.nxv2i16.i8.nxv2i8.i64(<vscale x 2 x i16> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 2 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vwmaccsu.nxv2i16.i8.nxv2i8.i64(<vscale x 2 x i16> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 2 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i16> [[TMP0]]
 //
 vint16mf2_t test_vwmaccsu_vx_i16mf2(vint16mf2_t vd, int8_t rs1, vuint8mf4_t vs2, size_t vl) {
@@ -45,7 +45,7 @@ vint16mf2_t test_vwmaccsu_vx_i16mf2(vint16mf2_t vd, int8_t rs1, vuint8mf4_t vs2,
 
 // CHECK-RV64-LABEL: @test_vwmaccsu_vv_i16m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vwmaccsu.nxv4i16.nxv4i8.nxv4i8.i64(<vscale x 4 x i16> [[VD:%.*]], <vscale x 4 x i8> [[VS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vwmaccsu.nxv4i16.nxv4i8.nxv4i8.i64(<vscale x 4 x i16> [[VD:%.*]], <vscale x 4 x i8> [[VS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i16> [[TMP0]]
 //
 vint16m1_t test_vwmaccsu_vv_i16m1(vint16m1_t vd, vint8mf2_t vs1, vuint8mf2_t vs2, size_t vl) {
@@ -54,7 +54,7 @@ vint16m1_t test_vwmaccsu_vv_i16m1(vint16m1_t vd, vint8mf2_t vs1, vuint8mf2_t vs2
 
 // CHECK-RV64-LABEL: @test_vwmaccsu_vx_i16m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vwmaccsu.nxv4i16.i8.nxv4i8.i64(<vscale x 4 x i16> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vwmaccsu.nxv4i16.i8.nxv4i8.i64(<vscale x 4 x i16> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i16> [[TMP0]]
 //
 vint16m1_t test_vwmaccsu_vx_i16m1(vint16m1_t vd, int8_t rs1, vuint8mf2_t vs2, size_t vl) {
@@ -63,7 +63,7 @@ vint16m1_t test_vwmaccsu_vx_i16m1(vint16m1_t vd, int8_t rs1, vuint8mf2_t vs2, si
 
 // CHECK-RV64-LABEL: @test_vwmaccsu_vv_i16m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vwmaccsu.nxv8i16.nxv8i8.nxv8i8.i64(<vscale x 8 x i16> [[VD:%.*]], <vscale x 8 x i8> [[VS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vwmaccsu.nxv8i16.nxv8i8.nxv8i8.i64(<vscale x 8 x i16> [[VD:%.*]], <vscale x 8 x i8> [[VS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
 vint16m2_t test_vwmaccsu_vv_i16m2(vint16m2_t vd, vint8m1_t vs1, vuint8m1_t vs2, size_t vl) {
@@ -72,7 +72,7 @@ vint16m2_t test_vwmaccsu_vv_i16m2(vint16m2_t vd, vint8m1_t vs1, vuint8m1_t vs2,
 
 // CHECK-RV64-LABEL: @test_vwmaccsu_vx_i16m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vwmaccsu.nxv8i16.i8.nxv8i8.i64(<vscale x 8 x i16> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vwmaccsu.nxv8i16.i8.nxv8i8.i64(<vscale x 8 x i16> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
 vint16m2_t test_vwmaccsu_vx_i16m2(vint16m2_t vd, int8_t rs1, vuint8m1_t vs2, size_t vl) {
@@ -81,7 +81,7 @@ vint16m2_t test_vwmaccsu_vx_i16m2(vint16m2_t vd, int8_t rs1, vuint8m1_t vs2, siz
 
 // CHECK-RV64-LABEL: @test_vwmaccsu_vv_i16m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vwmaccsu.nxv16i16.nxv16i8.nxv16i8.i64(<vscale x 16 x i16> [[VD:%.*]], <vscale x 16 x i8> [[VS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vwmaccsu.nxv16i16.nxv16i8.nxv16i8.i64(<vscale x 16 x i16> [[VD:%.*]], <vscale x 16 x i8> [[VS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i16> [[TMP0]]
 //
 vint16m4_t test_vwmaccsu_vv_i16m4(vint16m4_t vd, vint8m2_t vs1, vuint8m2_t vs2, size_t vl) {
@@ -90,7 +90,7 @@ vint16m4_t test_vwmaccsu_vv_i16m4(vint16m4_t vd, vint8m2_t vs1, vuint8m2_t vs2,
 
 // CHECK-RV64-LABEL: @test_vwmaccsu_vx_i16m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vwmaccsu.nxv16i16.i8.nxv16i8.i64(<vscale x 16 x i16> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vwmaccsu.nxv16i16.i8.nxv16i8.i64(<vscale x 16 x i16> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i16> [[TMP0]]
 //
 vint16m4_t test_vwmaccsu_vx_i16m4(vint16m4_t vd, int8_t rs1, vuint8m2_t vs2, size_t vl) {
@@ -99,7 +99,7 @@ vint16m4_t test_vwmaccsu_vx_i16m4(vint16m4_t vd, int8_t rs1, vuint8m2_t vs2, siz
 
 // CHECK-RV64-LABEL: @test_vwmaccsu_vv_i16m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vwmaccsu.nxv32i16.nxv32i8.nxv32i8.i64(<vscale x 32 x i16> [[VD:%.*]], <vscale x 32 x i8> [[VS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vwmaccsu.nxv32i16.nxv32i8.nxv32i8.i64(<vscale x 32 x i16> [[VD:%.*]], <vscale x 32 x i8> [[VS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 32 x i16> [[TMP0]]
 //
 vint16m8_t test_vwmaccsu_vv_i16m8(vint16m8_t vd, vint8m4_t vs1, vuint8m4_t vs2, size_t vl) {
@@ -108,7 +108,7 @@ vint16m8_t test_vwmaccsu_vv_i16m8(vint16m8_t vd, vint8m4_t vs1, vuint8m4_t vs2,
 
 // CHECK-RV64-LABEL: @test_vwmaccsu_vx_i16m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vwmaccsu.nxv32i16.i8.nxv32i8.i64(<vscale x 32 x i16> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vwmaccsu.nxv32i16.i8.nxv32i8.i64(<vscale x 32 x i16> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 32 x i16> [[TMP0]]
 //
 vint16m8_t test_vwmaccsu_vx_i16m8(vint16m8_t vd, int8_t rs1, vuint8m4_t vs2, size_t vl) {
@@ -117,7 +117,7 @@ vint16m8_t test_vwmaccsu_vx_i16m8(vint16m8_t vd, int8_t rs1, vuint8m4_t vs2, siz
 
 // CHECK-RV64-LABEL: @test_vwmaccsu_vv_i32mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vwmaccsu.nxv1i32.nxv1i16.nxv1i16.i64(<vscale x 1 x i32> [[VD:%.*]], <vscale x 1 x i16> [[VS1:%.*]], <vscale x 1 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vwmaccsu.nxv1i32.nxv1i16.nxv1i16.i64(<vscale x 1 x i32> [[VD:%.*]], <vscale x 1 x i16> [[VS1:%.*]], <vscale x 1 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i32> [[TMP0]]
 //
 vint32mf2_t test_vwmaccsu_vv_i32mf2(vint32mf2_t vd, vint16mf4_t vs1, vuint16mf4_t vs2, size_t vl) {
@@ -126,7 +126,7 @@ vint32mf2_t test_vwmaccsu_vv_i32mf2(vint32mf2_t vd, vint16mf4_t vs1, vuint16mf4_
 
 // CHECK-RV64-LABEL: @test_vwmaccsu_vx_i32mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vwmaccsu.nxv1i32.i16.nxv1i16.i64(<vscale x 1 x i32> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 1 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vwmaccsu.nxv1i32.i16.nxv1i16.i64(<vscale x 1 x i32> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 1 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i32> [[TMP0]]
 //
 vint32mf2_t test_vwmaccsu_vx_i32mf2(vint32mf2_t vd, int16_t rs1, vuint16mf4_t vs2, size_t vl) {
@@ -135,7 +135,7 @@ vint32mf2_t test_vwmaccsu_vx_i32mf2(vint32mf2_t vd, int16_t rs1, vuint16mf4_t vs
 
 // CHECK-RV64-LABEL: @test_vwmaccsu_vv_i32m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vwmaccsu.nxv2i32.nxv2i16.nxv2i16.i64(<vscale x 2 x i32> [[VD:%.*]], <vscale x 2 x i16> [[VS1:%.*]], <vscale x 2 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vwmaccsu.nxv2i32.nxv2i16.nxv2i16.i64(<vscale x 2 x i32> [[VD:%.*]], <vscale x 2 x i16> [[VS1:%.*]], <vscale x 2 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i32> [[TMP0]]
 //
 vint32m1_t test_vwmaccsu_vv_i32m1(vint32m1_t vd, vint16mf2_t vs1, vuint16mf2_t vs2, size_t vl) {
@@ -144,7 +144,7 @@ vint32m1_t test_vwmaccsu_vv_i32m1(vint32m1_t vd, vint16mf2_t vs1, vuint16mf2_t v
 
 // CHECK-RV64-LABEL: @test_vwmaccsu_vx_i32m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vwmaccsu.nxv2i32.i16.nxv2i16.i64(<vscale x 2 x i32> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 2 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vwmaccsu.nxv2i32.i16.nxv2i16.i64(<vscale x 2 x i32> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 2 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i32> [[TMP0]]
 //
 vint32m1_t test_vwmaccsu_vx_i32m1(vint32m1_t vd, int16_t rs1, vuint16mf2_t vs2, size_t vl) {
@@ -153,7 +153,7 @@ vint32m1_t test_vwmaccsu_vx_i32m1(vint32m1_t vd, int16_t rs1, vuint16mf2_t vs2,
 
 // CHECK-RV64-LABEL: @test_vwmaccsu_vv_i32m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vwmaccsu.nxv4i32.nxv4i16.nxv4i16.i64(<vscale x 4 x i32> [[VD:%.*]], <vscale x 4 x i16> [[VS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vwmaccsu.nxv4i32.nxv4i16.nxv4i16.i64(<vscale x 4 x i32> [[VD:%.*]], <vscale x 4 x i16> [[VS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
 vint32m2_t test_vwmaccsu_vv_i32m2(vint32m2_t vd, vint16m1_t vs1, vuint16m1_t vs2, size_t vl) {
@@ -162,7 +162,7 @@ vint32m2_t test_vwmaccsu_vv_i32m2(vint32m2_t vd, vint16m1_t vs1, vuint16m1_t vs2
 
 // CHECK-RV64-LABEL: @test_vwmaccsu_vx_i32m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vwmaccsu.nxv4i32.i16.nxv4i16.i64(<vscale x 4 x i32> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vwmaccsu.nxv4i32.i16.nxv4i16.i64(<vscale x 4 x i32> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
 vint32m2_t test_vwmaccsu_vx_i32m2(vint32m2_t vd, int16_t rs1, vuint16m1_t vs2, size_t vl) {
@@ -171,7 +171,7 @@ vint32m2_t test_vwmaccsu_vx_i32m2(vint32m2_t vd, int16_t rs1, vuint16m1_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vwmaccsu_vv_i32m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vwmaccsu.nxv8i32.nxv8i16.nxv8i16.i64(<vscale x 8 x i32> [[VD:%.*]], <vscale x 8 x i16> [[VS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vwmaccsu.nxv8i32.nxv8i16.nxv8i16.i64(<vscale x 8 x i32> [[VD:%.*]], <vscale x 8 x i16> [[VS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i32> [[TMP0]]
 //
 vint32m4_t test_vwmaccsu_vv_i32m4(vint32m4_t vd, vint16m2_t vs1, vuint16m2_t vs2, size_t vl) {
@@ -180,7 +180,7 @@ vint32m4_t test_vwmaccsu_vv_i32m4(vint32m4_t vd, vint16m2_t vs1, vuint16m2_t vs2
 
 // CHECK-RV64-LABEL: @test_vwmaccsu_vx_i32m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vwmaccsu.nxv8i32.i16.nxv8i16.i64(<vscale x 8 x i32> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vwmaccsu.nxv8i32.i16.nxv8i16.i64(<vscale x 8 x i32> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i32> [[TMP0]]
 //
 vint32m4_t test_vwmaccsu_vx_i32m4(vint32m4_t vd, int16_t rs1, vuint16m2_t vs2, size_t vl) {
@@ -189,7 +189,7 @@ vint32m4_t test_vwmaccsu_vx_i32m4(vint32m4_t vd, int16_t rs1, vuint16m2_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vwmaccsu_vv_i32m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vwmaccsu.nxv16i32.nxv16i16.nxv16i16.i64(<vscale x 16 x i32> [[VD:%.*]], <vscale x 16 x i16> [[VS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vwmaccsu.nxv16i32.nxv16i16.nxv16i16.i64(<vscale x 16 x i32> [[VD:%.*]], <vscale x 16 x i16> [[VS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i32> [[TMP0]]
 //
 vint32m8_t test_vwmaccsu_vv_i32m8(vint32m8_t vd, vint16m4_t vs1, vuint16m4_t vs2, size_t vl) {
@@ -198,7 +198,7 @@ vint32m8_t test_vwmaccsu_vv_i32m8(vint32m8_t vd, vint16m4_t vs1, vuint16m4_t vs2
 
 // CHECK-RV64-LABEL: @test_vwmaccsu_vx_i32m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vwmaccsu.nxv16i32.i16.nxv16i16.i64(<vscale x 16 x i32> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vwmaccsu.nxv16i32.i16.nxv16i16.i64(<vscale x 16 x i32> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i32> [[TMP0]]
 //
 vint32m8_t test_vwmaccsu_vx_i32m8(vint32m8_t vd, int16_t rs1, vuint16m4_t vs2, size_t vl) {
@@ -207,7 +207,7 @@ vint32m8_t test_vwmaccsu_vx_i32m8(vint32m8_t vd, int16_t rs1, vuint16m4_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vwmaccsu_vv_i64m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vwmaccsu.nxv1i64.nxv1i32.nxv1i32.i64(<vscale x 1 x i64> [[VD:%.*]], <vscale x 1 x i32> [[VS1:%.*]], <vscale x 1 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vwmaccsu.nxv1i64.nxv1i32.nxv1i32.i64(<vscale x 1 x i64> [[VD:%.*]], <vscale x 1 x i32> [[VS1:%.*]], <vscale x 1 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
 //
 vint64m1_t test_vwmaccsu_vv_i64m1(vint64m1_t vd, vint32mf2_t vs1, vuint32mf2_t vs2, size_t vl) {
@@ -216,7 +216,7 @@ vint64m1_t test_vwmaccsu_vv_i64m1(vint64m1_t vd, vint32mf2_t vs1, vuint32mf2_t v
 
 // CHECK-RV64-LABEL: @test_vwmaccsu_vx_i64m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vwmaccsu.nxv1i64.i32.nxv1i32.i64(<vscale x 1 x i64> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 1 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vwmaccsu.nxv1i64.i32.nxv1i32.i64(<vscale x 1 x i64> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 1 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
 //
 vint64m1_t test_vwmaccsu_vx_i64m1(vint64m1_t vd, int32_t rs1, vuint32mf2_t vs2, size_t vl) {
@@ -225,7 +225,7 @@ vint64m1_t test_vwmaccsu_vx_i64m1(vint64m1_t vd, int32_t rs1, vuint32mf2_t vs2,
 
 // CHECK-RV64-LABEL: @test_vwmaccsu_vv_i64m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vwmaccsu.nxv2i64.nxv2i32.nxv2i32.i64(<vscale x 2 x i64> [[VD:%.*]], <vscale x 2 x i32> [[VS1:%.*]], <vscale x 2 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vwmaccsu.nxv2i64.nxv2i32.nxv2i32.i64(<vscale x 2 x i64> [[VD:%.*]], <vscale x 2 x i32> [[VS1:%.*]], <vscale x 2 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
 vint64m2_t test_vwmaccsu_vv_i64m2(vint64m2_t vd, vint32m1_t vs1, vuint32m1_t vs2, size_t vl) {
@@ -234,7 +234,7 @@ vint64m2_t test_vwmaccsu_vv_i64m2(vint64m2_t vd, vint32m1_t vs1, vuint32m1_t vs2
 
 // CHECK-RV64-LABEL: @test_vwmaccsu_vx_i64m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vwmaccsu.nxv2i64.i32.nxv2i32.i64(<vscale x 2 x i64> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 2 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vwmaccsu.nxv2i64.i32.nxv2i32.i64(<vscale x 2 x i64> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 2 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
 vint64m2_t test_vwmaccsu_vx_i64m2(vint64m2_t vd, int32_t rs1, vuint32m1_t vs2, size_t vl) {
@@ -243,7 +243,7 @@ vint64m2_t test_vwmaccsu_vx_i64m2(vint64m2_t vd, int32_t rs1, vuint32m1_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vwmaccsu_vv_i64m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vwmaccsu.nxv4i64.nxv4i32.nxv4i32.i64(<vscale x 4 x i64> [[VD:%.*]], <vscale x 4 x i32> [[VS1:%.*]], <vscale x 4 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vwmaccsu.nxv4i64.nxv4i32.nxv4i32.i64(<vscale x 4 x i64> [[VD:%.*]], <vscale x 4 x i32> [[VS1:%.*]], <vscale x 4 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
 //
 vint64m4_t test_vwmaccsu_vv_i64m4(vint64m4_t vd, vint32m2_t vs1, vuint32m2_t vs2, size_t vl) {
@@ -252,7 +252,7 @@ vint64m4_t test_vwmaccsu_vv_i64m4(vint64m4_t vd, vint32m2_t vs1, vuint32m2_t vs2
 
 // CHECK-RV64-LABEL: @test_vwmaccsu_vx_i64m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vwmaccsu.nxv4i64.i32.nxv4i32.i64(<vscale x 4 x i64> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 4 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vwmaccsu.nxv4i64.i32.nxv4i32.i64(<vscale x 4 x i64> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 4 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
 //
 vint64m4_t test_vwmaccsu_vx_i64m4(vint64m4_t vd, int32_t rs1, vuint32m2_t vs2, size_t vl) {
@@ -261,7 +261,7 @@ vint64m4_t test_vwmaccsu_vx_i64m4(vint64m4_t vd, int32_t rs1, vuint32m2_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vwmaccsu_vv_i64m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vwmaccsu.nxv8i64.nxv8i32.nxv8i32.i64(<vscale x 8 x i64> [[VD:%.*]], <vscale x 8 x i32> [[VS1:%.*]], <vscale x 8 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vwmaccsu.nxv8i64.nxv8i32.nxv8i32.i64(<vscale x 8 x i64> [[VD:%.*]], <vscale x 8 x i32> [[VS1:%.*]], <vscale x 8 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
 //
 vint64m8_t test_vwmaccsu_vv_i64m8(vint64m8_t vd, vint32m4_t vs1, vuint32m4_t vs2, size_t vl) {
@@ -270,7 +270,7 @@ vint64m8_t test_vwmaccsu_vv_i64m8(vint64m8_t vd, vint32m4_t vs1, vuint32m4_t vs2
 
 // CHECK-RV64-LABEL: @test_vwmaccsu_vx_i64m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vwmaccsu.nxv8i64.i32.nxv8i32.i64(<vscale x 8 x i64> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 8 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vwmaccsu.nxv8i64.i32.nxv8i32.i64(<vscale x 8 x i64> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 8 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
 //
 vint64m8_t test_vwmaccsu_vx_i64m8(vint64m8_t vd, int32_t rs1, vuint32m4_t vs2, size_t vl) {

diff  --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vwmaccu.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vwmaccu.c
index 65f88a6211ff8..a589288d9453f 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vwmaccu.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vwmaccu.c
@@ -9,7 +9,7 @@
 
 // CHECK-RV64-LABEL: @test_vwmaccu_vv_u16mf4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vwmaccu.nxv1i16.nxv1i8.nxv1i8.i64(<vscale x 1 x i16> [[VD:%.*]], <vscale x 1 x i8> [[VS1:%.*]], <vscale x 1 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vwmaccu.nxv1i16.nxv1i8.nxv1i8.i64(<vscale x 1 x i16> [[VD:%.*]], <vscale x 1 x i8> [[VS1:%.*]], <vscale x 1 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i16> [[TMP0]]
 //
 vuint16mf4_t test_vwmaccu_vv_u16mf4(vuint16mf4_t vd, vuint8mf8_t vs1, vuint8mf8_t vs2, size_t vl) {
@@ -18,7 +18,7 @@ vuint16mf4_t test_vwmaccu_vv_u16mf4(vuint16mf4_t vd, vuint8mf8_t vs1, vuint8mf8_
 
 // CHECK-RV64-LABEL: @test_vwmaccu_vx_u16mf4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vwmaccu.nxv1i16.i8.nxv1i8.i64(<vscale x 1 x i16> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 1 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vwmaccu.nxv1i16.i8.nxv1i8.i64(<vscale x 1 x i16> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 1 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i16> [[TMP0]]
 //
 vuint16mf4_t test_vwmaccu_vx_u16mf4(vuint16mf4_t vd, uint8_t rs1, vuint8mf8_t vs2, size_t vl) {
@@ -27,7 +27,7 @@ vuint16mf4_t test_vwmaccu_vx_u16mf4(vuint16mf4_t vd, uint8_t rs1, vuint8mf8_t vs
 
 // CHECK-RV64-LABEL: @test_vwmaccu_vv_u16mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vwmaccu.nxv2i16.nxv2i8.nxv2i8.i64(<vscale x 2 x i16> [[VD:%.*]], <vscale x 2 x i8> [[VS1:%.*]], <vscale x 2 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vwmaccu.nxv2i16.nxv2i8.nxv2i8.i64(<vscale x 2 x i16> [[VD:%.*]], <vscale x 2 x i8> [[VS1:%.*]], <vscale x 2 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i16> [[TMP0]]
 //
 vuint16mf2_t test_vwmaccu_vv_u16mf2(vuint16mf2_t vd, vuint8mf4_t vs1, vuint8mf4_t vs2, size_t vl) {
@@ -36,7 +36,7 @@ vuint16mf2_t test_vwmaccu_vv_u16mf2(vuint16mf2_t vd, vuint8mf4_t vs1, vuint8mf4_
 
 // CHECK-RV64-LABEL: @test_vwmaccu_vx_u16mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vwmaccu.nxv2i16.i8.nxv2i8.i64(<vscale x 2 x i16> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 2 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vwmaccu.nxv2i16.i8.nxv2i8.i64(<vscale x 2 x i16> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 2 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i16> [[TMP0]]
 //
 vuint16mf2_t test_vwmaccu_vx_u16mf2(vuint16mf2_t vd, uint8_t rs1, vuint8mf4_t vs2, size_t vl) {
@@ -45,7 +45,7 @@ vuint16mf2_t test_vwmaccu_vx_u16mf2(vuint16mf2_t vd, uint8_t rs1, vuint8mf4_t vs
 
 // CHECK-RV64-LABEL: @test_vwmaccu_vv_u16m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vwmaccu.nxv4i16.nxv4i8.nxv4i8.i64(<vscale x 4 x i16> [[VD:%.*]], <vscale x 4 x i8> [[VS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vwmaccu.nxv4i16.nxv4i8.nxv4i8.i64(<vscale x 4 x i16> [[VD:%.*]], <vscale x 4 x i8> [[VS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i16> [[TMP0]]
 //
 vuint16m1_t test_vwmaccu_vv_u16m1(vuint16m1_t vd, vuint8mf2_t vs1, vuint8mf2_t vs2, size_t vl) {
@@ -54,7 +54,7 @@ vuint16m1_t test_vwmaccu_vv_u16m1(vuint16m1_t vd, vuint8mf2_t vs1, vuint8mf2_t v
 
 // CHECK-RV64-LABEL: @test_vwmaccu_vx_u16m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vwmaccu.nxv4i16.i8.nxv4i8.i64(<vscale x 4 x i16> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vwmaccu.nxv4i16.i8.nxv4i8.i64(<vscale x 4 x i16> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i16> [[TMP0]]
 //
 vuint16m1_t test_vwmaccu_vx_u16m1(vuint16m1_t vd, uint8_t rs1, vuint8mf2_t vs2, size_t vl) {
@@ -63,7 +63,7 @@ vuint16m1_t test_vwmaccu_vx_u16m1(vuint16m1_t vd, uint8_t rs1, vuint8mf2_t vs2,
 
 // CHECK-RV64-LABEL: @test_vwmaccu_vv_u16m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vwmaccu.nxv8i16.nxv8i8.nxv8i8.i64(<vscale x 8 x i16> [[VD:%.*]], <vscale x 8 x i8> [[VS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vwmaccu.nxv8i16.nxv8i8.nxv8i8.i64(<vscale x 8 x i16> [[VD:%.*]], <vscale x 8 x i8> [[VS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
 vuint16m2_t test_vwmaccu_vv_u16m2(vuint16m2_t vd, vuint8m1_t vs1, vuint8m1_t vs2, size_t vl) {
@@ -72,7 +72,7 @@ vuint16m2_t test_vwmaccu_vv_u16m2(vuint16m2_t vd, vuint8m1_t vs1, vuint8m1_t vs2
 
 // CHECK-RV64-LABEL: @test_vwmaccu_vx_u16m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vwmaccu.nxv8i16.i8.nxv8i8.i64(<vscale x 8 x i16> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vwmaccu.nxv8i16.i8.nxv8i8.i64(<vscale x 8 x i16> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
 vuint16m2_t test_vwmaccu_vx_u16m2(vuint16m2_t vd, uint8_t rs1, vuint8m1_t vs2, size_t vl) {
@@ -81,7 +81,7 @@ vuint16m2_t test_vwmaccu_vx_u16m2(vuint16m2_t vd, uint8_t rs1, vuint8m1_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vwmaccu_vv_u16m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vwmaccu.nxv16i16.nxv16i8.nxv16i8.i64(<vscale x 16 x i16> [[VD:%.*]], <vscale x 16 x i8> [[VS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vwmaccu.nxv16i16.nxv16i8.nxv16i8.i64(<vscale x 16 x i16> [[VD:%.*]], <vscale x 16 x i8> [[VS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i16> [[TMP0]]
 //
 vuint16m4_t test_vwmaccu_vv_u16m4(vuint16m4_t vd, vuint8m2_t vs1, vuint8m2_t vs2, size_t vl) {
@@ -90,7 +90,7 @@ vuint16m4_t test_vwmaccu_vv_u16m4(vuint16m4_t vd, vuint8m2_t vs1, vuint8m2_t vs2
 
 // CHECK-RV64-LABEL: @test_vwmaccu_vx_u16m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vwmaccu.nxv16i16.i8.nxv16i8.i64(<vscale x 16 x i16> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vwmaccu.nxv16i16.i8.nxv16i8.i64(<vscale x 16 x i16> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i16> [[TMP0]]
 //
 vuint16m4_t test_vwmaccu_vx_u16m4(vuint16m4_t vd, uint8_t rs1, vuint8m2_t vs2, size_t vl) {
@@ -99,7 +99,7 @@ vuint16m4_t test_vwmaccu_vx_u16m4(vuint16m4_t vd, uint8_t rs1, vuint8m2_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vwmaccu_vv_u16m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vwmaccu.nxv32i16.nxv32i8.nxv32i8.i64(<vscale x 32 x i16> [[VD:%.*]], <vscale x 32 x i8> [[VS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vwmaccu.nxv32i16.nxv32i8.nxv32i8.i64(<vscale x 32 x i16> [[VD:%.*]], <vscale x 32 x i8> [[VS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 32 x i16> [[TMP0]]
 //
 vuint16m8_t test_vwmaccu_vv_u16m8(vuint16m8_t vd, vuint8m4_t vs1, vuint8m4_t vs2, size_t vl) {
@@ -108,7 +108,7 @@ vuint16m8_t test_vwmaccu_vv_u16m8(vuint16m8_t vd, vuint8m4_t vs1, vuint8m4_t vs2
 
 // CHECK-RV64-LABEL: @test_vwmaccu_vx_u16m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vwmaccu.nxv32i16.i8.nxv32i8.i64(<vscale x 32 x i16> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vwmaccu.nxv32i16.i8.nxv32i8.i64(<vscale x 32 x i16> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 32 x i16> [[TMP0]]
 //
 vuint16m8_t test_vwmaccu_vx_u16m8(vuint16m8_t vd, uint8_t rs1, vuint8m4_t vs2, size_t vl) {
@@ -117,7 +117,7 @@ vuint16m8_t test_vwmaccu_vx_u16m8(vuint16m8_t vd, uint8_t rs1, vuint8m4_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vwmaccu_vv_u32mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vwmaccu.nxv1i32.nxv1i16.nxv1i16.i64(<vscale x 1 x i32> [[VD:%.*]], <vscale x 1 x i16> [[VS1:%.*]], <vscale x 1 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vwmaccu.nxv1i32.nxv1i16.nxv1i16.i64(<vscale x 1 x i32> [[VD:%.*]], <vscale x 1 x i16> [[VS1:%.*]], <vscale x 1 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i32> [[TMP0]]
 //
 vuint32mf2_t test_vwmaccu_vv_u32mf2(vuint32mf2_t vd, vuint16mf4_t vs1, vuint16mf4_t vs2, size_t vl) {
@@ -126,7 +126,7 @@ vuint32mf2_t test_vwmaccu_vv_u32mf2(vuint32mf2_t vd, vuint16mf4_t vs1, vuint16mf
 
 // CHECK-RV64-LABEL: @test_vwmaccu_vx_u32mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vwmaccu.nxv1i32.i16.nxv1i16.i64(<vscale x 1 x i32> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 1 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vwmaccu.nxv1i32.i16.nxv1i16.i64(<vscale x 1 x i32> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 1 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i32> [[TMP0]]
 //
 vuint32mf2_t test_vwmaccu_vx_u32mf2(vuint32mf2_t vd, uint16_t rs1, vuint16mf4_t vs2, size_t vl) {
@@ -135,7 +135,7 @@ vuint32mf2_t test_vwmaccu_vx_u32mf2(vuint32mf2_t vd, uint16_t rs1, vuint16mf4_t
 
 // CHECK-RV64-LABEL: @test_vwmaccu_vv_u32m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vwmaccu.nxv2i32.nxv2i16.nxv2i16.i64(<vscale x 2 x i32> [[VD:%.*]], <vscale x 2 x i16> [[VS1:%.*]], <vscale x 2 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vwmaccu.nxv2i32.nxv2i16.nxv2i16.i64(<vscale x 2 x i32> [[VD:%.*]], <vscale x 2 x i16> [[VS1:%.*]], <vscale x 2 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i32> [[TMP0]]
 //
 vuint32m1_t test_vwmaccu_vv_u32m1(vuint32m1_t vd, vuint16mf2_t vs1, vuint16mf2_t vs2, size_t vl) {
@@ -144,7 +144,7 @@ vuint32m1_t test_vwmaccu_vv_u32m1(vuint32m1_t vd, vuint16mf2_t vs1, vuint16mf2_t
 
 // CHECK-RV64-LABEL: @test_vwmaccu_vx_u32m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vwmaccu.nxv2i32.i16.nxv2i16.i64(<vscale x 2 x i32> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 2 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vwmaccu.nxv2i32.i16.nxv2i16.i64(<vscale x 2 x i32> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 2 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i32> [[TMP0]]
 //
 vuint32m1_t test_vwmaccu_vx_u32m1(vuint32m1_t vd, uint16_t rs1, vuint16mf2_t vs2, size_t vl) {
@@ -153,7 +153,7 @@ vuint32m1_t test_vwmaccu_vx_u32m1(vuint32m1_t vd, uint16_t rs1, vuint16mf2_t vs2
 
 // CHECK-RV64-LABEL: @test_vwmaccu_vv_u32m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vwmaccu.nxv4i32.nxv4i16.nxv4i16.i64(<vscale x 4 x i32> [[VD:%.*]], <vscale x 4 x i16> [[VS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vwmaccu.nxv4i32.nxv4i16.nxv4i16.i64(<vscale x 4 x i32> [[VD:%.*]], <vscale x 4 x i16> [[VS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
 vuint32m2_t test_vwmaccu_vv_u32m2(vuint32m2_t vd, vuint16m1_t vs1, vuint16m1_t vs2, size_t vl) {
@@ -162,7 +162,7 @@ vuint32m2_t test_vwmaccu_vv_u32m2(vuint32m2_t vd, vuint16m1_t vs1, vuint16m1_t v
 
 // CHECK-RV64-LABEL: @test_vwmaccu_vx_u32m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vwmaccu.nxv4i32.i16.nxv4i16.i64(<vscale x 4 x i32> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vwmaccu.nxv4i32.i16.nxv4i16.i64(<vscale x 4 x i32> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
 vuint32m2_t test_vwmaccu_vx_u32m2(vuint32m2_t vd, uint16_t rs1, vuint16m1_t vs2, size_t vl) {
@@ -171,7 +171,7 @@ vuint32m2_t test_vwmaccu_vx_u32m2(vuint32m2_t vd, uint16_t rs1, vuint16m1_t vs2,
 
 // CHECK-RV64-LABEL: @test_vwmaccu_vv_u32m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vwmaccu.nxv8i32.nxv8i16.nxv8i16.i64(<vscale x 8 x i32> [[VD:%.*]], <vscale x 8 x i16> [[VS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vwmaccu.nxv8i32.nxv8i16.nxv8i16.i64(<vscale x 8 x i32> [[VD:%.*]], <vscale x 8 x i16> [[VS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i32> [[TMP0]]
 //
 vuint32m4_t test_vwmaccu_vv_u32m4(vuint32m4_t vd, vuint16m2_t vs1, vuint16m2_t vs2, size_t vl) {
@@ -180,7 +180,7 @@ vuint32m4_t test_vwmaccu_vv_u32m4(vuint32m4_t vd, vuint16m2_t vs1, vuint16m2_t v
 
 // CHECK-RV64-LABEL: @test_vwmaccu_vx_u32m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vwmaccu.nxv8i32.i16.nxv8i16.i64(<vscale x 8 x i32> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vwmaccu.nxv8i32.i16.nxv8i16.i64(<vscale x 8 x i32> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i32> [[TMP0]]
 //
 vuint32m4_t test_vwmaccu_vx_u32m4(vuint32m4_t vd, uint16_t rs1, vuint16m2_t vs2, size_t vl) {
@@ -189,7 +189,7 @@ vuint32m4_t test_vwmaccu_vx_u32m4(vuint32m4_t vd, uint16_t rs1, vuint16m2_t vs2,
 
 // CHECK-RV64-LABEL: @test_vwmaccu_vv_u32m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vwmaccu.nxv16i32.nxv16i16.nxv16i16.i64(<vscale x 16 x i32> [[VD:%.*]], <vscale x 16 x i16> [[VS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vwmaccu.nxv16i32.nxv16i16.nxv16i16.i64(<vscale x 16 x i32> [[VD:%.*]], <vscale x 16 x i16> [[VS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i32> [[TMP0]]
 //
 vuint32m8_t test_vwmaccu_vv_u32m8(vuint32m8_t vd, vuint16m4_t vs1, vuint16m4_t vs2, size_t vl) {
@@ -198,7 +198,7 @@ vuint32m8_t test_vwmaccu_vv_u32m8(vuint32m8_t vd, vuint16m4_t vs1, vuint16m4_t v
 
 // CHECK-RV64-LABEL: @test_vwmaccu_vx_u32m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vwmaccu.nxv16i32.i16.nxv16i16.i64(<vscale x 16 x i32> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vwmaccu.nxv16i32.i16.nxv16i16.i64(<vscale x 16 x i32> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i32> [[TMP0]]
 //
 vuint32m8_t test_vwmaccu_vx_u32m8(vuint32m8_t vd, uint16_t rs1, vuint16m4_t vs2, size_t vl) {
@@ -207,7 +207,7 @@ vuint32m8_t test_vwmaccu_vx_u32m8(vuint32m8_t vd, uint16_t rs1, vuint16m4_t vs2,
 
 // CHECK-RV64-LABEL: @test_vwmaccu_vv_u64m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vwmaccu.nxv1i64.nxv1i32.nxv1i32.i64(<vscale x 1 x i64> [[VD:%.*]], <vscale x 1 x i32> [[VS1:%.*]], <vscale x 1 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vwmaccu.nxv1i64.nxv1i32.nxv1i32.i64(<vscale x 1 x i64> [[VD:%.*]], <vscale x 1 x i32> [[VS1:%.*]], <vscale x 1 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
 //
 vuint64m1_t test_vwmaccu_vv_u64m1(vuint64m1_t vd, vuint32mf2_t vs1, vuint32mf2_t vs2, size_t vl) {
@@ -216,7 +216,7 @@ vuint64m1_t test_vwmaccu_vv_u64m1(vuint64m1_t vd, vuint32mf2_t vs1, vuint32mf2_t
 
 // CHECK-RV64-LABEL: @test_vwmaccu_vx_u64m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vwmaccu.nxv1i64.i32.nxv1i32.i64(<vscale x 1 x i64> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 1 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vwmaccu.nxv1i64.i32.nxv1i32.i64(<vscale x 1 x i64> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 1 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
 //
 vuint64m1_t test_vwmaccu_vx_u64m1(vuint64m1_t vd, uint32_t rs1, vuint32mf2_t vs2, size_t vl) {
@@ -225,7 +225,7 @@ vuint64m1_t test_vwmaccu_vx_u64m1(vuint64m1_t vd, uint32_t rs1, vuint32mf2_t vs2
 
 // CHECK-RV64-LABEL: @test_vwmaccu_vv_u64m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vwmaccu.nxv2i64.nxv2i32.nxv2i32.i64(<vscale x 2 x i64> [[VD:%.*]], <vscale x 2 x i32> [[VS1:%.*]], <vscale x 2 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vwmaccu.nxv2i64.nxv2i32.nxv2i32.i64(<vscale x 2 x i64> [[VD:%.*]], <vscale x 2 x i32> [[VS1:%.*]], <vscale x 2 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
 vuint64m2_t test_vwmaccu_vv_u64m2(vuint64m2_t vd, vuint32m1_t vs1, vuint32m1_t vs2, size_t vl) {
@@ -234,7 +234,7 @@ vuint64m2_t test_vwmaccu_vv_u64m2(vuint64m2_t vd, vuint32m1_t vs1, vuint32m1_t v
 
 // CHECK-RV64-LABEL: @test_vwmaccu_vx_u64m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vwmaccu.nxv2i64.i32.nxv2i32.i64(<vscale x 2 x i64> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 2 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vwmaccu.nxv2i64.i32.nxv2i32.i64(<vscale x 2 x i64> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 2 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
 vuint64m2_t test_vwmaccu_vx_u64m2(vuint64m2_t vd, uint32_t rs1, vuint32m1_t vs2, size_t vl) {
@@ -243,7 +243,7 @@ vuint64m2_t test_vwmaccu_vx_u64m2(vuint64m2_t vd, uint32_t rs1, vuint32m1_t vs2,
 
 // CHECK-RV64-LABEL: @test_vwmaccu_vv_u64m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vwmaccu.nxv4i64.nxv4i32.nxv4i32.i64(<vscale x 4 x i64> [[VD:%.*]], <vscale x 4 x i32> [[VS1:%.*]], <vscale x 4 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vwmaccu.nxv4i64.nxv4i32.nxv4i32.i64(<vscale x 4 x i64> [[VD:%.*]], <vscale x 4 x i32> [[VS1:%.*]], <vscale x 4 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
 //
 vuint64m4_t test_vwmaccu_vv_u64m4(vuint64m4_t vd, vuint32m2_t vs1, vuint32m2_t vs2, size_t vl) {
@@ -252,7 +252,7 @@ vuint64m4_t test_vwmaccu_vv_u64m4(vuint64m4_t vd, vuint32m2_t vs1, vuint32m2_t v
 
 // CHECK-RV64-LABEL: @test_vwmaccu_vx_u64m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vwmaccu.nxv4i64.i32.nxv4i32.i64(<vscale x 4 x i64> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 4 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vwmaccu.nxv4i64.i32.nxv4i32.i64(<vscale x 4 x i64> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 4 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
 //
 vuint64m4_t test_vwmaccu_vx_u64m4(vuint64m4_t vd, uint32_t rs1, vuint32m2_t vs2, size_t vl) {
@@ -261,7 +261,7 @@ vuint64m4_t test_vwmaccu_vx_u64m4(vuint64m4_t vd, uint32_t rs1, vuint32m2_t vs2,
 
 // CHECK-RV64-LABEL: @test_vwmaccu_vv_u64m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vwmaccu.nxv8i64.nxv8i32.nxv8i32.i64(<vscale x 8 x i64> [[VD:%.*]], <vscale x 8 x i32> [[VS1:%.*]], <vscale x 8 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vwmaccu.nxv8i64.nxv8i32.nxv8i32.i64(<vscale x 8 x i64> [[VD:%.*]], <vscale x 8 x i32> [[VS1:%.*]], <vscale x 8 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
 //
 vuint64m8_t test_vwmaccu_vv_u64m8(vuint64m8_t vd, vuint32m4_t vs1, vuint32m4_t vs2, size_t vl) {
@@ -270,7 +270,7 @@ vuint64m8_t test_vwmaccu_vv_u64m8(vuint64m8_t vd, vuint32m4_t vs1, vuint32m4_t v
 
 // CHECK-RV64-LABEL: @test_vwmaccu_vx_u64m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vwmaccu.nxv8i64.i32.nxv8i32.i64(<vscale x 8 x i64> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 8 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vwmaccu.nxv8i64.i32.nxv8i32.i64(<vscale x 8 x i64> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 8 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
 //
 vuint64m8_t test_vwmaccu_vx_u64m8(vuint64m8_t vd, uint32_t rs1, vuint32m4_t vs2, size_t vl) {

diff  --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vwmaccus.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vwmaccus.c
index bf8f8715e8b20..bd34f085f2c4d 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vwmaccus.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vwmaccus.c
@@ -9,7 +9,7 @@
 
 // CHECK-RV64-LABEL: @test_vwmaccus_vx_i16mf4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vwmaccus.nxv1i16.i8.nxv1i8.i64(<vscale x 1 x i16> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 1 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vwmaccus.nxv1i16.i8.nxv1i8.i64(<vscale x 1 x i16> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 1 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i16> [[TMP0]]
 //
 vint16mf4_t test_vwmaccus_vx_i16mf4(vint16mf4_t vd, uint8_t rs1, vint8mf8_t vs2, size_t vl) {
@@ -18,7 +18,7 @@ vint16mf4_t test_vwmaccus_vx_i16mf4(vint16mf4_t vd, uint8_t rs1, vint8mf8_t vs2,
 
 // CHECK-RV64-LABEL: @test_vwmaccus_vx_i16mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vwmaccus.nxv2i16.i8.nxv2i8.i64(<vscale x 2 x i16> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 2 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vwmaccus.nxv2i16.i8.nxv2i8.i64(<vscale x 2 x i16> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 2 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i16> [[TMP0]]
 //
 vint16mf2_t test_vwmaccus_vx_i16mf2(vint16mf2_t vd, uint8_t rs1, vint8mf4_t vs2, size_t vl) {
@@ -27,7 +27,7 @@ vint16mf2_t test_vwmaccus_vx_i16mf2(vint16mf2_t vd, uint8_t rs1, vint8mf4_t vs2,
 
 // CHECK-RV64-LABEL: @test_vwmaccus_vx_i16m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vwmaccus.nxv4i16.i8.nxv4i8.i64(<vscale x 4 x i16> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vwmaccus.nxv4i16.i8.nxv4i8.i64(<vscale x 4 x i16> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 4 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i16> [[TMP0]]
 //
 vint16m1_t test_vwmaccus_vx_i16m1(vint16m1_t vd, uint8_t rs1, vint8mf2_t vs2, size_t vl) {
@@ -36,7 +36,7 @@ vint16m1_t test_vwmaccus_vx_i16m1(vint16m1_t vd, uint8_t rs1, vint8mf2_t vs2, si
 
 // CHECK-RV64-LABEL: @test_vwmaccus_vx_i16m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vwmaccus.nxv8i16.i8.nxv8i8.i64(<vscale x 8 x i16> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vwmaccus.nxv8i16.i8.nxv8i8.i64(<vscale x 8 x i16> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 8 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
 vint16m2_t test_vwmaccus_vx_i16m2(vint16m2_t vd, uint8_t rs1, vint8m1_t vs2, size_t vl) {
@@ -45,7 +45,7 @@ vint16m2_t test_vwmaccus_vx_i16m2(vint16m2_t vd, uint8_t rs1, vint8m1_t vs2, siz
 
 // CHECK-RV64-LABEL: @test_vwmaccus_vx_i16m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vwmaccus.nxv16i16.i8.nxv16i8.i64(<vscale x 16 x i16> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vwmaccus.nxv16i16.i8.nxv16i8.i64(<vscale x 16 x i16> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 16 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i16> [[TMP0]]
 //
 vint16m4_t test_vwmaccus_vx_i16m4(vint16m4_t vd, uint8_t rs1, vint8m2_t vs2, size_t vl) {
@@ -54,7 +54,7 @@ vint16m4_t test_vwmaccus_vx_i16m4(vint16m4_t vd, uint8_t rs1, vint8m2_t vs2, siz
 
 // CHECK-RV64-LABEL: @test_vwmaccus_vx_i16m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vwmaccus.nxv32i16.i8.nxv32i8.i64(<vscale x 32 x i16> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vwmaccus.nxv32i16.i8.nxv32i8.i64(<vscale x 32 x i16> [[VD:%.*]], i8 [[RS1:%.*]], <vscale x 32 x i8> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 32 x i16> [[TMP0]]
 //
 vint16m8_t test_vwmaccus_vx_i16m8(vint16m8_t vd, uint8_t rs1, vint8m4_t vs2, size_t vl) {
@@ -63,7 +63,7 @@ vint16m8_t test_vwmaccus_vx_i16m8(vint16m8_t vd, uint8_t rs1, vint8m4_t vs2, siz
 
 // CHECK-RV64-LABEL: @test_vwmaccus_vx_i32mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vwmaccus.nxv1i32.i16.nxv1i16.i64(<vscale x 1 x i32> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 1 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vwmaccus.nxv1i32.i16.nxv1i16.i64(<vscale x 1 x i32> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 1 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i32> [[TMP0]]
 //
 vint32mf2_t test_vwmaccus_vx_i32mf2(vint32mf2_t vd, uint16_t rs1, vint16mf4_t vs2, size_t vl) {
@@ -72,7 +72,7 @@ vint32mf2_t test_vwmaccus_vx_i32mf2(vint32mf2_t vd, uint16_t rs1, vint16mf4_t vs
 
 // CHECK-RV64-LABEL: @test_vwmaccus_vx_i32m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vwmaccus.nxv2i32.i16.nxv2i16.i64(<vscale x 2 x i32> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 2 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vwmaccus.nxv2i32.i16.nxv2i16.i64(<vscale x 2 x i32> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 2 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i32> [[TMP0]]
 //
 vint32m1_t test_vwmaccus_vx_i32m1(vint32m1_t vd, uint16_t rs1, vint16mf2_t vs2, size_t vl) {
@@ -81,7 +81,7 @@ vint32m1_t test_vwmaccus_vx_i32m1(vint32m1_t vd, uint16_t rs1, vint16mf2_t vs2,
 
 // CHECK-RV64-LABEL: @test_vwmaccus_vx_i32m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vwmaccus.nxv4i32.i16.nxv4i16.i64(<vscale x 4 x i32> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vwmaccus.nxv4i32.i16.nxv4i16.i64(<vscale x 4 x i32> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 4 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
 vint32m2_t test_vwmaccus_vx_i32m2(vint32m2_t vd, uint16_t rs1, vint16m1_t vs2, size_t vl) {
@@ -90,7 +90,7 @@ vint32m2_t test_vwmaccus_vx_i32m2(vint32m2_t vd, uint16_t rs1, vint16m1_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vwmaccus_vx_i32m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vwmaccus.nxv8i32.i16.nxv8i16.i64(<vscale x 8 x i32> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vwmaccus.nxv8i32.i16.nxv8i16.i64(<vscale x 8 x i32> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 8 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i32> [[TMP0]]
 //
 vint32m4_t test_vwmaccus_vx_i32m4(vint32m4_t vd, uint16_t rs1, vint16m2_t vs2, size_t vl) {
@@ -99,7 +99,7 @@ vint32m4_t test_vwmaccus_vx_i32m4(vint32m4_t vd, uint16_t rs1, vint16m2_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vwmaccus_vx_i32m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vwmaccus.nxv16i32.i16.nxv16i16.i64(<vscale x 16 x i32> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vwmaccus.nxv16i32.i16.nxv16i16.i64(<vscale x 16 x i32> [[VD:%.*]], i16 [[RS1:%.*]], <vscale x 16 x i16> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i32> [[TMP0]]
 //
 vint32m8_t test_vwmaccus_vx_i32m8(vint32m8_t vd, uint16_t rs1, vint16m4_t vs2, size_t vl) {
@@ -108,7 +108,7 @@ vint32m8_t test_vwmaccus_vx_i32m8(vint32m8_t vd, uint16_t rs1, vint16m4_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vwmaccus_vx_i64m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vwmaccus.nxv1i64.i32.nxv1i32.i64(<vscale x 1 x i64> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 1 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vwmaccus.nxv1i64.i32.nxv1i32.i64(<vscale x 1 x i64> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 1 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
 //
 vint64m1_t test_vwmaccus_vx_i64m1(vint64m1_t vd, uint32_t rs1, vint32mf2_t vs2, size_t vl) {
@@ -117,7 +117,7 @@ vint64m1_t test_vwmaccus_vx_i64m1(vint64m1_t vd, uint32_t rs1, vint32mf2_t vs2,
 
 // CHECK-RV64-LABEL: @test_vwmaccus_vx_i64m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vwmaccus.nxv2i64.i32.nxv2i32.i64(<vscale x 2 x i64> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 2 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vwmaccus.nxv2i64.i32.nxv2i32.i64(<vscale x 2 x i64> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 2 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
 vint64m2_t test_vwmaccus_vx_i64m2(vint64m2_t vd, uint32_t rs1, vint32m1_t vs2, size_t vl) {
@@ -126,7 +126,7 @@ vint64m2_t test_vwmaccus_vx_i64m2(vint64m2_t vd, uint32_t rs1, vint32m1_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vwmaccus_vx_i64m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vwmaccus.nxv4i64.i32.nxv4i32.i64(<vscale x 4 x i64> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 4 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vwmaccus.nxv4i64.i32.nxv4i32.i64(<vscale x 4 x i64> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 4 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
 //
 vint64m4_t test_vwmaccus_vx_i64m4(vint64m4_t vd, uint32_t rs1, vint32m2_t vs2, size_t vl) {
@@ -135,7 +135,7 @@ vint64m4_t test_vwmaccus_vx_i64m4(vint64m4_t vd, uint32_t rs1, vint32m2_t vs2, s
 
 // CHECK-RV64-LABEL: @test_vwmaccus_vx_i64m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vwmaccus.nxv8i64.i32.nxv8i32.i64(<vscale x 8 x i64> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 8 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vwmaccus.nxv8i64.i32.nxv8i32.i64(<vscale x 8 x i64> [[VD:%.*]], i32 [[RS1:%.*]], <vscale x 8 x i32> [[VS2:%.*]], i64 [[VL:%.*]], i64 1)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
 //
 vint64m8_t test_vwmaccus_vx_i64m8(vint64m8_t vd, uint32_t rs1, vint32m4_t vs2, size_t vl) {


        


More information about the cfe-commits mailing list