[llvm] e2b7aab - [RISCV] Reduce the number of RISCV vector builtins by an order of magnitude.

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Mon Oct 25 09:04:11 PDT 2021


Author: Craig Topper
Date: 2021-10-25T09:03:59-07:00
New Revision: e2b7aabb57d50a9b16fe71c1f80b983dd61d3fbd

URL: https://github.com/llvm/llvm-project/commit/e2b7aabb57d50a9b16fe71c1f80b983dd61d3fbd
DIFF: https://github.com/llvm/llvm-project/commit/e2b7aabb57d50a9b16fe71c1f80b983dd61d3fbd.diff

LOG: [RISCV] Reduce the number of RISCV vector builtins by an order of magnitude.

All but 2 of the vector builtins are only used by clang_builtin_alias.
When using clang_builtin_alias, the type string of the builtin is never
checked. Only the types in the function definition used for the alias
are checked.
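
For example, a minimal sketch (modeled on the riscv-attr-builtin-alias.c
test updated below; the declaration names here are illustrative) of two
aliases with different prototypes sharing one builtin:

  // Types come entirely from these prototypes, not from the builtin's
  // type string.
  __attribute__((clang_builtin_alias(__builtin_rvv_vadd_vv)))
  vint8m1_t vadd_vv_i8m1(vint8m1_t op1, vint8m1_t op2, size_t vl);

  __attribute__((clang_builtin_alias(__builtin_rvv_vadd_vv)))
  vint16m2_t vadd_vv_i16m2(vint16m2_t op1, vint16m2_t op2, size_t vl);

Calls are type checked against the alias prototypes, so a single
__builtin_rvv_vadd_vv can serve every element type and LMUL.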

This patch takes advantage of this to share a single builtin for
many different types. We already used type overloads on the IR
intrinsics, so the codegen for the builtins being merged was already
the same. This extends the type overloading to the builtins.
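
Concretely, the generated Builtins.def section shrinks from one entry
per type configuration to a single shared entry per operation; for
builtins with auto-generated header definitions the type string is now
emitted empty, since it is never checked. An illustrative sketch:

  // Before: one builtin per type/LMUL combination.
  RISCVV_BUILTIN(__builtin_rvv_vadd_vv_i8m1, "<type string>", "n")
  RISCVV_BUILTIN(__builtin_rvv_vadd_vv_i16m2, "<type string>", "n")
  // After: one shared, type-overloaded builtin.
  RISCVV_BUILTIN(__builtin_rvv_vadd_vv, "", "n")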

I had to make a few tweaks to make this work.
-Floating point vector-vector vmerge now uses the vmerge intrinsic
 instead of the vfmerge intrinsic. New isel patterns and tests are
 added to support this.
-The SemaChecking for the immediate of vset_v/vget_v has been removed.
 Determining the valid range is harder now that one builtin covers many
 types, so I've added masking in ManualCodegen to ensure we emit valid
 IR even for invalid input, as in the worked example below.
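
 As a worked instance (a sketch; element counts are the vscale=1 minimums):
 for vget_v_i8m8_i8m1 the source is <vscale x 64 x i8> and the result is
 <vscale x 8 x i8>, so

   MaxIndex = 64 / 8 = 8          (always a power of two, per the assert)
   index 9 -> 9 & (8 - 1) = 1     (masked into the valid range 0..7)
   offset  = 1 * 8 = 8            (scaled by the result's min element count)

 and an out-of-range index wraps to a valid sub-vector offset instead of
 producing invalid IR.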

This reduces the number of builtins from ~25000 to ~1100.

Reviewed By: HsiangKai

Differential Revision: https://reviews.llvm.org/D112102

Added: 
    

Modified: 
    clang/include/clang/Basic/riscv_vector.td
    clang/lib/Sema/SemaChecking.cpp
    clang/test/CodeGen/RISCV/riscv-attr-builtin-alias.c
    clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmerge.c
    clang/test/CodeGen/RISCV/rvv-intrinsics/vmerge.c
    clang/utils/TableGen/RISCVVEmitter.cpp
    llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
    llvm/test/CodeGen/RISCV/rvv/vmerge-rv32.ll
    llvm/test/CodeGen/RISCV/rvv/vmerge-rv64.ll

Removed: 
    


################################################################################
diff --git a/clang/include/clang/Basic/riscv_vector.td b/clang/include/clang/Basic/riscv_vector.td
index 6c812a88edd5..bae4edb2d1b3 100644
--- a/clang/include/clang/Basic/riscv_vector.td
+++ b/clang/include/clang/Basic/riscv_vector.td
@@ -1838,7 +1838,7 @@ let HasMask = false, HasPolicy = false,
       std::rotate(Ops.begin(), Ops.begin() + 1, Ops.begin() + 3);
       IntrinsicTypes = {ResultType, Ops[1]->getType(), Ops[3]->getType()};
     }] in {
-  defm vmerge : RVVOutOp1BuiltinSet<"vfmerge", "xfd",
+  defm vmerge : RVVOutOp1BuiltinSet<"vmerge", "xfd",
                                     [["vvm", "v", "vmvv"]]>;
   defm vfmerge : RVVOutOp1BuiltinSet<"vfmerge", "xfd",
                                     [["vfm", "v", "vmve"]]>;
@@ -2098,7 +2098,12 @@ let HasMask = false, HasVL = false, IRName = "" in {
       ManualCodegen = [{
       {
         ID = Intrinsic::experimental_vector_extract;
-        ScalableVectorType *VecTy = cast<ScalableVectorType>(ResultType);
+        auto *VecTy = cast<ScalableVectorType>(ResultType);
+        auto *OpVecTy = cast<ScalableVectorType>(Ops[0]->getType());
+        // Mask to only valid indices.
+        unsigned MaxIndex = OpVecTy->getMinNumElements() / VecTy->getMinNumElements();
+        assert(isPowerOf2_32(MaxIndex));
+        Ops[1] = Builder.CreateAnd(Ops[1], MaxIndex - 1);
         Ops[1] = Builder.CreateMul(Ops[1],
                                    ConstantInt::get(Ops[1]->getType(),
                                                     VecTy->getMinNumElements()));
@@ -2117,7 +2122,12 @@ let HasMask = false, HasVL = false, IRName = "" in {
       {
         ID = Intrinsic::experimental_vector_insert;
         IntrinsicTypes = {ResultType, Ops[2]->getType()};
-        ScalableVectorType *VecTy = cast<ScalableVectorType>(Ops[2]->getType());
+        auto *ResVecTy = cast<ScalableVectorType>(ResultType);
+        auto *VecTy = cast<ScalableVectorType>(Ops[2]->getType());
+        // Mask to only valid indices.
+        unsigned MaxIndex = ResVecTy->getMinNumElements() / VecTy->getMinNumElements();
+        assert(isPowerOf2_32(MaxIndex));
+        Ops[1] = Builder.CreateAnd(Ops[1], MaxIndex - 1);
         Ops[1] = Builder.CreateMul(Ops[1],
                                    ConstantInt::get(Ops[1]->getType(),
                                                     VecTy->getMinNumElements()));

diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 63c2d9fff881..a887ea5edb80 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -3662,132 +3662,6 @@ bool Sema::CheckRISCVBuiltinFunctionCall(const TargetInfo &TI,
   case RISCVVector::BI__builtin_rvv_vsetvlimax:
     return SemaBuiltinConstantArgRange(TheCall, 0, 0, 3) ||
            CheckRISCVLMUL(TheCall, 1);
-  case RISCVVector::BI__builtin_rvv_vget_v_i8m2_i8m1:
-  case RISCVVector::BI__builtin_rvv_vget_v_i16m2_i16m1:
-  case RISCVVector::BI__builtin_rvv_vget_v_i32m2_i32m1:
-  case RISCVVector::BI__builtin_rvv_vget_v_i64m2_i64m1:
-  case RISCVVector::BI__builtin_rvv_vget_v_f32m2_f32m1:
-  case RISCVVector::BI__builtin_rvv_vget_v_f64m2_f64m1:
-  case RISCVVector::BI__builtin_rvv_vget_v_u8m2_u8m1:
-  case RISCVVector::BI__builtin_rvv_vget_v_u16m2_u16m1:
-  case RISCVVector::BI__builtin_rvv_vget_v_u32m2_u32m1:
-  case RISCVVector::BI__builtin_rvv_vget_v_u64m2_u64m1:
-  case RISCVVector::BI__builtin_rvv_vget_v_i8m4_i8m2:
-  case RISCVVector::BI__builtin_rvv_vget_v_i16m4_i16m2:
-  case RISCVVector::BI__builtin_rvv_vget_v_i32m4_i32m2:
-  case RISCVVector::BI__builtin_rvv_vget_v_i64m4_i64m2:
-  case RISCVVector::BI__builtin_rvv_vget_v_f32m4_f32m2:
-  case RISCVVector::BI__builtin_rvv_vget_v_f64m4_f64m2:
-  case RISCVVector::BI__builtin_rvv_vget_v_u8m4_u8m2:
-  case RISCVVector::BI__builtin_rvv_vget_v_u16m4_u16m2:
-  case RISCVVector::BI__builtin_rvv_vget_v_u32m4_u32m2:
-  case RISCVVector::BI__builtin_rvv_vget_v_u64m4_u64m2:
-  case RISCVVector::BI__builtin_rvv_vget_v_i8m8_i8m4:
-  case RISCVVector::BI__builtin_rvv_vget_v_i16m8_i16m4:
-  case RISCVVector::BI__builtin_rvv_vget_v_i32m8_i32m4:
-  case RISCVVector::BI__builtin_rvv_vget_v_i64m8_i64m4:
-  case RISCVVector::BI__builtin_rvv_vget_v_f32m8_f32m4:
-  case RISCVVector::BI__builtin_rvv_vget_v_f64m8_f64m4:
-  case RISCVVector::BI__builtin_rvv_vget_v_u8m8_u8m4:
-  case RISCVVector::BI__builtin_rvv_vget_v_u16m8_u16m4:
-  case RISCVVector::BI__builtin_rvv_vget_v_u32m8_u32m4:
-  case RISCVVector::BI__builtin_rvv_vget_v_u64m8_u64m4:
-    return SemaBuiltinConstantArgRange(TheCall, 1, 0, 1);
-  case RISCVVector::BI__builtin_rvv_vget_v_i8m4_i8m1:
-  case RISCVVector::BI__builtin_rvv_vget_v_i16m4_i16m1:
-  case RISCVVector::BI__builtin_rvv_vget_v_i32m4_i32m1:
-  case RISCVVector::BI__builtin_rvv_vget_v_i64m4_i64m1:
-  case RISCVVector::BI__builtin_rvv_vget_v_f32m4_f32m1:
-  case RISCVVector::BI__builtin_rvv_vget_v_f64m4_f64m1:
-  case RISCVVector::BI__builtin_rvv_vget_v_u8m4_u8m1:
-  case RISCVVector::BI__builtin_rvv_vget_v_u16m4_u16m1:
-  case RISCVVector::BI__builtin_rvv_vget_v_u32m4_u32m1:
-  case RISCVVector::BI__builtin_rvv_vget_v_u64m4_u64m1:
-  case RISCVVector::BI__builtin_rvv_vget_v_i8m8_i8m2:
-  case RISCVVector::BI__builtin_rvv_vget_v_i16m8_i16m2:
-  case RISCVVector::BI__builtin_rvv_vget_v_i32m8_i32m2:
-  case RISCVVector::BI__builtin_rvv_vget_v_i64m8_i64m2:
-  case RISCVVector::BI__builtin_rvv_vget_v_f32m8_f32m2:
-  case RISCVVector::BI__builtin_rvv_vget_v_f64m8_f64m2:
-  case RISCVVector::BI__builtin_rvv_vget_v_u8m8_u8m2:
-  case RISCVVector::BI__builtin_rvv_vget_v_u16m8_u16m2:
-  case RISCVVector::BI__builtin_rvv_vget_v_u32m8_u32m2:
-  case RISCVVector::BI__builtin_rvv_vget_v_u64m8_u64m2:
-    return SemaBuiltinConstantArgRange(TheCall, 1, 0, 3);
-  case RISCVVector::BI__builtin_rvv_vget_v_i8m8_i8m1:
-  case RISCVVector::BI__builtin_rvv_vget_v_i16m8_i16m1:
-  case RISCVVector::BI__builtin_rvv_vget_v_i32m8_i32m1:
-  case RISCVVector::BI__builtin_rvv_vget_v_i64m8_i64m1:
-  case RISCVVector::BI__builtin_rvv_vget_v_f32m8_f32m1:
-  case RISCVVector::BI__builtin_rvv_vget_v_f64m8_f64m1:
-  case RISCVVector::BI__builtin_rvv_vget_v_u8m8_u8m1:
-  case RISCVVector::BI__builtin_rvv_vget_v_u16m8_u16m1:
-  case RISCVVector::BI__builtin_rvv_vget_v_u32m8_u32m1:
-  case RISCVVector::BI__builtin_rvv_vget_v_u64m8_u64m1:
-    return SemaBuiltinConstantArgRange(TheCall, 1, 0, 7);
-  case RISCVVector::BI__builtin_rvv_vset_v_i8m1_i8m2:
-  case RISCVVector::BI__builtin_rvv_vset_v_i16m1_i16m2:
-  case RISCVVector::BI__builtin_rvv_vset_v_i32m1_i32m2:
-  case RISCVVector::BI__builtin_rvv_vset_v_i64m1_i64m2:
-  case RISCVVector::BI__builtin_rvv_vset_v_f32m1_f32m2:
-  case RISCVVector::BI__builtin_rvv_vset_v_f64m1_f64m2:
-  case RISCVVector::BI__builtin_rvv_vset_v_u8m1_u8m2:
-  case RISCVVector::BI__builtin_rvv_vset_v_u16m1_u16m2:
-  case RISCVVector::BI__builtin_rvv_vset_v_u32m1_u32m2:
-  case RISCVVector::BI__builtin_rvv_vset_v_u64m1_u64m2:
-  case RISCVVector::BI__builtin_rvv_vset_v_i8m2_i8m4:
-  case RISCVVector::BI__builtin_rvv_vset_v_i16m2_i16m4:
-  case RISCVVector::BI__builtin_rvv_vset_v_i32m2_i32m4:
-  case RISCVVector::BI__builtin_rvv_vset_v_i64m2_i64m4:
-  case RISCVVector::BI__builtin_rvv_vset_v_f32m2_f32m4:
-  case RISCVVector::BI__builtin_rvv_vset_v_f64m2_f64m4:
-  case RISCVVector::BI__builtin_rvv_vset_v_u8m2_u8m4:
-  case RISCVVector::BI__builtin_rvv_vset_v_u16m2_u16m4:
-  case RISCVVector::BI__builtin_rvv_vset_v_u32m2_u32m4:
-  case RISCVVector::BI__builtin_rvv_vset_v_u64m2_u64m4:
-  case RISCVVector::BI__builtin_rvv_vset_v_i8m4_i8m8:
-  case RISCVVector::BI__builtin_rvv_vset_v_i16m4_i16m8:
-  case RISCVVector::BI__builtin_rvv_vset_v_i32m4_i32m8:
-  case RISCVVector::BI__builtin_rvv_vset_v_i64m4_i64m8:
-  case RISCVVector::BI__builtin_rvv_vset_v_f32m4_f32m8:
-  case RISCVVector::BI__builtin_rvv_vset_v_f64m4_f64m8:
-  case RISCVVector::BI__builtin_rvv_vset_v_u8m4_u8m8:
-  case RISCVVector::BI__builtin_rvv_vset_v_u16m4_u16m8:
-  case RISCVVector::BI__builtin_rvv_vset_v_u32m4_u32m8:
-  case RISCVVector::BI__builtin_rvv_vset_v_u64m4_u64m8:
-    return SemaBuiltinConstantArgRange(TheCall, 1, 0, 1);
-  case RISCVVector::BI__builtin_rvv_vset_v_i8m1_i8m4:
-  case RISCVVector::BI__builtin_rvv_vset_v_i16m1_i16m4:
-  case RISCVVector::BI__builtin_rvv_vset_v_i32m1_i32m4:
-  case RISCVVector::BI__builtin_rvv_vset_v_i64m1_i64m4:
-  case RISCVVector::BI__builtin_rvv_vset_v_f32m1_f32m4:
-  case RISCVVector::BI__builtin_rvv_vset_v_f64m1_f64m4:
-  case RISCVVector::BI__builtin_rvv_vset_v_u8m1_u8m4:
-  case RISCVVector::BI__builtin_rvv_vset_v_u16m1_u16m4:
-  case RISCVVector::BI__builtin_rvv_vset_v_u32m1_u32m4:
-  case RISCVVector::BI__builtin_rvv_vset_v_u64m1_u64m4:
-  case RISCVVector::BI__builtin_rvv_vset_v_i8m2_i8m8:
-  case RISCVVector::BI__builtin_rvv_vset_v_i16m2_i16m8:
-  case RISCVVector::BI__builtin_rvv_vset_v_i32m2_i32m8:
-  case RISCVVector::BI__builtin_rvv_vset_v_i64m2_i64m8:
-  case RISCVVector::BI__builtin_rvv_vset_v_f32m2_f32m8:
-  case RISCVVector::BI__builtin_rvv_vset_v_f64m2_f64m8:
-  case RISCVVector::BI__builtin_rvv_vset_v_u8m2_u8m8:
-  case RISCVVector::BI__builtin_rvv_vset_v_u16m2_u16m8:
-  case RISCVVector::BI__builtin_rvv_vset_v_u32m2_u32m8:
-  case RISCVVector::BI__builtin_rvv_vset_v_u64m2_u64m8:
-    return SemaBuiltinConstantArgRange(TheCall, 1, 0, 3);
-  case RISCVVector::BI__builtin_rvv_vset_v_i8m1_i8m8:
-  case RISCVVector::BI__builtin_rvv_vset_v_i16m1_i16m8:
-  case RISCVVector::BI__builtin_rvv_vset_v_i32m1_i32m8:
-  case RISCVVector::BI__builtin_rvv_vset_v_i64m1_i64m8:
-  case RISCVVector::BI__builtin_rvv_vset_v_f32m1_f32m8:
-  case RISCVVector::BI__builtin_rvv_vset_v_f64m1_f64m8:
-  case RISCVVector::BI__builtin_rvv_vset_v_u8m1_u8m8:
-  case RISCVVector::BI__builtin_rvv_vset_v_u16m1_u16m8:
-  case RISCVVector::BI__builtin_rvv_vset_v_u32m1_u32m8:
-  case RISCVVector::BI__builtin_rvv_vset_v_u64m1_u64m8:
-    return SemaBuiltinConstantArgRange(TheCall, 1, 0, 7);
   }
 
   return false;

diff --git a/clang/test/CodeGen/RISCV/riscv-attr-builtin-alias.c b/clang/test/CodeGen/RISCV/riscv-attr-builtin-alias.c
index e3ff4e4d7dd5..9db7eaa3bcd1 100644
--- a/clang/test/CodeGen/RISCV/riscv-attr-builtin-alias.c
+++ b/clang/test/CodeGen/RISCV/riscv-attr-builtin-alias.c
@@ -10,7 +10,7 @@
 static inline __attribute__((__always_inline__, __nodebug__))
 
 __rvv_generic
-__attribute__((clang_builtin_alias(__builtin_rvv_vadd_vv_i8m1)))
+__attribute__((clang_builtin_alias(__builtin_rvv_vadd_vv)))
 vint8m1_t vadd_generic (vint8m1_t op0, vint8m1_t op1, size_t op2);
 
 // CHECK-LABEL: @test(

diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmerge.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmerge.c
index 6c2475dba4fd..7fd693a4dd92 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmerge.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmerge.c
@@ -887,7 +887,7 @@ vuint64m8_t test_vmerge_vxm_u64m8(vbool8_t mask, vuint64m8_t op1, uint64_t op2,
 
 // CHECK-RV64-LABEL: @test_vmerge_vvm_f32mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x float> @llvm.riscv.vfmerge.nxv1f32.nxv1f32.i64(<vscale x 1 x float> [[OP1:%.*]], <vscale x 1 x float> [[OP2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x float> @llvm.riscv.vmerge.nxv1f32.nxv1f32.i64(<vscale x 1 x float> [[OP1:%.*]], <vscale x 1 x float> [[OP2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x float> [[TMP0]]
 //
 vfloat32mf2_t test_vmerge_vvm_f32mf2(vbool64_t mask, vfloat32mf2_t op1,
@@ -897,7 +897,7 @@ vfloat32mf2_t test_vmerge_vvm_f32mf2(vbool64_t mask, vfloat32mf2_t op1,
 
 // CHECK-RV64-LABEL: @test_vmerge_vvm_f32m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x float> @llvm.riscv.vfmerge.nxv2f32.nxv2f32.i64(<vscale x 2 x float> [[OP1:%.*]], <vscale x 2 x float> [[OP2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x float> @llvm.riscv.vmerge.nxv2f32.nxv2f32.i64(<vscale x 2 x float> [[OP1:%.*]], <vscale x 2 x float> [[OP2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x float> [[TMP0]]
 //
 vfloat32m1_t test_vmerge_vvm_f32m1(vbool32_t mask, vfloat32m1_t op1,
@@ -907,7 +907,7 @@ vfloat32m1_t test_vmerge_vvm_f32m1(vbool32_t mask, vfloat32m1_t op1,
 
 // CHECK-RV64-LABEL: @test_vmerge_vvm_f32m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.riscv.vfmerge.nxv4f32.nxv4f32.i64(<vscale x 4 x float> [[OP1:%.*]], <vscale x 4 x float> [[OP2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.riscv.vmerge.nxv4f32.nxv4f32.i64(<vscale x 4 x float> [[OP1:%.*]], <vscale x 4 x float> [[OP2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
 vfloat32m2_t test_vmerge_vvm_f32m2(vbool16_t mask, vfloat32m2_t op1,
@@ -917,7 +917,7 @@ vfloat32m2_t test_vmerge_vvm_f32m2(vbool16_t mask, vfloat32m2_t op1,
 
 // CHECK-RV64-LABEL: @test_vmerge_vvm_f32m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x float> @llvm.riscv.vfmerge.nxv8f32.nxv8f32.i64(<vscale x 8 x float> [[OP1:%.*]], <vscale x 8 x float> [[OP2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x float> @llvm.riscv.vmerge.nxv8f32.nxv8f32.i64(<vscale x 8 x float> [[OP1:%.*]], <vscale x 8 x float> [[OP2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x float> [[TMP0]]
 //
 vfloat32m4_t test_vmerge_vvm_f32m4(vbool8_t mask, vfloat32m4_t op1,
@@ -927,7 +927,7 @@ vfloat32m4_t test_vmerge_vvm_f32m4(vbool8_t mask, vfloat32m4_t op1,
 
 // CHECK-RV64-LABEL: @test_vmerge_vvm_f32m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x float> @llvm.riscv.vfmerge.nxv16f32.nxv16f32.i64(<vscale x 16 x float> [[OP1:%.*]], <vscale x 16 x float> [[OP2:%.*]], <vscale x 16 x i1> [[MASK:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x float> @llvm.riscv.vmerge.nxv16f32.nxv16f32.i64(<vscale x 16 x float> [[OP1:%.*]], <vscale x 16 x float> [[OP2:%.*]], <vscale x 16 x i1> [[MASK:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 16 x float> [[TMP0]]
 //
 vfloat32m8_t test_vmerge_vvm_f32m8(vbool4_t mask, vfloat32m8_t op1,
@@ -937,7 +937,7 @@ vfloat32m8_t test_vmerge_vvm_f32m8(vbool4_t mask, vfloat32m8_t op1,
 
 // CHECK-RV64-LABEL: @test_vmerge_vvm_f64m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x double> @llvm.riscv.vfmerge.nxv1f64.nxv1f64.i64(<vscale x 1 x double> [[OP1:%.*]], <vscale x 1 x double> [[OP2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x double> @llvm.riscv.vmerge.nxv1f64.nxv1f64.i64(<vscale x 1 x double> [[OP1:%.*]], <vscale x 1 x double> [[OP2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x double> [[TMP0]]
 //
 vfloat64m1_t test_vmerge_vvm_f64m1(vbool64_t mask, vfloat64m1_t op1,
@@ -947,7 +947,7 @@ vfloat64m1_t test_vmerge_vvm_f64m1(vbool64_t mask, vfloat64m1_t op1,
 
 // CHECK-RV64-LABEL: @test_vmerge_vvm_f64m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x double> @llvm.riscv.vfmerge.nxv2f64.nxv2f64.i64(<vscale x 2 x double> [[OP1:%.*]], <vscale x 2 x double> [[OP2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x double> @llvm.riscv.vmerge.nxv2f64.nxv2f64.i64(<vscale x 2 x double> [[OP1:%.*]], <vscale x 2 x double> [[OP2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x double> [[TMP0]]
 //
 vfloat64m2_t test_vmerge_vvm_f64m2(vbool32_t mask, vfloat64m2_t op1,
@@ -957,7 +957,7 @@ vfloat64m2_t test_vmerge_vvm_f64m2(vbool32_t mask, vfloat64m2_t op1,
 
 // CHECK-RV64-LABEL: @test_vmerge_vvm_f64m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x double> @llvm.riscv.vfmerge.nxv4f64.nxv4f64.i64(<vscale x 4 x double> [[OP1:%.*]], <vscale x 4 x double> [[OP2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x double> @llvm.riscv.vmerge.nxv4f64.nxv4f64.i64(<vscale x 4 x double> [[OP1:%.*]], <vscale x 4 x double> [[OP2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x double> [[TMP0]]
 //
 vfloat64m4_t test_vmerge_vvm_f64m4(vbool16_t mask, vfloat64m4_t op1,
@@ -967,7 +967,7 @@ vfloat64m4_t test_vmerge_vvm_f64m4(vbool16_t mask, vfloat64m4_t op1,
 
 // CHECK-RV64-LABEL: @test_vmerge_vvm_f64m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x double> @llvm.riscv.vfmerge.nxv8f64.nxv8f64.i64(<vscale x 8 x double> [[OP1:%.*]], <vscale x 8 x double> [[OP2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x double> @llvm.riscv.vmerge.nxv8f64.nxv8f64.i64(<vscale x 8 x double> [[OP1:%.*]], <vscale x 8 x double> [[OP2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x double> [[TMP0]]
 //
 vfloat64m8_t test_vmerge_vvm_f64m8(vbool8_t mask, vfloat64m8_t op1,

diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmerge.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmerge.c
index d1ba4b1bd26b..3942a83fcf9c 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmerge.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmerge.c
@@ -888,7 +888,7 @@ vuint64m8_t test_vmerge_vxm_u64m8(vbool8_t mask, vuint64m8_t op1, uint64_t op2,
 
 // CHECK-RV64-LABEL: @test_vmerge_vvm_f32mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x float> @llvm.riscv.vfmerge.nxv1f32.nxv1f32.i64(<vscale x 1 x float> [[OP1:%.*]], <vscale x 1 x float> [[OP2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x float> @llvm.riscv.vmerge.nxv1f32.nxv1f32.i64(<vscale x 1 x float> [[OP1:%.*]], <vscale x 1 x float> [[OP2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x float> [[TMP0]]
 //
 vfloat32mf2_t test_vmerge_vvm_f32mf2(vbool64_t mask, vfloat32mf2_t op1,
@@ -898,7 +898,7 @@ vfloat32mf2_t test_vmerge_vvm_f32mf2(vbool64_t mask, vfloat32mf2_t op1,
 
 // CHECK-RV64-LABEL: @test_vmerge_vvm_f32m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x float> @llvm.riscv.vfmerge.nxv2f32.nxv2f32.i64(<vscale x 2 x float> [[OP1:%.*]], <vscale x 2 x float> [[OP2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x float> @llvm.riscv.vmerge.nxv2f32.nxv2f32.i64(<vscale x 2 x float> [[OP1:%.*]], <vscale x 2 x float> [[OP2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x float> [[TMP0]]
 //
 vfloat32m1_t test_vmerge_vvm_f32m1(vbool32_t mask, vfloat32m1_t op1,
@@ -908,7 +908,7 @@ vfloat32m1_t test_vmerge_vvm_f32m1(vbool32_t mask, vfloat32m1_t op1,
 
 // CHECK-RV64-LABEL: @test_vmerge_vvm_f32m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.riscv.vfmerge.nxv4f32.nxv4f32.i64(<vscale x 4 x float> [[OP1:%.*]], <vscale x 4 x float> [[OP2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.riscv.vmerge.nxv4f32.nxv4f32.i64(<vscale x 4 x float> [[OP1:%.*]], <vscale x 4 x float> [[OP2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
 vfloat32m2_t test_vmerge_vvm_f32m2(vbool16_t mask, vfloat32m2_t op1,
@@ -918,7 +918,7 @@ vfloat32m2_t test_vmerge_vvm_f32m2(vbool16_t mask, vfloat32m2_t op1,
 
 // CHECK-RV64-LABEL: @test_vmerge_vvm_f32m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x float> @llvm.riscv.vfmerge.nxv8f32.nxv8f32.i64(<vscale x 8 x float> [[OP1:%.*]], <vscale x 8 x float> [[OP2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x float> @llvm.riscv.vmerge.nxv8f32.nxv8f32.i64(<vscale x 8 x float> [[OP1:%.*]], <vscale x 8 x float> [[OP2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x float> [[TMP0]]
 //
 vfloat32m4_t test_vmerge_vvm_f32m4(vbool8_t mask, vfloat32m4_t op1,
@@ -928,7 +928,7 @@ vfloat32m4_t test_vmerge_vvm_f32m4(vbool8_t mask, vfloat32m4_t op1,
 
 // CHECK-RV64-LABEL: @test_vmerge_vvm_f32m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x float> @llvm.riscv.vfmerge.nxv16f32.nxv16f32.i64(<vscale x 16 x float> [[OP1:%.*]], <vscale x 16 x float> [[OP2:%.*]], <vscale x 16 x i1> [[MASK:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x float> @llvm.riscv.vmerge.nxv16f32.nxv16f32.i64(<vscale x 16 x float> [[OP1:%.*]], <vscale x 16 x float> [[OP2:%.*]], <vscale x 16 x i1> [[MASK:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 16 x float> [[TMP0]]
 //
 vfloat32m8_t test_vmerge_vvm_f32m8(vbool4_t mask, vfloat32m8_t op1,
@@ -938,7 +938,7 @@ vfloat32m8_t test_vmerge_vvm_f32m8(vbool4_t mask, vfloat32m8_t op1,
 
 // CHECK-RV64-LABEL: @test_vmerge_vvm_f64m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x double> @llvm.riscv.vfmerge.nxv1f64.nxv1f64.i64(<vscale x 1 x double> [[OP1:%.*]], <vscale x 1 x double> [[OP2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x double> @llvm.riscv.vmerge.nxv1f64.nxv1f64.i64(<vscale x 1 x double> [[OP1:%.*]], <vscale x 1 x double> [[OP2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x double> [[TMP0]]
 //
 vfloat64m1_t test_vmerge_vvm_f64m1(vbool64_t mask, vfloat64m1_t op1,
@@ -948,7 +948,7 @@ vfloat64m1_t test_vmerge_vvm_f64m1(vbool64_t mask, vfloat64m1_t op1,
 
 // CHECK-RV64-LABEL: @test_vmerge_vvm_f64m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x double> @llvm.riscv.vfmerge.nxv2f64.nxv2f64.i64(<vscale x 2 x double> [[OP1:%.*]], <vscale x 2 x double> [[OP2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x double> @llvm.riscv.vmerge.nxv2f64.nxv2f64.i64(<vscale x 2 x double> [[OP1:%.*]], <vscale x 2 x double> [[OP2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x double> [[TMP0]]
 //
 vfloat64m2_t test_vmerge_vvm_f64m2(vbool32_t mask, vfloat64m2_t op1,
@@ -958,7 +958,7 @@ vfloat64m2_t test_vmerge_vvm_f64m2(vbool32_t mask, vfloat64m2_t op1,
 
 // CHECK-RV64-LABEL: @test_vmerge_vvm_f64m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x double> @llvm.riscv.vfmerge.nxv4f64.nxv4f64.i64(<vscale x 4 x double> [[OP1:%.*]], <vscale x 4 x double> [[OP2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x double> @llvm.riscv.vmerge.nxv4f64.nxv4f64.i64(<vscale x 4 x double> [[OP1:%.*]], <vscale x 4 x double> [[OP2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x double> [[TMP0]]
 //
 vfloat64m4_t test_vmerge_vvm_f64m4(vbool16_t mask, vfloat64m4_t op1,
@@ -968,7 +968,7 @@ vfloat64m4_t test_vmerge_vvm_f64m4(vbool16_t mask, vfloat64m4_t op1,
 
 // CHECK-RV64-LABEL: @test_vmerge_vvm_f64m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x double> @llvm.riscv.vfmerge.nxv8f64.nxv8f64.i64(<vscale x 8 x double> [[OP1:%.*]], <vscale x 8 x double> [[OP2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x double> @llvm.riscv.vmerge.nxv8f64.nxv8f64.i64(<vscale x 8 x double> [[OP1:%.*]], <vscale x 8 x double> [[OP2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x double> [[TMP0]]
 //
 vfloat64m8_t test_vmerge_vvm_f64m8(vbool8_t mask, vfloat64m8_t op1,
@@ -978,7 +978,7 @@ vfloat64m8_t test_vmerge_vvm_f64m8(vbool8_t mask, vfloat64m8_t op1,
 
 // CHECK-RV64-LABEL: @test_vmerge_vvm_f16mf4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x half> @llvm.riscv.vfmerge.nxv1f16.nxv1f16.i64(<vscale x 1 x half> [[OP1:%.*]], <vscale x 1 x half> [[OP2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x half> @llvm.riscv.vmerge.nxv1f16.nxv1f16.i64(<vscale x 1 x half> [[OP1:%.*]], <vscale x 1 x half> [[OP2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x half> [[TMP0]]
 //
 vfloat16mf4_t test_vmerge_vvm_f16mf4 (vbool64_t mask, vfloat16mf4_t op1, vfloat16mf4_t op2, size_t vl) {
@@ -987,7 +987,7 @@ vfloat16mf4_t test_vmerge_vvm_f16mf4 (vbool64_t mask, vfloat16mf4_t op1, vfloat1
 
 // CHECK-RV64-LABEL: @test_vmerge_vvm_f16mf2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x half> @llvm.riscv.vfmerge.nxv2f16.nxv2f16.i64(<vscale x 2 x half> [[OP1:%.*]], <vscale x 2 x half> [[OP2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x half> @llvm.riscv.vmerge.nxv2f16.nxv2f16.i64(<vscale x 2 x half> [[OP1:%.*]], <vscale x 2 x half> [[OP2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x half> [[TMP0]]
 //
 vfloat16mf2_t test_vmerge_vvm_f16mf2 (vbool32_t mask, vfloat16mf2_t op1, vfloat16mf2_t op2, size_t vl) {
@@ -996,7 +996,7 @@ vfloat16mf2_t test_vmerge_vvm_f16mf2 (vbool32_t mask, vfloat16mf2_t op1, vfloat1
 
 // CHECK-RV64-LABEL: @test_vmerge_vvm_f16m1(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x half> @llvm.riscv.vfmerge.nxv4f16.nxv4f16.i64(<vscale x 4 x half> [[OP1:%.*]], <vscale x 4 x half> [[OP2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x half> @llvm.riscv.vmerge.nxv4f16.nxv4f16.i64(<vscale x 4 x half> [[OP1:%.*]], <vscale x 4 x half> [[OP2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x half> [[TMP0]]
 //
 vfloat16m1_t test_vmerge_vvm_f16m1 (vbool16_t mask, vfloat16m1_t op1, vfloat16m1_t op2, size_t vl) {
@@ -1005,7 +1005,7 @@ vfloat16m1_t test_vmerge_vvm_f16m1 (vbool16_t mask, vfloat16m1_t op1, vfloat16m1
 
 // CHECK-RV64-LABEL: @test_vmerge_vvm_f16m2(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x half> @llvm.riscv.vfmerge.nxv8f16.nxv8f16.i64(<vscale x 8 x half> [[OP1:%.*]], <vscale x 8 x half> [[OP2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x half> @llvm.riscv.vmerge.nxv8f16.nxv8f16.i64(<vscale x 8 x half> [[OP1:%.*]], <vscale x 8 x half> [[OP2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x half> [[TMP0]]
 //
 vfloat16m2_t test_vmerge_vvm_f16m2 (vbool8_t mask, vfloat16m2_t op1, vfloat16m2_t op2, size_t vl) {
@@ -1014,7 +1014,7 @@ vfloat16m2_t test_vmerge_vvm_f16m2 (vbool8_t mask, vfloat16m2_t op1, vfloat16m2_
 
 // CHECK-RV64-LABEL: @test_vmerge_vvm_f16m4(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x half> @llvm.riscv.vfmerge.nxv16f16.nxv16f16.i64(<vscale x 16 x half> [[OP1:%.*]], <vscale x 16 x half> [[OP2:%.*]], <vscale x 16 x i1> [[MASK:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x half> @llvm.riscv.vmerge.nxv16f16.nxv16f16.i64(<vscale x 16 x half> [[OP1:%.*]], <vscale x 16 x half> [[OP2:%.*]], <vscale x 16 x i1> [[MASK:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 16 x half> [[TMP0]]
 //
 vfloat16m4_t test_vmerge_vvm_f16m4 (vbool4_t mask, vfloat16m4_t op1, vfloat16m4_t op2, size_t vl) {
@@ -1023,7 +1023,7 @@ vfloat16m4_t test_vmerge_vvm_f16m4 (vbool4_t mask, vfloat16m4_t op1, vfloat16m4_
 
 // CHECK-RV64-LABEL: @test_vmerge_vvm_f16m8(
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x half> @llvm.riscv.vfmerge.nxv32f16.nxv32f16.i64(<vscale x 32 x half> [[OP1:%.*]], <vscale x 32 x half> [[OP2:%.*]], <vscale x 32 x i1> [[MASK:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x half> @llvm.riscv.vmerge.nxv32f16.nxv32f16.i64(<vscale x 32 x half> [[OP1:%.*]], <vscale x 32 x half> [[OP2:%.*]], <vscale x 32 x i1> [[MASK:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 32 x half> [[TMP0]]
 //
 vfloat16m8_t test_vmerge_vvm_f16m8 (vbool2_t mask, vfloat16m8_t op1, vfloat16m8_t op2, size_t vl) {

diff --git a/clang/utils/TableGen/RISCVVEmitter.cpp b/clang/utils/TableGen/RISCVVEmitter.cpp
index 4855cad52900..7a23a4bcd9fd 100644
--- a/clang/utils/TableGen/RISCVVEmitter.cpp
+++ b/clang/utils/TableGen/RISCVVEmitter.cpp
@@ -149,7 +149,8 @@ enum RISCVExtension : uint8_t {
 class RVVIntrinsic {
 
 private:
-  std::string Name; // Builtin name
+  std::string BuiltinName; // Builtin name
+  std::string Name;        // C intrinsic name.
   std::string MangledName;
   std::string IRName;
   bool IsMask;
@@ -176,6 +177,7 @@ class RVVIntrinsic {
                StringRef RequiredExtension, unsigned NF);
   ~RVVIntrinsic() = default;
 
+  StringRef getBuiltinName() const { return BuiltinName; }
   StringRef getName() const { return Name; }
   StringRef getMangledName() const { return MangledName; }
   bool hasVL() const { return HasVL; }
@@ -188,6 +190,9 @@ class RVVIntrinsic {
   StringRef getManualCodegen() const { return ManualCodegen; }
   uint8_t getRISCVExtensions() const { return RISCVExtensions; }
   unsigned getNF() const { return NF; }
+  const std::vector<int64_t> &getIntrinsicTypes() const {
+    return IntrinsicTypes;
+  }
 
   // Return the type string for a BUILTIN() macro in Builtins.def.
   std::string getBuiltinTypeStr() const;
@@ -764,8 +769,9 @@ RVVIntrinsic::RVVIntrinsic(StringRef NewName, StringRef Suffix,
       HasNoMaskedOverloaded(HasNoMaskedOverloaded), HasAutoDef(HasAutoDef),
       ManualCodegen(ManualCodegen.str()), NF(NF) {
 
-  // Init Name and MangledName
-  Name = NewName.str();
+  // Init BuiltinName, Name and MangledName
+  BuiltinName = NewName.str();
+  Name = BuiltinName;
   if (NewMangledName.empty())
     MangledName = NewName.split("_").first.str();
   else
@@ -775,8 +781,10 @@ RVVIntrinsic::RVVIntrinsic(StringRef NewName, StringRef Suffix,
   if (!MangledSuffix.empty())
     MangledName += "_" + MangledSuffix.str();
   if (IsMask) {
+    BuiltinName += "_m";
     Name += "_m";
   }
+
   // Init RISC-V extensions
   for (const auto &T : OutInTypes) {
     if (T->isFloatVector(16) || T->isFloat(16))
@@ -854,7 +862,7 @@ void RVVIntrinsic::emitCodeGenSwitchBody(raw_ostream &OS) const {
 
 void RVVIntrinsic::emitIntrinsicFuncDef(raw_ostream &OS) const {
   OS << "__attribute__((__clang_builtin_alias__(";
-  OS << "__builtin_rvv_" << getName() << ")))\n";
+  OS << "__builtin_rvv_" << getBuiltinName() << ")))\n";
   OS << OutputType->getTypeStr() << " " << getName() << "(";
   // Emit function arguments
   if (!InputTypes.empty()) {
@@ -867,7 +875,7 @@ void RVVIntrinsic::emitIntrinsicFuncDef(raw_ostream &OS) const {
 
 void RVVIntrinsic::emitMangledFuncDef(raw_ostream &OS) const {
   OS << "__attribute__((__clang_builtin_alias__(";
-  OS << "__builtin_rvv_" << getName() << ")))\n";
+  OS << "__builtin_rvv_" << getBuiltinName() << ")))\n";
   OS << OutputType->getTypeStr() << " " << getMangledName() << "(";
   // Emit function arguments
   if (!InputTypes.empty()) {
@@ -1009,13 +1017,31 @@ void RVVEmitter::createBuiltins(raw_ostream &OS) {
   std::vector<std::unique_ptr<RVVIntrinsic>> Defs;
   createRVVIntrinsics(Defs);
 
+  // Map to keep track of which builtin names have already been emitted.
+  StringMap<RVVIntrinsic *> BuiltinMap;
+
   OS << "#if defined(TARGET_BUILTIN) && !defined(RISCVV_BUILTIN)\n";
   OS << "#define RISCVV_BUILTIN(ID, TYPE, ATTRS) TARGET_BUILTIN(ID, TYPE, "
         "ATTRS, \"experimental-v\")\n";
   OS << "#endif\n";
   for (auto &Def : Defs) {
-    OS << "RISCVV_BUILTIN(__builtin_rvv_" << Def->getName() << ",\""
-       << Def->getBuiltinTypeStr() << "\", \"n\")\n";
+    auto P =
+        BuiltinMap.insert(std::make_pair(Def->getBuiltinName(), Def.get()));
+    if (!P.second) {
+      // Verify that this would have produced the same builtin definition.
+      if (P.first->second->hasAutoDef() != Def->hasAutoDef()) {
+        PrintFatalError("Builtin with same name has 
diff erent hasAutoDef");
+      } else if (!Def->hasAutoDef() && P.first->second->getBuiltinTypeStr() !=
+                                           Def->getBuiltinTypeStr()) {
+        PrintFatalError("Builtin with same name has 
diff erent type string");
+      }
+      continue;
+    }
+
+    OS << "RISCVV_BUILTIN(__builtin_rvv_" << Def->getBuiltinName() << ",\"";
+    if (!Def->hasAutoDef())
+      OS << Def->getBuiltinTypeStr();
+    OS << "\", \"n\")\n";
   }
   OS << "#undef RISCVV_BUILTIN\n";
 }
@@ -1028,6 +1054,10 @@ void RVVEmitter::createCodeGen(raw_ostream &OS) {
                              const std::unique_ptr<RVVIntrinsic> &B) {
     return A->getIRName() < B->getIRName();
   });
+
+  // Map to keep track of which builtin names have already been emitted.
+  StringMap<RVVIntrinsic *> BuiltinMap;
+
   // Print switch body when the ir name or ManualCodegen changes from previous
   // iteration.
   RVVIntrinsic *PrevDef = Defs.begin()->get();
@@ -1038,7 +1068,29 @@ void RVVEmitter::createCodeGen(raw_ostream &OS) {
       PrevDef->emitCodeGenSwitchBody(OS);
     }
     PrevDef = Def.get();
-    OS << "case RISCVVector::BI__builtin_rvv_" << Def->getName() << ":\n";
+
+    auto P =
+        BuiltinMap.insert(std::make_pair(Def->getBuiltinName(), Def.get()));
+    if (P.second) {
+      OS << "case RISCVVector::BI__builtin_rvv_" << Def->getBuiltinName()
+         << ":\n";
+      continue;
+    }
+
+    if (P.first->second->getIRName() != Def->getIRName())
+      PrintFatalError("Builtin with same name has 
diff erent IRName");
+    else if (P.first->second->getManualCodegen() != Def->getManualCodegen())
+      PrintFatalError("Builtin with same name has 
diff erent ManualCodegen");
+    else if (P.first->second->getNF() != Def->getNF())
+      PrintFatalError("Builtin with same name has 
diff erent NF");
+    else if (P.first->second->isMask() != Def->isMask())
+      PrintFatalError("Builtin with same name has 
diff erent isMask");
+    else if (P.first->second->hasVL() != Def->hasVL())
+      PrintFatalError("Builtin with same name has 
diff erent HasPolicy");
+    else if (P.first->second->hasPolicy() != Def->hasPolicy())
+      PrintFatalError("Builtin with same name has 
diff erent HasPolicy");
+    else if (P.first->second->getIntrinsicTypes() != Def->getIntrinsicTypes())
+      PrintFatalError("Builtin with same name has 
diff erent IntrinsicTypes");
   }
   Defs.back()->emitCodeGenSwitchBody(OS);
   OS << "\n";

diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
index ffbdb88d513e..fc643aed94b9 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
@@ -4574,6 +4574,10 @@ defm : VPatClassifyVI_VF<"int_riscv_vfclass", "PseudoVFCLASS">;
 // 14.15. Vector Floating-Point Merge Instruction
 //===----------------------------------------------------------------------===//
 // We can use vmerge.vvm to support vector-vector vfmerge.
+// NOTE: Clang previously used int_riscv_vfmerge for vector-vector, but now uses
+// int_riscv_vmerge. Support both for compatibility.
+defm : VPatBinaryV_VM<"int_riscv_vmerge", "PseudoVMERGE",
+                      /*CarryOut = */0, /*vtilist=*/AllFloatVectors>;
 defm : VPatBinaryV_VM<"int_riscv_vfmerge", "PseudoVMERGE",
                       /*CarryOut = */0, /*vtilist=*/AllFloatVectors>;
 defm : VPatBinaryV_XM<"int_riscv_vfmerge", "PseudoVFMERGE",

diff --git a/llvm/test/CodeGen/RISCV/rvv/vmerge-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmerge-rv32.ll
index f0e032890713..aa61c6131121 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmerge-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmerge-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+f -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+experimental-zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.vmerge.nxv1i8.nxv1i8(
   <vscale x 1 x i8>,
@@ -1344,3 +1344,333 @@ entry:
 
   ret <vscale x 8 x i64> %a
 }
+
+declare <vscale x 1 x half> @llvm.riscv.vmerge.nxv1f16.nxv1f16(
+  <vscale x 1 x half>,
+  <vscale x 1 x half>,
+  <vscale x 1 x i1>,
+  i32);
+
+define <vscale x 1 x half> @intrinsic_vmerge_vvm_nxv1f16_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, <vscale x 1 x i1> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vmerge_vvm_nxv1f16_nxv1f16_nxv1f16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 1 x half> @llvm.riscv.vmerge.nxv1f16.nxv1f16(
+    <vscale x 1 x half> %0,
+    <vscale x 1 x half> %1,
+    <vscale x 1 x i1> %2,
+    i32 %3)
+
+  ret <vscale x 1 x half> %a
+}
+
+declare <vscale x 2 x half> @llvm.riscv.vmerge.nxv2f16.nxv2f16(
+  <vscale x 2 x half>,
+  <vscale x 2 x half>,
+  <vscale x 2 x i1>,
+  i32);
+
+define <vscale x 2 x half> @intrinsic_vmerge_vvm_nxv2f16_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, <vscale x 2 x i1> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vmerge_vvm_nxv2f16_nxv2f16_nxv2f16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 2 x half> @llvm.riscv.vmerge.nxv2f16.nxv2f16(
+    <vscale x 2 x half> %0,
+    <vscale x 2 x half> %1,
+    <vscale x 2 x i1> %2,
+    i32 %3)
+
+  ret <vscale x 2 x half> %a
+}
+
+declare <vscale x 4 x half> @llvm.riscv.vmerge.nxv4f16.nxv4f16(
+  <vscale x 4 x half>,
+  <vscale x 4 x half>,
+  <vscale x 4 x i1>,
+  i32);
+
+define <vscale x 4 x half> @intrinsic_vmerge_vvm_nxv4f16_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, <vscale x 4 x i1> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vmerge_vvm_nxv4f16_nxv4f16_nxv4f16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 4 x half> @llvm.riscv.vmerge.nxv4f16.nxv4f16(
+    <vscale x 4 x half> %0,
+    <vscale x 4 x half> %1,
+    <vscale x 4 x i1> %2,
+    i32 %3)
+
+  ret <vscale x 4 x half> %a
+}
+
+declare <vscale x 8 x half> @llvm.riscv.vmerge.nxv8f16.nxv8f16(
+  <vscale x 8 x half>,
+  <vscale x 8 x half>,
+  <vscale x 8 x i1>,
+  i32);
+
+define <vscale x 8 x half> @intrinsic_vmerge_vvm_nxv8f16_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x i1> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vmerge_vvm_nxv8f16_nxv8f16_nxv8f16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v8, v10, v0
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 8 x half> @llvm.riscv.vmerge.nxv8f16.nxv8f16(
+    <vscale x 8 x half> %0,
+    <vscale x 8 x half> %1,
+    <vscale x 8 x i1> %2,
+    i32 %3)
+
+  ret <vscale x 8 x half> %a
+}
+
+declare <vscale x 16 x half> @llvm.riscv.vmerge.nxv16f16.nxv16f16(
+  <vscale x 16 x half>,
+  <vscale x 16 x half>,
+  <vscale x 16 x i1>,
+  i32);
+
+define <vscale x 16 x half> @intrinsic_vmerge_vvm_nxv16f16_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, <vscale x 16 x i1> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vmerge_vvm_nxv16f16_nxv16f16_nxv16f16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v8, v12, v0
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 16 x half> @llvm.riscv.vmerge.nxv16f16.nxv16f16(
+    <vscale x 16 x half> %0,
+    <vscale x 16 x half> %1,
+    <vscale x 16 x i1> %2,
+    i32 %3)
+
+  ret <vscale x 16 x half> %a
+}
+
+declare <vscale x 32 x half> @llvm.riscv.vmerge.nxv32f16.nxv32f16(
+  <vscale x 32 x half>,
+  <vscale x 32 x half>,
+  <vscale x 32 x i1>,
+  i32);
+
+define <vscale x 32 x half> @intrinsic_vmerge_vvm_nxv32f16_nxv32f16_nxv32f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, <vscale x 32 x i1> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vmerge_vvm_nxv32f16_nxv32f16_nxv32f16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v8, v16, v0
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 32 x half> @llvm.riscv.vmerge.nxv32f16.nxv32f16(
+    <vscale x 32 x half> %0,
+    <vscale x 32 x half> %1,
+    <vscale x 32 x i1> %2,
+    i32 %3)
+
+  ret <vscale x 32 x half> %a
+}
+
+declare <vscale x 1 x float> @llvm.riscv.vmerge.nxv1f32.nxv1f32(
+  <vscale x 1 x float>,
+  <vscale x 1 x float>,
+  <vscale x 1 x i1>,
+  i32);
+
+define <vscale x 1 x float> @intrinsic_vmerge_vvm_nxv1f32_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, <vscale x 1 x i1> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vmerge_vvm_nxv1f32_nxv1f32_nxv1f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 1 x float> @llvm.riscv.vmerge.nxv1f32.nxv1f32(
+    <vscale x 1 x float> %0,
+    <vscale x 1 x float> %1,
+    <vscale x 1 x i1> %2,
+    i32 %3)
+
+  ret <vscale x 1 x float> %a
+}
+
+declare <vscale x 2 x float> @llvm.riscv.vmerge.nxv2f32.nxv2f32(
+  <vscale x 2 x float>,
+  <vscale x 2 x float>,
+  <vscale x 2 x i1>,
+  i32);
+
+define <vscale x 2 x float> @intrinsic_vmerge_vvm_nxv2f32_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x i1> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vmerge_vvm_nxv2f32_nxv2f32_nxv2f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 2 x float> @llvm.riscv.vmerge.nxv2f32.nxv2f32(
+    <vscale x 2 x float> %0,
+    <vscale x 2 x float> %1,
+    <vscale x 2 x i1> %2,
+    i32 %3)
+
+  ret <vscale x 2 x float> %a
+}
+
+declare <vscale x 4 x float> @llvm.riscv.vmerge.nxv4f32.nxv4f32(
+  <vscale x 4 x float>,
+  <vscale x 4 x float>,
+  <vscale x 4 x i1>,
+  i32);
+
+define <vscale x 4 x float> @intrinsic_vmerge_vvm_nxv4f32_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, <vscale x 4 x i1> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vmerge_vvm_nxv4f32_nxv4f32_nxv4f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v8, v10, v0
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 4 x float> @llvm.riscv.vmerge.nxv4f32.nxv4f32(
+    <vscale x 4 x float> %0,
+    <vscale x 4 x float> %1,
+    <vscale x 4 x i1> %2,
+    i32 %3)
+
+  ret <vscale x 4 x float> %a
+}
+
+declare <vscale x 8 x float> @llvm.riscv.vmerge.nxv8f32.nxv8f32(
+  <vscale x 8 x float>,
+  <vscale x 8 x float>,
+  <vscale x 8 x i1>,
+  i32);
+
+define <vscale x 8 x float> @intrinsic_vmerge_vvm_nxv8f32_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, <vscale x 8 x i1> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vmerge_vvm_nxv8f32_nxv8f32_nxv8f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v8, v12, v0
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 8 x float> @llvm.riscv.vmerge.nxv8f32.nxv8f32(
+    <vscale x 8 x float> %0,
+    <vscale x 8 x float> %1,
+    <vscale x 8 x i1> %2,
+    i32 %3)
+
+  ret <vscale x 8 x float> %a
+}
+
+declare <vscale x 16 x float> @llvm.riscv.vmerge.nxv16f32.nxv16f32(
+  <vscale x 16 x float>,
+  <vscale x 16 x float>,
+  <vscale x 16 x i1>,
+  i32);
+
+define <vscale x 16 x float> @intrinsic_vmerge_vvm_nxv16f32_nxv16f32_nxv16f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, <vscale x 16 x i1> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vmerge_vvm_nxv16f32_nxv16f32_nxv16f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v8, v16, v0
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 16 x float> @llvm.riscv.vmerge.nxv16f32.nxv16f32(
+    <vscale x 16 x float> %0,
+    <vscale x 16 x float> %1,
+    <vscale x 16 x i1> %2,
+    i32 %3)
+
+  ret <vscale x 16 x float> %a
+}
+
+declare <vscale x 1 x double> @llvm.riscv.vmerge.nxv1f64.nxv1f64(
+  <vscale x 1 x double>,
+  <vscale x 1 x double>,
+  <vscale x 1 x i1>,
+  i32);
+
+define <vscale x 1 x double> @intrinsic_vmerge_vvm_nxv1f64_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, <vscale x 1 x i1> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vmerge_vvm_nxv1f64_nxv1f64_nxv1f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 1 x double> @llvm.riscv.vmerge.nxv1f64.nxv1f64(
+    <vscale x 1 x double> %0,
+    <vscale x 1 x double> %1,
+    <vscale x 1 x i1> %2,
+    i32 %3)
+
+  ret <vscale x 1 x double> %a
+}
+
+declare <vscale x 2 x double> @llvm.riscv.vmerge.nxv2f64.nxv2f64(
+  <vscale x 2 x double>,
+  <vscale x 2 x double>,
+  <vscale x 2 x i1>,
+  i32);
+
+define <vscale x 2 x double> @intrinsic_vmerge_vvm_nxv2f64_nxv2f64_nxv2f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, <vscale x 2 x i1> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vmerge_vvm_nxv2f64_nxv2f64_nxv2f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v8, v10, v0
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 2 x double> @llvm.riscv.vmerge.nxv2f64.nxv2f64(
+    <vscale x 2 x double> %0,
+    <vscale x 2 x double> %1,
+    <vscale x 2 x i1> %2,
+    i32 %3)
+
+  ret <vscale x 2 x double> %a
+}
+
+declare <vscale x 4 x double> @llvm.riscv.vmerge.nxv4f64.nxv4f64(
+  <vscale x 4 x double>,
+  <vscale x 4 x double>,
+  <vscale x 4 x i1>,
+  i32);
+
+define <vscale x 4 x double> @intrinsic_vmerge_vvm_nxv4f64_nxv4f64_nxv4f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, <vscale x 4 x i1> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vmerge_vvm_nxv4f64_nxv4f64_nxv4f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v8, v12, v0
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 4 x double> @llvm.riscv.vmerge.nxv4f64.nxv4f64(
+    <vscale x 4 x double> %0,
+    <vscale x 4 x double> %1,
+    <vscale x 4 x i1> %2,
+    i32 %3)
+
+  ret <vscale x 4 x double> %a
+}
+
+declare <vscale x 8 x double> @llvm.riscv.vmerge.nxv8f64.nxv8f64(
+  <vscale x 8 x double>,
+  <vscale x 8 x double>,
+  <vscale x 8 x i1>,
+  i32);
+
+define <vscale x 8 x double> @intrinsic_vmerge_vvm_nxv8f64_nxv8f64_nxv8f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, <vscale x 8 x i1> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vmerge_vvm_nxv8f64_nxv8f64_nxv8f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v8, v16, v0
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 8 x double> @llvm.riscv.vmerge.nxv8f64.nxv8f64(
+    <vscale x 8 x double> %0,
+    <vscale x 8 x double> %1,
+    <vscale x 8 x i1> %2,
+    i32 %3)
+
+  ret <vscale x 8 x double> %a
+}

diff --git a/llvm/test/CodeGen/RISCV/rvv/vmerge-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmerge-rv64.ll
index 2a84a528045f..9f676172ee0c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmerge-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmerge-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+experimental-zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.vmerge.nxv1i8.nxv1i8(
   <vscale x 1 x i8>,
@@ -1320,3 +1320,333 @@ entry:
 
   ret <vscale x 8 x i64> %a
 }
+
+declare <vscale x 1 x half> @llvm.riscv.vmerge.nxv1f16.nxv1f16(
+  <vscale x 1 x half>,
+  <vscale x 1 x half>,
+  <vscale x 1 x i1>,
+  i64);
+
+define <vscale x 1 x half> @intrinsic_vmerge_vvm_nxv1f16_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, <vscale x 1 x i1> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vmerge_vvm_nxv1f16_nxv1f16_nxv1f16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 1 x half> @llvm.riscv.vmerge.nxv1f16.nxv1f16(
+    <vscale x 1 x half> %0,
+    <vscale x 1 x half> %1,
+    <vscale x 1 x i1> %2,
+    i64 %3)
+
+  ret <vscale x 1 x half> %a
+}
+
+declare <vscale x 2 x half> @llvm.riscv.vmerge.nxv2f16.nxv2f16(
+  <vscale x 2 x half>,
+  <vscale x 2 x half>,
+  <vscale x 2 x i1>,
+  i64);
+
+define <vscale x 2 x half> @intrinsic_vmerge_vvm_nxv2f16_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, <vscale x 2 x i1> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vmerge_vvm_nxv2f16_nxv2f16_nxv2f16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 2 x half> @llvm.riscv.vmerge.nxv2f16.nxv2f16(
+    <vscale x 2 x half> %0,
+    <vscale x 2 x half> %1,
+    <vscale x 2 x i1> %2,
+    i64 %3)
+
+  ret <vscale x 2 x half> %a
+}
+
+declare <vscale x 4 x half> @llvm.riscv.vmerge.nxv4f16.nxv4f16(
+  <vscale x 4 x half>,
+  <vscale x 4 x half>,
+  <vscale x 4 x i1>,
+  i64);
+
+define <vscale x 4 x half> @intrinsic_vmerge_vvm_nxv4f16_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, <vscale x 4 x i1> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vmerge_vvm_nxv4f16_nxv4f16_nxv4f16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 4 x half> @llvm.riscv.vmerge.nxv4f16.nxv4f16(
+    <vscale x 4 x half> %0,
+    <vscale x 4 x half> %1,
+    <vscale x 4 x i1> %2,
+    i64 %3)
+
+  ret <vscale x 4 x half> %a
+}
+
+declare <vscale x 8 x half> @llvm.riscv.vmerge.nxv8f16.nxv8f16(
+  <vscale x 8 x half>,
+  <vscale x 8 x half>,
+  <vscale x 8 x i1>,
+  i64);
+
+define <vscale x 8 x half> @intrinsic_vmerge_vvm_nxv8f16_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x i1> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vmerge_vvm_nxv8f16_nxv8f16_nxv8f16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v8, v10, v0
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 8 x half> @llvm.riscv.vmerge.nxv8f16.nxv8f16(
+    <vscale x 8 x half> %0,
+    <vscale x 8 x half> %1,
+    <vscale x 8 x i1> %2,
+    i64 %3)
+
+  ret <vscale x 8 x half> %a
+}
+
+declare <vscale x 16 x half> @llvm.riscv.vmerge.nxv16f16.nxv16f16(
+  <vscale x 16 x half>,
+  <vscale x 16 x half>,
+  <vscale x 16 x i1>,
+  i64);
+
+define <vscale x 16 x half> @intrinsic_vmerge_vvm_nxv16f16_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, <vscale x 16 x i1> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vmerge_vvm_nxv16f16_nxv16f16_nxv16f16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v8, v12, v0
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 16 x half> @llvm.riscv.vmerge.nxv16f16.nxv16f16(
+    <vscale x 16 x half> %0,
+    <vscale x 16 x half> %1,
+    <vscale x 16 x i1> %2,
+    i64 %3)
+
+  ret <vscale x 16 x half> %a
+}
+
+declare <vscale x 32 x half> @llvm.riscv.vmerge.nxv32f16.nxv32f16(
+  <vscale x 32 x half>,
+  <vscale x 32 x half>,
+  <vscale x 32 x i1>,
+  i64);
+
+define <vscale x 32 x half> @intrinsic_vmerge_vvm_nxv32f16_nxv32f16_nxv32f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, <vscale x 32 x i1> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vmerge_vvm_nxv32f16_nxv32f16_nxv32f16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v8, v16, v0
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 32 x half> @llvm.riscv.vmerge.nxv32f16.nxv32f16(
+    <vscale x 32 x half> %0,
+    <vscale x 32 x half> %1,
+    <vscale x 32 x i1> %2,
+    i64 %3)
+
+  ret <vscale x 32 x half> %a
+}
+
+declare <vscale x 1 x float> @llvm.riscv.vmerge.nxv1f32.nxv1f32(
+  <vscale x 1 x float>,
+  <vscale x 1 x float>,
+  <vscale x 1 x i1>,
+  i64);
+
+define <vscale x 1 x float> @intrinsic_vmerge_vvm_nxv1f32_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, <vscale x 1 x i1> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vmerge_vvm_nxv1f32_nxv1f32_nxv1f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 1 x float> @llvm.riscv.vmerge.nxv1f32.nxv1f32(
+    <vscale x 1 x float> %0,
+    <vscale x 1 x float> %1,
+    <vscale x 1 x i1> %2,
+    i64 %3)
+
+  ret <vscale x 1 x float> %a
+}
+
+declare <vscale x 2 x float> @llvm.riscv.vmerge.nxv2f32.nxv2f32(
+  <vscale x 2 x float>,
+  <vscale x 2 x float>,
+  <vscale x 2 x i1>,
+  i64);
+
+define <vscale x 2 x float> @intrinsic_vmerge_vvm_nxv2f32_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x i1> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vmerge_vvm_nxv2f32_nxv2f32_nxv2f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 2 x float> @llvm.riscv.vmerge.nxv2f32.nxv2f32(
+    <vscale x 2 x float> %0,
+    <vscale x 2 x float> %1,
+    <vscale x 2 x i1> %2,
+    i64 %3)
+
+  ret <vscale x 2 x float> %a
+}
+
+declare <vscale x 4 x float> @llvm.riscv.vmerge.nxv4f32.nxv4f32(
+  <vscale x 4 x float>,
+  <vscale x 4 x float>,
+  <vscale x 4 x i1>,
+  i64);
+
+define <vscale x 4 x float> @intrinsic_vmerge_vvm_nxv4f32_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, <vscale x 4 x i1> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vmerge_vvm_nxv4f32_nxv4f32_nxv4f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v8, v10, v0
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 4 x float> @llvm.riscv.vmerge.nxv4f32.nxv4f32(
+    <vscale x 4 x float> %0,
+    <vscale x 4 x float> %1,
+    <vscale x 4 x i1> %2,
+    i64 %3)
+
+  ret <vscale x 4 x float> %a
+}
+
+declare <vscale x 8 x float> @llvm.riscv.vmerge.nxv8f32.nxv8f32(
+  <vscale x 8 x float>,
+  <vscale x 8 x float>,
+  <vscale x 8 x i1>,
+  i64);
+
+define <vscale x 8 x float> @intrinsic_vmerge_vvm_nxv8f32_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, <vscale x 8 x i1> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vmerge_vvm_nxv8f32_nxv8f32_nxv8f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v8, v12, v0
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 8 x float> @llvm.riscv.vmerge.nxv8f32.nxv8f32(
+    <vscale x 8 x float> %0,
+    <vscale x 8 x float> %1,
+    <vscale x 8 x i1> %2,
+    i64 %3)
+
+  ret <vscale x 8 x float> %a
+}
+
+declare <vscale x 16 x float> @llvm.riscv.vmerge.nxv16f32.nxv16f32(
+  <vscale x 16 x float>,
+  <vscale x 16 x float>,
+  <vscale x 16 x i1>,
+  i64);
+
+define <vscale x 16 x float> @intrinsic_vmerge_vvm_nxv16f32_nxv16f32_nxv16f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, <vscale x 16 x i1> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vmerge_vvm_nxv16f32_nxv16f32_nxv16f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v8, v16, v0
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 16 x float> @llvm.riscv.vmerge.nxv16f32.nxv16f32(
+    <vscale x 16 x float> %0,
+    <vscale x 16 x float> %1,
+    <vscale x 16 x i1> %2,
+    i64 %3)
+
+  ret <vscale x 16 x float> %a
+}
+
+declare <vscale x 1 x double> @llvm.riscv.vmerge.nxv1f64.nxv1f64(
+  <vscale x 1 x double>,
+  <vscale x 1 x double>,
+  <vscale x 1 x i1>,
+  i64);
+
+define <vscale x 1 x double> @intrinsic_vmerge_vvm_nxv1f64_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, <vscale x 1 x i1> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vmerge_vvm_nxv1f64_nxv1f64_nxv1f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 1 x double> @llvm.riscv.vmerge.nxv1f64.nxv1f64(
+    <vscale x 1 x double> %0,
+    <vscale x 1 x double> %1,
+    <vscale x 1 x i1> %2,
+    i64 %3)
+
+  ret <vscale x 1 x double> %a
+}
+
+declare <vscale x 2 x double> @llvm.riscv.vmerge.nxv2f64.nxv2f64(
+  <vscale x 2 x double>,
+  <vscale x 2 x double>,
+  <vscale x 2 x i1>,
+  i64);
+
+define <vscale x 2 x double> @intrinsic_vmerge_vvm_nxv2f64_nxv2f64_nxv2f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, <vscale x 2 x i1> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vmerge_vvm_nxv2f64_nxv2f64_nxv2f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v8, v10, v0
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 2 x double> @llvm.riscv.vmerge.nxv2f64.nxv2f64(
+    <vscale x 2 x double> %0,
+    <vscale x 2 x double> %1,
+    <vscale x 2 x i1> %2,
+    i64 %3)
+
+  ret <vscale x 2 x double> %a
+}
+
+declare <vscale x 4 x double> @llvm.riscv.vmerge.nxv4f64.nxv4f64(
+  <vscale x 4 x double>,
+  <vscale x 4 x double>,
+  <vscale x 4 x i1>,
+  i64);
+
+define <vscale x 4 x double> @intrinsic_vmerge_vvm_nxv4f64_nxv4f64_nxv4f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, <vscale x 4 x i1> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vmerge_vvm_nxv4f64_nxv4f64_nxv4f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v8, v12, v0
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 4 x double> @llvm.riscv.vmerge.nxv4f64.nxv4f64(
+    <vscale x 4 x double> %0,
+    <vscale x 4 x double> %1,
+    <vscale x 4 x i1> %2,
+    i64 %3)
+
+  ret <vscale x 4 x double> %a
+}
+
+declare <vscale x 8 x double> @llvm.riscv.vmerge.nxv8f64.nxv8f64(
+  <vscale x 8 x double>,
+  <vscale x 8 x double>,
+  <vscale x 8 x i1>,
+  i64);
+
+define <vscale x 8 x double> @intrinsic_vmerge_vvm_nxv8f64_nxv8f64_nxv8f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, <vscale x 8 x i1> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vmerge_vvm_nxv8f64_nxv8f64_nxv8f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
+; CHECK-NEXT:    vmerge.vvm v8, v8, v16, v0
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 8 x double> @llvm.riscv.vmerge.nxv8f64.nxv8f64(
+    <vscale x 8 x double> %0,
+    <vscale x 8 x double> %1,
+    <vscale x 8 x i1> %2,
+    i64 %3)
+
+  ret <vscale x 8 x double> %a
+}


        

