[clang] [Clang][RISCV] Add vcreate intrinsics for RVV non-tuple types (PR #70355)

Thu Oct 26 10:06:21 PDT 2023

llvmbot wrote:




@llvm/pr-subscribers-clang

Author: Yueh-Ting (eop) Chen (eopXD)

<details>
<summary>Changes</summary>

riscv-non-isa/rvv-intrinsic-doc#288

---

Patch is 459.83 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/70355.diff


2 Files Affected:

- (modified) clang/include/clang/Basic/riscv_vector.td (+71-6) 
- (modified) clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vcreate.c (+1825-630) 


``````````diff

diff --git a/clang/include/clang/Basic/riscv_vector.td b/clang/include/clang/Basic/riscv_vector.td
index c685f3ef6087d81..deac0923ed6af94 100644
--- a/clang/include/clang/Basic/riscv_vector.td
+++ b/clang/include/clang/Basic/riscv_vector.td
@@ -345,6 +345,54 @@ class VString<int nf, bit signed> {
                    !eq(nf, 8): !if(signed, "vvvvvvvv", "UvUvUvUvUvUvUvUv"));
 }
 
+
+class FixedVString<int fixed_lmul, int num, bit signed> {
+  string V = "(LFixedLog2LMUL:" # fixed_lmul # ")" # !if(signed, "v", "Uv");
+  string S = !cond(!eq(num, 1): V,
+                   !eq(num, 2): V # V,
+                   !eq(num, 4): V # V # V # V,
+                   !eq(num, 8): V # V # V # V # V # V # V # V,
+                   true: "");
+}
+
+// This function is used in RVVNonTupleVCreateBuiltin to determine how many
+// variable we need to assemble the bigger LMUL value.
+// X = 2^{pow_x - pow_y}, with value range of pow_x, pow_y = [0, 3], pow_x > pow_y
+class GetPowDiff<int pow_x, int pow_y> {
+  int X = !cond(!eq(pow_x, 1): 2,
+                !eq(pow_x, 2):
+                  !cond(!eq(pow_y, 0): 4,
+                        !eq(pow_y, 1): 2,
+                        true: 0),
+                !eq(pow_x, 3):
+                  !cond(!eq(pow_y, 0): 8,
+                        !eq(pow_y, 1): 4,
+                        !eq(pow_y, 2): 2,
+                        true: 0),
+                true: 0);
+}
+
+multiclass RVVNonTupleVCreateBuiltin<int dst_lmul, list<int> src_lmul_list> {
+  defvar dst_v = FixedVString<dst_lmul, 1, /*signed=*/true>.V;
+  defvar dst_uv = FixedVString<dst_lmul, 1, /*signed=*/false>.V;
+  foreach src_lmul = src_lmul_list in {
+    defvar num = GetPowDiff<dst_lmul, src_lmul>.X;
+
+    defvar src_v = FixedVString<src_lmul, num, /*signed=*/true>.V;
+    defvar src_s = FixedVString<src_lmul, num, /*signed=*/true>.S;
+
+    def vcreate # src_v # dst_v : RVVBuiltin<src_v # dst_v,
+                                              dst_v # src_s,
+                                              "csilxfd", dst_v>;
+
+    defvar src_uv = FixedVString<src_lmul, num, /*signed=*/false>.V;
+    defvar src_us = FixedVString<src_lmul, num, /*signed=*/false>.S;
+    def vcreate_u # src_uv # dst_uv : RVVBuiltin<src_uv # dst_uv,
+                                                  dst_uv # src_us,
+                                                  "csil", dst_uv>;
+  }
+}
+
 multiclass RVVPseudoUnaryBuiltin<string IR, string type_range> {
   let Name = NAME,
       IRName = IR,
@@ -2429,15 +2477,32 @@ let HasMasked = false, HasVL = false, IRName = "" in {
       SupportOverloading = false,
       ManualCodegen = [{
       {
-        assert(isa<StructType>(ResultType));
-        unsigned NF = cast<StructType>(ResultType)->getNumElements();
-        llvm::Value *ReturnTuple = llvm::PoisonValue::get(ResultType);
-        for (unsigned I = 0; I < NF; ++I) {
-          ReturnTuple = Builder.CreateInsertValue(ReturnTuple, Ops[I], {I});
+        if (isa<StructType>(ResultType)) {
+          unsigned NF = cast<StructType>(ResultType)->getNumElements();
+          llvm::Value *ReturnTuple = llvm::PoisonValue::get(ResultType);
+          for (unsigned I = 0; I < NF; ++I) {
+            ReturnTuple = Builder.CreateInsertValue(ReturnTuple, Ops[I], {I});
+          }
+          return ReturnTuple;
+        } else {
+          llvm::Value *ReturnVector = llvm::PoisonValue::get(ResultType);
+          auto *VecTy = cast<ScalableVectorType>(Ops[0]->getType());
+          for (unsigned I = 0, N = Ops.size(); I < N; ++I) {
+            llvm::Value *Idx =
+              ConstantInt::get(Builder.getInt64Ty(),
+                               VecTy->getMinNumElements() * I);
+            ReturnVector =
+              Builder.CreateInsertVector(ResultType, ReturnVector, Ops[I], Idx);
+          }
+          return ReturnVector;
         }
-        return ReturnTuple;
       }
       }] in {
+
+    defm : RVVNonTupleVCreateBuiltin<1, [0]>;
+    defm : RVVNonTupleVCreateBuiltin<2, [0, 1]>;
+    defm : RVVNonTupleVCreateBuiltin<3, [0, 1, 2]>;
+
     foreach nf = NFList in {
       let NF = nf in {
         defvar T = "(Tuple:" # nf # ")";
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vcreate.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vcreate.c
index 5dee49e4557415d..2878f2a3277a7d3 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vcreate.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vcreate.c
@@ -1,4 +1,4 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 3
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \
 // RUN:   -target-feature +zvfh -disable-O0-optnone  \
@@ -7,8 +7,897 @@
 
 #include <riscv_vector.h>
 
-// CHECK-RV64-LABEL: define dso_local { <vscale x 1 x half>, <vscale x 1 x half> } @test_vcreate_v_f16mf4x2
-// CHECK-RV64-SAME: (<vscale x 1 x half> [[V0:%.*]], <vscale x 1 x half> [[V1:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-RV64-LABEL: define dso_local <vscale x 8 x half> @test_vcreate_v_f16m1_f16m2(
+// CHECK-RV64-SAME: <vscale x 4 x half> [[V0:%.*]], <vscale x 4 x half> [[V1:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x half> @llvm.vector.insert.nxv8f16.nxv4f16(<vscale x 8 x half> poison, <vscale x 4 x half> [[V0]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x half> @llvm.vector.insert.nxv8f16.nxv4f16(<vscale x 8 x half> [[TMP0]], <vscale x 4 x half> [[V1]], i64 4)
+// CHECK-RV64-NEXT:    ret <vscale x 8 x half> [[TMP1]]
+//
+vfloat16m2_t test_vcreate_v_f16m1_f16m2(vfloat16m1_t v0, vfloat16m1_t v1) {
+  return __riscv_vcreate_v_f16m1_f16m2(v0, v1);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 16 x half> @test_vcreate_v_f16m1_f16m4(
+// CHECK-RV64-SAME: <vscale x 4 x half> [[V0:%.*]], <vscale x 4 x half> [[V1:%.*]], <vscale x 4 x half> [[V2:%.*]], <vscale x 4 x half> [[V3:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x half> @llvm.vector.insert.nxv16f16.nxv4f16(<vscale x 16 x half> poison, <vscale x 4 x half> [[V0]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x half> @llvm.vector.insert.nxv16f16.nxv4f16(<vscale x 16 x half> [[TMP0]], <vscale x 4 x half> [[V1]], i64 4)
+// CHECK-RV64-NEXT:    [[TMP2:%.*]] = call <vscale x 16 x half> @llvm.vector.insert.nxv16f16.nxv4f16(<vscale x 16 x half> [[TMP1]], <vscale x 4 x half> [[V2]], i64 8)
+// CHECK-RV64-NEXT:    [[TMP3:%.*]] = call <vscale x 16 x half> @llvm.vector.insert.nxv16f16.nxv4f16(<vscale x 16 x half> [[TMP2]], <vscale x 4 x half> [[V3]], i64 12)
+// CHECK-RV64-NEXT:    ret <vscale x 16 x half> [[TMP3]]
+//
+vfloat16m4_t test_vcreate_v_f16m1_f16m4(vfloat16m1_t v0, vfloat16m1_t v1,
+                                        vfloat16m1_t v2, vfloat16m1_t v3) {
+  return __riscv_vcreate_v_f16m1_f16m4(v0, v1, v2, v3);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 32 x half> @test_vcreate_v_f16m1_f16m8(
+// CHECK-RV64-SAME: <vscale x 4 x half> [[V0:%.*]], <vscale x 4 x half> [[V1:%.*]], <vscale x 4 x half> [[V2:%.*]], <vscale x 4 x half> [[V3:%.*]], <vscale x 4 x half> [[V4:%.*]], <vscale x 4 x half> [[V5:%.*]], <vscale x 4 x half> [[V6:%.*]], <vscale x 4 x half> [[V7:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv4f16(<vscale x 32 x half> poison, <vscale x 4 x half> [[V0]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv4f16(<vscale x 32 x half> [[TMP0]], <vscale x 4 x half> [[V1]], i64 4)
+// CHECK-RV64-NEXT:    [[TMP2:%.*]] = call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv4f16(<vscale x 32 x half> [[TMP1]], <vscale x 4 x half> [[V2]], i64 8)
+// CHECK-RV64-NEXT:    [[TMP3:%.*]] = call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv4f16(<vscale x 32 x half> [[TMP2]], <vscale x 4 x half> [[V3]], i64 12)
+// CHECK-RV64-NEXT:    [[TMP4:%.*]] = call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv4f16(<vscale x 32 x half> [[TMP3]], <vscale x 4 x half> [[V4]], i64 16)
+// CHECK-RV64-NEXT:    [[TMP5:%.*]] = call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv4f16(<vscale x 32 x half> [[TMP4]], <vscale x 4 x half> [[V5]], i64 20)
+// CHECK-RV64-NEXT:    [[TMP6:%.*]] = call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv4f16(<vscale x 32 x half> [[TMP5]], <vscale x 4 x half> [[V6]], i64 24)
+// CHECK-RV64-NEXT:    [[TMP7:%.*]] = call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv4f16(<vscale x 32 x half> [[TMP6]], <vscale x 4 x half> [[V7]], i64 28)
+// CHECK-RV64-NEXT:    ret <vscale x 32 x half> [[TMP7]]
+//
+vfloat16m8_t test_vcreate_v_f16m1_f16m8(vfloat16m1_t v0, vfloat16m1_t v1,
+                                        vfloat16m1_t v2, vfloat16m1_t v3,
+                                        vfloat16m1_t v4, vfloat16m1_t v5,
+                                        vfloat16m1_t v6, vfloat16m1_t v7) {
+  return __riscv_vcreate_v_f16m1_f16m8(v0, v1, v2, v3, v4, v5, v6, v7);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 16 x half> @test_vcreate_v_f16m2_f16m4(
+// CHECK-RV64-SAME: <vscale x 8 x half> [[V0:%.*]], <vscale x 8 x half> [[V1:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x half> @llvm.vector.insert.nxv16f16.nxv8f16(<vscale x 16 x half> poison, <vscale x 8 x half> [[V0]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x half> @llvm.vector.insert.nxv16f16.nxv8f16(<vscale x 16 x half> [[TMP0]], <vscale x 8 x half> [[V1]], i64 8)
+// CHECK-RV64-NEXT:    ret <vscale x 16 x half> [[TMP1]]
+//
+vfloat16m4_t test_vcreate_v_f16m2_f16m4(vfloat16m2_t v0, vfloat16m2_t v1) {
+  return __riscv_vcreate_v_f16m2_f16m4(v0, v1);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 32 x half> @test_vcreate_v_f16m2_f16m8(
+// CHECK-RV64-SAME: <vscale x 8 x half> [[V0:%.*]], <vscale x 8 x half> [[V1:%.*]], <vscale x 8 x half> [[V2:%.*]], <vscale x 8 x half> [[V3:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv8f16(<vscale x 32 x half> poison, <vscale x 8 x half> [[V0]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv8f16(<vscale x 32 x half> [[TMP0]], <vscale x 8 x half> [[V1]], i64 8)
+// CHECK-RV64-NEXT:    [[TMP2:%.*]] = call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv8f16(<vscale x 32 x half> [[TMP1]], <vscale x 8 x half> [[V2]], i64 16)
+// CHECK-RV64-NEXT:    [[TMP3:%.*]] = call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv8f16(<vscale x 32 x half> [[TMP2]], <vscale x 8 x half> [[V3]], i64 24)
+// CHECK-RV64-NEXT:    ret <vscale x 32 x half> [[TMP3]]
+//
+vfloat16m8_t test_vcreate_v_f16m2_f16m8(vfloat16m2_t v0, vfloat16m2_t v1,
+                                        vfloat16m2_t v2, vfloat16m2_t v3) {
+  return __riscv_vcreate_v_f16m2_f16m8(v0, v1, v2, v3);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 32 x half> @test_vcreate_v_f16m4_f16m8(
+// CHECK-RV64-SAME: <vscale x 16 x half> [[V0:%.*]], <vscale x 16 x half> [[V1:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv16f16(<vscale x 32 x half> poison, <vscale x 16 x half> [[V0]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv16f16(<vscale x 32 x half> [[TMP0]], <vscale x 16 x half> [[V1]], i64 16)
+// CHECK-RV64-NEXT:    ret <vscale x 32 x half> [[TMP1]]
+//
+vfloat16m8_t test_vcreate_v_f16m4_f16m8(vfloat16m4_t v0, vfloat16m4_t v1) {
+  return __riscv_vcreate_v_f16m4_f16m8(v0, v1);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 4 x float> @test_vcreate_v_f32m1_f32m2(
+// CHECK-RV64-SAME: <vscale x 2 x float> [[V0:%.*]], <vscale x 2 x float> [[V1:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.nxv2f32(<vscale x 4 x float> poison, <vscale x 2 x float> [[V0]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.nxv2f32(<vscale x 4 x float> [[TMP0]], <vscale x 2 x float> [[V1]], i64 2)
+// CHECK-RV64-NEXT:    ret <vscale x 4 x float> [[TMP1]]
+//
+vfloat32m2_t test_vcreate_v_f32m1_f32m2(vfloat32m1_t v0, vfloat32m1_t v1) {
+  return __riscv_vcreate_v_f32m1_f32m2(v0, v1);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 8 x float> @test_vcreate_v_f32m1_f32m4(
+// CHECK-RV64-SAME: <vscale x 2 x float> [[V0:%.*]], <vscale x 2 x float> [[V1:%.*]], <vscale x 2 x float> [[V2:%.*]], <vscale x 2 x float> [[V3:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv2f32(<vscale x 8 x float> poison, <vscale x 2 x float> [[V0]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv2f32(<vscale x 8 x float> [[TMP0]], <vscale x 2 x float> [[V1]], i64 2)
+// CHECK-RV64-NEXT:    [[TMP2:%.*]] = call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv2f32(<vscale x 8 x float> [[TMP1]], <vscale x 2 x float> [[V2]], i64 4)
+// CHECK-RV64-NEXT:    [[TMP3:%.*]] = call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv2f32(<vscale x 8 x float> [[TMP2]], <vscale x 2 x float> [[V3]], i64 6)
+// CHECK-RV64-NEXT:    ret <vscale x 8 x float> [[TMP3]]
+//
+vfloat32m4_t test_vcreate_v_f32m1_f32m4(vfloat32m1_t v0, vfloat32m1_t v1,
+                                        vfloat32m1_t v2, vfloat32m1_t v3) {
+  return __riscv_vcreate_v_f32m1_f32m4(v0, v1, v2, v3);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 16 x float> @test_vcreate_v_f32m1_f32m8(
+// CHECK-RV64-SAME: <vscale x 2 x float> [[V0:%.*]], <vscale x 2 x float> [[V1:%.*]], <vscale x 2 x float> [[V2:%.*]], <vscale x 2 x float> [[V3:%.*]], <vscale x 2 x float> [[V4:%.*]], <vscale x 2 x float> [[V5:%.*]], <vscale x 2 x float> [[V6:%.*]], <vscale x 2 x float> [[V7:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv2f32(<vscale x 16 x float> poison, <vscale x 2 x float> [[V0]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv2f32(<vscale x 16 x float> [[TMP0]], <vscale x 2 x float> [[V1]], i64 2)
+// CHECK-RV64-NEXT:    [[TMP2:%.*]] = call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv2f32(<vscale x 16 x float> [[TMP1]], <vscale x 2 x float> [[V2]], i64 4)
+// CHECK-RV64-NEXT:    [[TMP3:%.*]] = call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv2f32(<vscale x 16 x float> [[TMP2]], <vscale x 2 x float> [[V3]], i64 6)
+// CHECK-RV64-NEXT:    [[TMP4:%.*]] = call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv2f32(<vscale x 16 x float> [[TMP3]], <vscale x 2 x float> [[V4]], i64 8)
+// CHECK-RV64-NEXT:    [[TMP5:%.*]] = call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv2f32(<vscale x 16 x float> [[TMP4]], <vscale x 2 x float> [[V5]], i64 10)
+// CHECK-RV64-NEXT:    [[TMP6:%.*]] = call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv2f32(<vscale x 16 x float> [[TMP5]], <vscale x 2 x float> [[V6]], i64 12)
+// CHECK-RV64-NEXT:    [[TMP7:%.*]] = call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv2f32(<vscale x 16 x float> [[TMP6]], <vscale x 2 x float> [[V7]], i64 14)
+// CHECK-RV64-NEXT:    ret <vscale x 16 x float> [[TMP7]]
+//
+vfloat32m8_t test_vcreate_v_f32m1_f32m8(vfloat32m1_t v0, vfloat32m1_t v1,
+                                        vfloat32m1_t v2, vfloat32m1_t v3,
+                                        vfloat32m1_t v4, vfloat32m1_t v5,
+                                        vfloat32m1_t v6, vfloat32m1_t v7) {
+  return __riscv_vcreate_v_f32m1_f32m8(v0, v1, v2, v3, v4, v5, v6, v7);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 8 x float> @test_vcreate_v_f32m2_f32m4(
+// CHECK-RV64-SAME: <vscale x 4 x float> [[V0:%.*]], <vscale x 4 x float> [[V1:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> poison, <vscale x 4 x float> [[V0]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> [[TMP0]], <vscale x 4 x float> [[V1]], i64 4)
+// CHECK-RV64-NEXT:    ret <vscale x 8 x float> [[TMP1]]
+//
+vfloat32m4_t test_vcreate_v_f32m2_f32m4(vfloat32m2_t v0, vfloat32m2_t v1) {
+  return __riscv_vcreate_v_f32m2_f32m4(v0, v1);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 16 x float> @test_vcreate_v_f32m2_f32m8(
+// CHECK-RV64-SAME: <vscale x 4 x float> [[V0:%.*]], <vscale x 4 x float> [[V1:%.*]], <vscale x 4 x float> [[V2:%.*]], <vscale x 4 x float> [[V3:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> poison, <vscale x 4 x float> [[V0]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP0]], <vscale x 4 x float> [[V1]], i64 4)
+// CHECK-RV64-NEXT:    [[TMP2:%.*]] = call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP1]], <vscale x 4 x float> [[V2]], i64 8)
+// CHECK-RV64-NEXT:    [[TMP3:%.*]] = call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP2]], <vscale x 4 x float> [[V3]], i64 12)
+// CHECK-RV64-NEXT:    ret <vscale x 16 x float> [[TMP3]]
+//
+vfloat32m8_t test_vcreate_v_f32m2_f32m8(vfloat32m2_t v0, vfloat32m2_t v1,
+                                        vfloat32m2_t v2, vfloat32m2_t v3) {
+  return __riscv_vcreate_v_f32m2_f32m8(v0, v1, v2, v3);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 16 x float> @test_vcreate_v_f32m4_f32m8(
+// CHECK-RV64-SAME: <vscale x 8 x float> [[V0:%.*]], <vscale x 8 x float> [[V1:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv8f32(<vscale x 16 x float> poison, <vscale x 8 x float> [[V0]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv8f32(<vscale x 16 x float> [[TMP0]], <vscale x 8 x float> [[V1]], i64 8)
+// CHECK-RV64-NEXT:    ret <vscale x 16 x float> [[TMP1]]
+//
+vfloat32m8_t test_vcreate_v_f32m4_f32m8(vfloat32m4_t v0, vfloat32m4_t v1) {
+  return __riscv_vcreate_v_f32m4_f32m8(v0, v1);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 2 x double> @test_vcreate_v_f64m1_f64m2(
+// CHECK-RV64-SAME: <vscale x 1 x double> [[V0:%.*]], <vscale x 1 x double> [[V1:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x double> @llvm.vector.insert.nxv2f64.nxv1f64(<vscale x 2 x double> poison, <vscale x 1 x double> [[V0]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x double> @llvm.vector.insert.nxv2f64.nxv1f64(<vscale x 2 x double> [[TMP0]], <vscale x 1 x double> [[V1]], i64 1)
+// CHECK-RV64-NEXT:    ret <vscale x 2 x double> [[TMP1]]
+//
+vfloat64m2_t test_vcreate_v_f64m1_f64m2(vfloat64m1_t v0, vfloat64m1_t v1) {
+  return __riscv_vcreate_v_f64m1_f64m2(v0, v1);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 4 x double> @test_vcreate_v_f64m1_f64m4(
+// CHECK-RV64-SAME: <vscale x 1 x double> [[V0:%.*]], <vscale x 1 x double> [[V1:%.*]], <vscale x 1 x double> [[V2:%.*]], <vscale x 1 x double> [[V3:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x double> @llvm.vector.insert.nxv4f64.nxv1f64(<vscale x 4 x double> poison, <vscale x 1 x double> [[V0]], i64 0)
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x double> @llvm.vector.insert.nxv4f64.nxv1f64(<vscale x 4 x double> [[TMP0]], <vscale x 1 x double> [[V1]], i64...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/70355