[clang] [Clang] Change placeholder from `undef` to `poison` (PR #117064)

Pedro Lobo via cfe-commits cfe-commits at lists.llvm.org
Sun Dec 1 13:09:19 PST 2024


https://github.com/pedroclobo updated https://github.com/llvm/llvm-project/pull/117064

From 06df7fd5361a01209be58ebe2efd5f8f978a26f2 Mon Sep 17 00:00:00 2001
From: Pedro Lobo <pedro.lobo at tecnico.ulisboa.pt>
Date: Wed, 20 Nov 2024 21:30:46 +0000
Subject: [PATCH] [Clang] Change placeholder from `undef` to `poison`
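
Use `poison` rather than `undef` as the placeholder vector that
CreateCoercedLoad passes to llvm.vector.insert when coercing a loaded
fixed-length vector into a scalable vector type. The placeholder only
stands in for lanes whose contents do not matter, and poison is the
preferred LLVM placeholder for such don't-care values, so this is a
plain substitution; the affected CodeGen tests are updated to match.

For illustration, taken from the pure-scalable-args.c test updated
below, the emitted coerced-load sequence changes from

  %0 = load <2 x i8>, ptr %p, align 16
  %cast.scalable = tail call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v2i8(<vscale x 2 x i8> undef, <2 x i8> %0, i64 0)

to

  %0 = load <2 x i8>, ptr %p, align 16
  %cast.scalable = tail call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v2i8(<vscale x 2 x i8> poison, <2 x i8> %0, i64 0)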

---
 clang/lib/CodeGen/CGCall.cpp                  |   4 +-
 .../test/CodeGen/AArch64/pure-scalable-args.c |  20 +-
 ...cle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.cpp |   2 +-
 .../test/CodeGen/AArch64/sve-vls-arith-ops.c  | 312 +++++++++---------
 .../CodeGen/AArch64/sve-vls-bitwise-ops.c     |  72 ++--
 .../CodeGen/AArch64/sve-vls-compare-ops.c     | 144 ++++----
 .../test/CodeGen/AArch64/sve-vls-shift-ops.c  |  96 +++---
 .../CodeGen/RISCV/attr-rvv-vector-bits-cast.c |   2 +-
 .../RISCV/attr-rvv-vector-bits-codegen.c      |  22 +-
 clang/test/CodeGen/RISCV/rvv-vls-arith-ops.c  | 288 ++++++++--------
 .../test/CodeGen/RISCV/rvv-vls-bitwise-ops.c  |  64 ++--
 .../test/CodeGen/RISCV/rvv-vls-compare-ops.c  | 120 +++----
 clang/test/CodeGen/RISCV/rvv-vls-shift-ops.c  |  96 +++---
 .../CodeGen/attr-arm-sve-vector-bits-cast.c   |   2 +-
 .../attr-arm-sve-vector-bits-codegen.c        |  10 +-
 15 files changed, 627 insertions(+), 627 deletions(-)

diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index 35d495d4dfab82..7dba2b779f5ef5 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -1314,10 +1314,10 @@ static llvm::Value *CreateCoercedLoad(Address Src, llvm::Type *Ty,
       }
       if (ScalableDstTy->getElementType() == FixedSrcTy->getElementType()) {
         auto *Load = CGF.Builder.CreateLoad(Src);
-        auto *UndefVec = llvm::UndefValue::get(ScalableDstTy);
+        auto *PoisonVec = llvm::PoisonValue::get(ScalableDstTy);
         auto *Zero = llvm::Constant::getNullValue(CGF.CGM.Int64Ty);
         llvm::Value *Result = CGF.Builder.CreateInsertVector(
-            ScalableDstTy, UndefVec, Load, Zero, "cast.scalable");
+            ScalableDstTy, PoisonVec, Load, Zero, "cast.scalable");
         if (ScalableDstTy != Ty)
           Result = CGF.Builder.CreateBitCast(Result, Ty);
         return Result;
diff --git a/clang/test/CodeGen/AArch64/pure-scalable-args.c b/clang/test/CodeGen/AArch64/pure-scalable-args.c
index 53d5ce4e8c9d9b..51eda3f93943dc 100644
--- a/clang/test/CodeGen/AArch64/pure-scalable-args.c
+++ b/clang/test/CodeGen/AArch64/pure-scalable-args.c
@@ -70,23 +70,23 @@ void test_argpass_simple(PST *p) {
 // CHECK-AAPCS:      define dso_local void @test_argpass_simple(ptr nocapture noundef readonly %p)
 // CHECK-AAPCS-NEXT: entry:
 // CHECK-AAPCS-NEXT: %0 = load <2 x i8>, ptr %p, align 16
-// CHECK-AAPCS-NEXT: %cast.scalable = tail call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v2i8(<vscale x 2 x i8> undef, <2 x i8> %0, i64 0)
+// CHECK-AAPCS-NEXT: %cast.scalable = tail call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v2i8(<vscale x 2 x i8> poison, <2 x i8> %0, i64 0)
 // CHECK-AAPCS-NEXT: %1 = bitcast <vscale x 2 x i8> %cast.scalable to <vscale x 16 x i1>
 // CHECK-AAPCS-NEXT: %2 = getelementptr inbounds nuw i8, ptr %p, i64 16
 // CHECK-AAPCS-NEXT: %3 = load <2 x double>, ptr %2, align 16
-// CHECK-AAPCS-NEXT: %cast.scalable1 = tail call <vscale x 2 x double> @llvm.vector.insert.nxv2f64.v2f64(<vscale x 2 x double> undef, <2 x double> %3, i64 0)
+// CHECK-AAPCS-NEXT: %cast.scalable1 = tail call <vscale x 2 x double> @llvm.vector.insert.nxv2f64.v2f64(<vscale x 2 x double> poison, <2 x double> %3, i64 0)
 // CHECK-AAPCS-NEXT: %4 = getelementptr inbounds nuw i8, ptr %p, i64 32
 // CHECK-AAPCS-NEXT: %5 = load <4 x float>, ptr %4, align 16
-// CHECK-AAPCS-NEXT: %cast.scalable2 = tail call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v4f32(<vscale x 4 x float> undef, <4 x float> %5, i64 0)
+// CHECK-AAPCS-NEXT: %cast.scalable2 = tail call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v4f32(<vscale x 4 x float> poison, <4 x float> %5, i64 0)
 // CHECK-AAPCS-NEXT: %6 = getelementptr inbounds nuw i8, ptr %p, i64 48
 // CHECK-AAPCS-NEXT: %7 = load <4 x float>, ptr %6, align 16
-// CHECK-AAPCS-NEXT: %cast.scalable3 = tail call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v4f32(<vscale x 4 x float> undef, <4 x float> %7, i64 0)
+// CHECK-AAPCS-NEXT: %cast.scalable3 = tail call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v4f32(<vscale x 4 x float> poison, <4 x float> %7, i64 0)
 // CHECK-AAPCS-NEXT: %8 = getelementptr inbounds nuw i8, ptr %p, i64 64
 // CHECK-AAPCS-NEXT: %9 = load <16 x i8>, ptr %8, align 16
-// CHECK-AAPCS-NEXT: %cast.scalable4 = tail call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v16i8(<vscale x 16 x i8> undef, <16 x i8> %9, i64 0)
+// CHECK-AAPCS-NEXT: %cast.scalable4 = tail call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v16i8(<vscale x 16 x i8> poison, <16 x i8> %9, i64 0)
 // CHECK-AAPCS-NEXT: %10 = getelementptr inbounds nuw i8, ptr %p, i64 80
 // CHECK-AAPCS-NEXT: %11 = load <2 x i8>, ptr %10, align 16
-// CHECK-AAPCS-NEXT: %cast.scalable5 = tail call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v2i8(<vscale x 2 x i8> undef, <2 x i8> %11, i64 0)
+// CHECK-AAPCS-NEXT: %cast.scalable5 = tail call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v2i8(<vscale x 2 x i8> poison, <2 x i8> %11, i64 0)
 // CHECK-AAPCS-NEXT: %12 = bitcast <vscale x 2 x i8> %cast.scalable5 to <vscale x 16 x i1>
 // CHECK-AAPCS-NEXT: tail call void @argpass_simple_callee(<vscale x 16 x i1> %1, <vscale x 2 x double> %cast.scalable1, <vscale x 4 x float> %cast.scalable2, <vscale x 4 x float> %cast.scalable3, <vscale x 16 x i8> %cast.scalable4, <vscale x 16 x i1> %12)
 // CHECK-AAPCS-NEXT: ret void
@@ -431,9 +431,9 @@ void test_va_arg(int n, ...) {
 // CHECK-AAPCS-NEXT:   %v.sroa.43.0.vaarg.addr.sroa_idx = getelementptr inbounds i8, ptr %vaarg.addr, i64 48
 // CHECK-AAPCS-NEXT:   %v.sroa.43.0.copyload = load <4 x float>, ptr %v.sroa.43.0.vaarg.addr.sroa_idx, align 16
 // CHECK-AAPCS-NEXT:   call void @llvm.va_end.p0(ptr nonnull %ap)
-// CHECK-AAPCS-NEXT:   %cast.scalable = call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v2i8(<vscale x 2 x i8> undef, <2 x i8> %v.sroa.0.0.copyload, i64 0)
+// CHECK-AAPCS-NEXT:   %cast.scalable = call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v2i8(<vscale x 2 x i8> poison, <2 x i8> %v.sroa.0.0.copyload, i64 0)
 // CHECK-AAPCS-NEXT:   %3 = bitcast <vscale x 2 x i8> %cast.scalable to <vscale x 16 x i1>
-// CHECK-AAPCS-NEXT:   %cast.scalable2 = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v4f32(<vscale x 4 x float> undef, <4 x float> %v.sroa.43.0.copyload, i64 0)
+// CHECK-AAPCS-NEXT:   %cast.scalable2 = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v4f32(<vscale x 4 x float> poison, <4 x float> %v.sroa.43.0.copyload, i64 0)
 // CHECK-AAPCS-NEXT:   call void @use1(<vscale x 16 x i1> noundef %3, <vscale x 4 x float> noundef %cast.scalable2)
 // CHECK-AAPCS-NEXT:   call void @llvm.lifetime.end.p0(i64 32, ptr nonnull %ap)
 // CHECK-AAPCS-NEXT:   ret void
@@ -452,9 +452,9 @@ void test_va_arg(int n, ...) {
 // CHECK-DARWIN-NEXT:   %v.sroa.43.0..sroa_idx = getelementptr inbounds i8, ptr %0, i64 48
 // CHECK-DARWIN-NEXT:   %v.sroa.43.0.copyload = load <4 x float>, ptr %v.sroa.43.0..sroa_idx, align 16
 // CHECK-DARWIN-NEXT:   call void @llvm.va_end.p0(ptr nonnull %ap)
-// CHECK-DARWIN-NEXT:   %cast.scalable = call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v2i8(<vscale x 2 x i8> undef, <2 x i8> %v.sroa.0.0.copyload, i64 0)
+// CHECK-DARWIN-NEXT:   %cast.scalable = call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v2i8(<vscale x 2 x i8> poison, <2 x i8> %v.sroa.0.0.copyload, i64 0)
 // CHECK-DARWIN-NEXT:   %1 = bitcast <vscale x 2 x i8> %cast.scalable to <vscale x 16 x i1>
-// CHECK-DARWIN-NEXT:   %cast.scalable2 = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v4f32(<vscale x 4 x float> undef, <4 x float> %v.sroa.43.0.copyload, i64 0)
+// CHECK-DARWIN-NEXT:   %cast.scalable2 = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v4f32(<vscale x 4 x float> poison, <4 x float> %v.sroa.43.0.copyload, i64 0)
 // CHECK-DARWIN-NEXT:   call void @use1(<vscale x 16 x i1> noundef %1, <vscale x 4 x float> noundef %cast.scalable2)
 // CHECK-DARWIN-NEXT:   call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %ap)
 // CHECK-DARWIN-NEXT:   ret void
diff --git a/clang/test/CodeGen/AArch64/sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.cpp b/clang/test/CodeGen/AArch64/sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.cpp
index 30ea73b63bce16..e82069aab24864 100644
--- a/clang/test/CodeGen/AArch64/sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.cpp
+++ b/clang/test/CodeGen/AArch64/sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.cpp
@@ -52,7 +52,7 @@ void test02() {
 // CHECK-NEXT:   [[X:%.*]] = tail call <[[#div(VBITS, 32)]] x i32> @llvm.vector.extract.v[[#div(VBITS, 32)]]i32.nxv4i32(<vscale x 4 x i32> [[X_COERCE:%.*]], i64 0)
 // CHECK-NEXT:   [[Y:%.*]] = tail call <[[#div(VBITS, 32)]] x i32> @llvm.vector.extract.v[[#div(VBITS, 32)]]i32.nxv4i32(<vscale x 4 x i32> [[X_COERCE1:%.*]], i64 0)
 // CHECK-NEXT:   [[ADD:%.*]] = add <[[#div(VBITS, 32)]] x i32> [[Y]], [[X]]
-// CHECK-NEXT:   [[CASTSCALABLESVE:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v[[#div(VBITS, 32)]]i32(<vscale x 4 x i32> undef, <[[#div(VBITS, 32)]] x i32> [[ADD]], i64 0)
+// CHECK-NEXT:   [[CASTSCALABLESVE:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v[[#div(VBITS, 32)]]i32(<vscale x 4 x i32> poison, <[[#div(VBITS, 32)]] x i32> [[ADD]], i64 0)
 // CHECK-NEXT:   ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
 typedef svint32_t vec __attribute__((arm_sve_vector_bits(N)));
 auto f(vec x, vec y) { return x + y; } // Returns a vec.
diff --git a/clang/test/CodeGen/AArch64/sve-vls-arith-ops.c b/clang/test/CodeGen/AArch64/sve-vls-arith-ops.c
index a4648e9d59dcbf..571043bb29e201 100644
--- a/clang/test/CodeGen/AArch64/sve-vls-arith-ops.c
+++ b/clang/test/CodeGen/AArch64/sve-vls-arith-ops.c
@@ -32,7 +32,7 @@ typedef svbool_t fixed_bool_t __attribute__((arm_sve_vector_bits(N)));
 // CHECK-NEXT:    [[A:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[ADD:%.*]] = add <64 x i8> [[A]], [[B]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[ADD]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> poison, <64 x i8> [[ADD]], i64 0)
 // CHECK-NEXT:    ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
 //
 fixed_int8_t add_i8(fixed_int8_t a, fixed_int8_t b) {
@@ -44,7 +44,7 @@ fixed_int8_t add_i8(fixed_int8_t a, fixed_int8_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[ADD:%.*]] = add <32 x i16> [[A]], [[B]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[ADD]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> poison, <32 x i16> [[ADD]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
 //
 fixed_int16_t add_i16(fixed_int16_t a, fixed_int16_t b) {
@@ -56,7 +56,7 @@ fixed_int16_t add_i16(fixed_int16_t a, fixed_int16_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[ADD:%.*]] = add <16 x i32> [[A]], [[B]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[ADD]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> poison, <16 x i32> [[ADD]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
 //
 fixed_int32_t add_i32(fixed_int32_t a, fixed_int32_t b) {
@@ -68,7 +68,7 @@ fixed_int32_t add_i32(fixed_int32_t a, fixed_int32_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[ADD:%.*]] = add <8 x i64> [[A]], [[B]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[ADD]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> poison, <8 x i64> [[ADD]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
 //
 fixed_int64_t add_i64(fixed_int64_t a, fixed_int64_t b) {
@@ -80,7 +80,7 @@ fixed_int64_t add_i64(fixed_int64_t a, fixed_int64_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[ADD:%.*]] = add <64 x i8> [[A]], [[B]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[ADD]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> poison, <64 x i8> [[ADD]], i64 0)
 // CHECK-NEXT:    ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
 //
 fixed_uint8_t add_u8(fixed_uint8_t a, fixed_uint8_t b) {
@@ -92,7 +92,7 @@ fixed_uint8_t add_u8(fixed_uint8_t a, fixed_uint8_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[ADD:%.*]] = add <32 x i16> [[A]], [[B]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[ADD]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> poison, <32 x i16> [[ADD]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
 //
 fixed_uint16_t add_u16(fixed_uint16_t a, fixed_uint16_t b) {
@@ -104,7 +104,7 @@ fixed_uint16_t add_u16(fixed_uint16_t a, fixed_uint16_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[ADD:%.*]] = add <16 x i32> [[A]], [[B]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[ADD]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> poison, <16 x i32> [[ADD]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
 //
 fixed_uint32_t add_u32(fixed_uint32_t a, fixed_uint32_t b) {
@@ -116,7 +116,7 @@ fixed_uint32_t add_u32(fixed_uint32_t a, fixed_uint32_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[ADD:%.*]] = add <8 x i64> [[A]], [[B]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[ADD]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> poison, <8 x i64> [[ADD]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
 //
 fixed_uint64_t add_u64(fixed_uint64_t a, fixed_uint64_t b) {
@@ -131,7 +131,7 @@ fixed_uint64_t add_u64(fixed_uint64_t a, fixed_uint64_t b) {
 // CHECK-NEXT:    [[CONV2:%.*]] = fpext <32 x half> [[B]] to <32 x float>
 // CHECK-NEXT:    [[ADD:%.*]] = fadd <32 x float> [[CONV]], [[CONV2]]
 // CHECK-NEXT:    [[CONV3:%.*]] = fptrunc <32 x float> [[ADD]] to <32 x half>
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x half> @llvm.vector.insert.nxv8f16.v32f16(<vscale x 8 x half> undef, <32 x half> [[CONV3]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x half> @llvm.vector.insert.nxv8f16.v32f16(<vscale x 8 x half> poison, <32 x half> [[CONV3]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x half> [[CASTSCALABLESVE]]
 //
 fixed_float16_t add_f16(fixed_float16_t a, fixed_float16_t b) {
@@ -143,7 +143,7 @@ fixed_float16_t add_f16(fixed_float16_t a, fixed_float16_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <16 x float> @llvm.vector.extract.v16f32.nxv4f32(<vscale x 4 x float> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <16 x float> @llvm.vector.extract.v16f32.nxv4f32(<vscale x 4 x float> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[ADD:%.*]] = fadd <16 x float> [[A]], [[B]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v16f32(<vscale x 4 x float> undef, <16 x float> [[ADD]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v16f32(<vscale x 4 x float> poison, <16 x float> [[ADD]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x float> [[CASTSCALABLESVE]]
 //
 fixed_float32_t add_f32(fixed_float32_t a, fixed_float32_t b) {
@@ -155,7 +155,7 @@ fixed_float32_t add_f32(fixed_float32_t a, fixed_float32_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <8 x double> @llvm.vector.extract.v8f64.nxv2f64(<vscale x 2 x double> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <8 x double> @llvm.vector.extract.v8f64.nxv2f64(<vscale x 2 x double> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[ADD:%.*]] = fadd <8 x double> [[A]], [[B]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x double> @llvm.vector.insert.nxv2f64.v8f64(<vscale x 2 x double> undef, <8 x double> [[ADD]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x double> @llvm.vector.insert.nxv2f64.v8f64(<vscale x 2 x double> poison, <8 x double> [[ADD]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x double> [[CASTSCALABLESVE]]
 //
 fixed_float64_t add_f64(fixed_float64_t a, fixed_float64_t b) {
@@ -167,7 +167,7 @@ fixed_float64_t add_f64(fixed_float64_t a, fixed_float64_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[ADD:%.*]] = add <64 x i8> [[A]], [[B]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[ADD]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> poison, <64 x i8> [[ADD]], i64 0)
 // CHECK-NEXT:    ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
 //
 fixed_int8_t add_inplace_i8(fixed_int8_t a, fixed_int8_t b) {
@@ -179,7 +179,7 @@ fixed_int8_t add_inplace_i8(fixed_int8_t a, fixed_int8_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[ADD:%.*]] = add <32 x i16> [[A]], [[B]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[ADD]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> poison, <32 x i16> [[ADD]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
 //
 fixed_int16_t add_inplace_i16(fixed_int16_t a, fixed_int16_t b) {
@@ -191,7 +191,7 @@ fixed_int16_t add_inplace_i16(fixed_int16_t a, fixed_int16_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[ADD:%.*]] = add <16 x i32> [[A]], [[B]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[ADD]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> poison, <16 x i32> [[ADD]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
 //
 fixed_int32_t add_inplace_i32(fixed_int32_t a, fixed_int32_t b) {
@@ -203,7 +203,7 @@ fixed_int32_t add_inplace_i32(fixed_int32_t a, fixed_int32_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[ADD:%.*]] = add <8 x i64> [[A]], [[B]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[ADD]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> poison, <8 x i64> [[ADD]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
 //
 fixed_int64_t add_inplace_i64(fixed_int64_t a, fixed_int64_t b) {
@@ -215,7 +215,7 @@ fixed_int64_t add_inplace_i64(fixed_int64_t a, fixed_int64_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[ADD:%.*]] = add <64 x i8> [[A]], [[B]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[ADD]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> poison, <64 x i8> [[ADD]], i64 0)
 // CHECK-NEXT:    ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
 //
 fixed_uint8_t add_inplace_u8(fixed_uint8_t a, fixed_uint8_t b) {
@@ -227,7 +227,7 @@ fixed_uint8_t add_inplace_u8(fixed_uint8_t a, fixed_uint8_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[ADD:%.*]] = add <32 x i16> [[A]], [[B]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[ADD]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> poison, <32 x i16> [[ADD]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
 //
 fixed_uint16_t add_inplace_u16(fixed_uint16_t a, fixed_uint16_t b) {
@@ -239,7 +239,7 @@ fixed_uint16_t add_inplace_u16(fixed_uint16_t a, fixed_uint16_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[ADD:%.*]] = add <16 x i32> [[A]], [[B]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[ADD]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> poison, <16 x i32> [[ADD]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
 //
 fixed_uint32_t add_inplace_u32(fixed_uint32_t a, fixed_uint32_t b) {
@@ -251,7 +251,7 @@ fixed_uint32_t add_inplace_u32(fixed_uint32_t a, fixed_uint32_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[ADD:%.*]] = add <8 x i64> [[A]], [[B]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[ADD]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> poison, <8 x i64> [[ADD]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
 //
 fixed_uint64_t add_inplace_u64(fixed_uint64_t a, fixed_uint64_t b) {
@@ -266,7 +266,7 @@ fixed_uint64_t add_inplace_u64(fixed_uint64_t a, fixed_uint64_t b) {
 // CHECK-NEXT:    [[CONV2:%.*]] = fpext <32 x half> [[A]] to <32 x float>
 // CHECK-NEXT:    [[ADD:%.*]] = fadd <32 x float> [[CONV2]], [[CONV]]
 // CHECK-NEXT:    [[CONV3:%.*]] = fptrunc <32 x float> [[ADD]] to <32 x half>
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x half> @llvm.vector.insert.nxv8f16.v32f16(<vscale x 8 x half> undef, <32 x half> [[CONV3]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x half> @llvm.vector.insert.nxv8f16.v32f16(<vscale x 8 x half> poison, <32 x half> [[CONV3]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x half> [[CASTSCALABLESVE]]
 //
 fixed_float16_t add_inplace_f16(fixed_float16_t a, fixed_float16_t b) {
@@ -278,7 +278,7 @@ fixed_float16_t add_inplace_f16(fixed_float16_t a, fixed_float16_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <16 x float> @llvm.vector.extract.v16f32.nxv4f32(<vscale x 4 x float> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <16 x float> @llvm.vector.extract.v16f32.nxv4f32(<vscale x 4 x float> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[ADD:%.*]] = fadd <16 x float> [[A]], [[B]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v16f32(<vscale x 4 x float> undef, <16 x float> [[ADD]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v16f32(<vscale x 4 x float> poison, <16 x float> [[ADD]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x float> [[CASTSCALABLESVE]]
 //
 fixed_float32_t add_inplace_f32(fixed_float32_t a, fixed_float32_t b) {
@@ -290,7 +290,7 @@ fixed_float32_t add_inplace_f32(fixed_float32_t a, fixed_float32_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <8 x double> @llvm.vector.extract.v8f64.nxv2f64(<vscale x 2 x double> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <8 x double> @llvm.vector.extract.v8f64.nxv2f64(<vscale x 2 x double> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[ADD:%.*]] = fadd <8 x double> [[A]], [[B]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x double> @llvm.vector.insert.nxv2f64.v8f64(<vscale x 2 x double> undef, <8 x double> [[ADD]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x double> @llvm.vector.insert.nxv2f64.v8f64(<vscale x 2 x double> poison, <8 x double> [[ADD]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x double> [[CASTSCALABLESVE]]
 //
 fixed_float64_t add_inplace_f64(fixed_float64_t a, fixed_float64_t b) {
@@ -303,7 +303,7 @@ fixed_float64_t add_inplace_f64(fixed_float64_t a, fixed_float64_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <64 x i8> poison, i8 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <64 x i8> [[SPLAT_SPLATINSERT]], <64 x i8> poison, <64 x i32> zeroinitializer
 // CHECK-NEXT:    [[ADD:%.*]] = add <64 x i8> [[A]], [[SPLAT_SPLAT]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[ADD]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> poison, <64 x i8> [[ADD]], i64 0)
 // CHECK-NEXT:    ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
 //
 fixed_int8_t add_scalar_i8(fixed_int8_t a, int8_t b) {
@@ -316,7 +316,7 @@ fixed_int8_t add_scalar_i8(fixed_int8_t a, int8_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i16> poison, i16 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <32 x i16> [[SPLAT_SPLATINSERT]], <32 x i16> poison, <32 x i32> zeroinitializer
 // CHECK-NEXT:    [[ADD:%.*]] = add <32 x i16> [[A]], [[SPLAT_SPLAT]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[ADD]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> poison, <32 x i16> [[ADD]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
 //
 fixed_int16_t add_scalar_i16(fixed_int16_t a, int16_t b) {
@@ -329,7 +329,7 @@ fixed_int16_t add_scalar_i16(fixed_int16_t a, int16_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i32> [[SPLAT_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer
 // CHECK-NEXT:    [[ADD:%.*]] = add <16 x i32> [[A]], [[SPLAT_SPLAT]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[ADD]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> poison, <16 x i32> [[ADD]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
 //
 fixed_int32_t add_scalar_i32(fixed_int32_t a, int32_t b) {
@@ -342,7 +342,7 @@ fixed_int32_t add_scalar_i32(fixed_int32_t a, int32_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <8 x i64> [[SPLAT_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[ADD:%.*]] = add <8 x i64> [[A]], [[SPLAT_SPLAT]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[ADD]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> poison, <8 x i64> [[ADD]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
 //
 fixed_int64_t add_scalar_i64(fixed_int64_t a, int64_t b) {
@@ -355,7 +355,7 @@ fixed_int64_t add_scalar_i64(fixed_int64_t a, int64_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <64 x i8> poison, i8 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <64 x i8> [[SPLAT_SPLATINSERT]], <64 x i8> poison, <64 x i32> zeroinitializer
 // CHECK-NEXT:    [[ADD:%.*]] = add <64 x i8> [[A]], [[SPLAT_SPLAT]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[ADD]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> poison, <64 x i8> [[ADD]], i64 0)
 // CHECK-NEXT:    ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
 //
 fixed_uint8_t add_scalar_u8(fixed_uint8_t a, uint8_t b) {
@@ -368,7 +368,7 @@ fixed_uint8_t add_scalar_u8(fixed_uint8_t a, uint8_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i16> poison, i16 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <32 x i16> [[SPLAT_SPLATINSERT]], <32 x i16> poison, <32 x i32> zeroinitializer
 // CHECK-NEXT:    [[ADD:%.*]] = add <32 x i16> [[A]], [[SPLAT_SPLAT]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[ADD]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> poison, <32 x i16> [[ADD]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
 //
 fixed_uint16_t add_scalar_u16(fixed_uint16_t a, uint16_t b) {
@@ -381,7 +381,7 @@ fixed_uint16_t add_scalar_u16(fixed_uint16_t a, uint16_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i32> [[SPLAT_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer
 // CHECK-NEXT:    [[ADD:%.*]] = add <16 x i32> [[A]], [[SPLAT_SPLAT]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[ADD]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> poison, <16 x i32> [[ADD]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
 //
 fixed_uint32_t add_scalar_u32(fixed_uint32_t a, uint32_t b) {
@@ -394,7 +394,7 @@ fixed_uint32_t add_scalar_u32(fixed_uint32_t a, uint32_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <8 x i64> [[SPLAT_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[ADD:%.*]] = add <8 x i64> [[A]], [[SPLAT_SPLAT]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[ADD]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> poison, <8 x i64> [[ADD]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
 //
 fixed_uint64_t add_scalar_u64(fixed_uint64_t a, uint64_t b) {
@@ -407,7 +407,7 @@ fixed_uint64_t add_scalar_u64(fixed_uint64_t a, uint64_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x half> poison, half [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <32 x half> [[SPLAT_SPLATINSERT]], <32 x half> poison, <32 x i32> zeroinitializer
 // CHECK-NEXT:    [[ADD:%.*]] = fadd <32 x half> [[A]], [[SPLAT_SPLAT]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x half> @llvm.vector.insert.nxv8f16.v32f16(<vscale x 8 x half> undef, <32 x half> [[ADD]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x half> @llvm.vector.insert.nxv8f16.v32f16(<vscale x 8 x half> poison, <32 x half> [[ADD]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x half> [[CASTSCALABLESVE]]
 //
 fixed_float16_t add_scalar_f16(fixed_float16_t a, __fp16 b) {
@@ -420,7 +420,7 @@ fixed_float16_t add_scalar_f16(fixed_float16_t a, __fp16 b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x float> poison, float [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <16 x float> [[SPLAT_SPLATINSERT]], <16 x float> poison, <16 x i32> zeroinitializer
 // CHECK-NEXT:    [[ADD:%.*]] = fadd <16 x float> [[A]], [[SPLAT_SPLAT]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v16f32(<vscale x 4 x float> undef, <16 x float> [[ADD]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v16f32(<vscale x 4 x float> poison, <16 x float> [[ADD]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x float> [[CASTSCALABLESVE]]
 //
 fixed_float32_t add_scalar_f32(fixed_float32_t a, float b) {
@@ -433,7 +433,7 @@ fixed_float32_t add_scalar_f32(fixed_float32_t a, float b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x double> poison, double [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <8 x double> [[SPLAT_SPLATINSERT]], <8 x double> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[ADD:%.*]] = fadd <8 x double> [[A]], [[SPLAT_SPLAT]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x double> @llvm.vector.insert.nxv2f64.v8f64(<vscale x 2 x double> undef, <8 x double> [[ADD]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x double> @llvm.vector.insert.nxv2f64.v8f64(<vscale x 2 x double> poison, <8 x double> [[ADD]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x double> [[CASTSCALABLESVE]]
 //
 fixed_float64_t add_scalar_f64(fixed_float64_t a, double b) {
@@ -447,7 +447,7 @@ fixed_float64_t add_scalar_f64(fixed_float64_t a, double b) {
 // CHECK-NEXT:    [[A:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[SUB:%.*]] = sub <64 x i8> [[A]], [[B]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[SUB]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> poison, <64 x i8> [[SUB]], i64 0)
 // CHECK-NEXT:    ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
 //
 fixed_int8_t sub_i8(fixed_int8_t a, fixed_int8_t b) {
@@ -459,7 +459,7 @@ fixed_int8_t sub_i8(fixed_int8_t a, fixed_int8_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[SUB:%.*]] = sub <32 x i16> [[A]], [[B]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[SUB]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> poison, <32 x i16> [[SUB]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
 //
 fixed_int16_t sub_i16(fixed_int16_t a, fixed_int16_t b) {
@@ -471,7 +471,7 @@ fixed_int16_t sub_i16(fixed_int16_t a, fixed_int16_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[SUB:%.*]] = sub <16 x i32> [[A]], [[B]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[SUB]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> poison, <16 x i32> [[SUB]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
 //
 fixed_int32_t sub_i32(fixed_int32_t a, fixed_int32_t b) {
@@ -483,7 +483,7 @@ fixed_int32_t sub_i32(fixed_int32_t a, fixed_int32_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[SUB:%.*]] = sub <8 x i64> [[A]], [[B]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[SUB]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> poison, <8 x i64> [[SUB]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
 //
 fixed_int64_t sub_i64(fixed_int64_t a, fixed_int64_t b) {
@@ -495,7 +495,7 @@ fixed_int64_t sub_i64(fixed_int64_t a, fixed_int64_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[SUB:%.*]] = sub <64 x i8> [[A]], [[B]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[SUB]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> poison, <64 x i8> [[SUB]], i64 0)
 // CHECK-NEXT:    ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
 //
 fixed_uint8_t sub_u8(fixed_uint8_t a, fixed_uint8_t b) {
@@ -507,7 +507,7 @@ fixed_uint8_t sub_u8(fixed_uint8_t a, fixed_uint8_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[SUB:%.*]] = sub <32 x i16> [[A]], [[B]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[SUB]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> poison, <32 x i16> [[SUB]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
 //
 fixed_uint16_t sub_u16(fixed_uint16_t a, fixed_uint16_t b) {
@@ -519,7 +519,7 @@ fixed_uint16_t sub_u16(fixed_uint16_t a, fixed_uint16_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[SUB:%.*]] = sub <16 x i32> [[A]], [[B]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[SUB]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> poison, <16 x i32> [[SUB]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
 //
 fixed_uint32_t sub_u32(fixed_uint32_t a, fixed_uint32_t b) {
@@ -531,7 +531,7 @@ fixed_uint32_t sub_u32(fixed_uint32_t a, fixed_uint32_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[SUB:%.*]] = sub <8 x i64> [[A]], [[B]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[SUB]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> poison, <8 x i64> [[SUB]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
 //
 fixed_uint64_t sub_u64(fixed_uint64_t a, fixed_uint64_t b) {
@@ -546,7 +546,7 @@ fixed_uint64_t sub_u64(fixed_uint64_t a, fixed_uint64_t b) {
 // CHECK-NEXT:    [[CONV2:%.*]] = fpext <32 x half> [[B]] to <32 x float>
 // CHECK-NEXT:    [[SUB:%.*]] = fsub <32 x float> [[CONV]], [[CONV2]]
 // CHECK-NEXT:    [[CONV3:%.*]] = fptrunc <32 x float> [[SUB]] to <32 x half>
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x half> @llvm.vector.insert.nxv8f16.v32f16(<vscale x 8 x half> undef, <32 x half> [[CONV3]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x half> @llvm.vector.insert.nxv8f16.v32f16(<vscale x 8 x half> poison, <32 x half> [[CONV3]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x half> [[CASTSCALABLESVE]]
 //
 fixed_float16_t sub_f16(fixed_float16_t a, fixed_float16_t b) {
@@ -558,7 +558,7 @@ fixed_float16_t sub_f16(fixed_float16_t a, fixed_float16_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <16 x float> @llvm.vector.extract.v16f32.nxv4f32(<vscale x 4 x float> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <16 x float> @llvm.vector.extract.v16f32.nxv4f32(<vscale x 4 x float> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[SUB:%.*]] = fsub <16 x float> [[A]], [[B]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v16f32(<vscale x 4 x float> undef, <16 x float> [[SUB]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v16f32(<vscale x 4 x float> poison, <16 x float> [[SUB]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x float> [[CASTSCALABLESVE]]
 //
 fixed_float32_t sub_f32(fixed_float32_t a, fixed_float32_t b) {
@@ -570,7 +570,7 @@ fixed_float32_t sub_f32(fixed_float32_t a, fixed_float32_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <8 x double> @llvm.vector.extract.v8f64.nxv2f64(<vscale x 2 x double> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <8 x double> @llvm.vector.extract.v8f64.nxv2f64(<vscale x 2 x double> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[SUB:%.*]] = fsub <8 x double> [[A]], [[B]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x double> @llvm.vector.insert.nxv2f64.v8f64(<vscale x 2 x double> undef, <8 x double> [[SUB]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x double> @llvm.vector.insert.nxv2f64.v8f64(<vscale x 2 x double> poison, <8 x double> [[SUB]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x double> [[CASTSCALABLESVE]]
 //
 fixed_float64_t sub_f64(fixed_float64_t a, fixed_float64_t b) {
@@ -582,7 +582,7 @@ fixed_float64_t sub_f64(fixed_float64_t a, fixed_float64_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[SUB:%.*]] = sub <64 x i8> [[A]], [[B]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[SUB]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> poison, <64 x i8> [[SUB]], i64 0)
 // CHECK-NEXT:    ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
 //
 fixed_int8_t sub_inplace_i8(fixed_int8_t a, fixed_int8_t b) {
@@ -594,7 +594,7 @@ fixed_int8_t sub_inplace_i8(fixed_int8_t a, fixed_int8_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[SUB:%.*]] = sub <32 x i16> [[A]], [[B]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[SUB]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> poison, <32 x i16> [[SUB]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
 //
 fixed_int16_t sub_inplace_i16(fixed_int16_t a, fixed_int16_t b) {
@@ -606,7 +606,7 @@ fixed_int16_t sub_inplace_i16(fixed_int16_t a, fixed_int16_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[SUB:%.*]] = sub <16 x i32> [[A]], [[B]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[SUB]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> poison, <16 x i32> [[SUB]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
 //
 fixed_int32_t sub_inplace_i32(fixed_int32_t a, fixed_int32_t b) {
@@ -618,7 +618,7 @@ fixed_int32_t sub_inplace_i32(fixed_int32_t a, fixed_int32_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[SUB:%.*]] = sub <8 x i64> [[A]], [[B]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[SUB]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> poison, <8 x i64> [[SUB]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
 //
 fixed_int64_t sub_inplace_i64(fixed_int64_t a, fixed_int64_t b) {
@@ -630,7 +630,7 @@ fixed_int64_t sub_inplace_i64(fixed_int64_t a, fixed_int64_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[SUB:%.*]] = sub <64 x i8> [[A]], [[B]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[SUB]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> poison, <64 x i8> [[SUB]], i64 0)
 // CHECK-NEXT:    ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
 //
 fixed_uint8_t sub_inplace_u8(fixed_uint8_t a, fixed_uint8_t b) {
@@ -642,7 +642,7 @@ fixed_uint8_t sub_inplace_u8(fixed_uint8_t a, fixed_uint8_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[SUB:%.*]] = sub <32 x i16> [[A]], [[B]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[SUB]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> poison, <32 x i16> [[SUB]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
 //
 fixed_uint16_t sub_inplace_u16(fixed_uint16_t a, fixed_uint16_t b) {
@@ -654,7 +654,7 @@ fixed_uint16_t sub_inplace_u16(fixed_uint16_t a, fixed_uint16_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[SUB:%.*]] = sub <16 x i32> [[A]], [[B]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[SUB]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> poison, <16 x i32> [[SUB]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
 //
 fixed_uint32_t sub_inplace_u32(fixed_uint32_t a, fixed_uint32_t b) {
@@ -666,7 +666,7 @@ fixed_uint32_t sub_inplace_u32(fixed_uint32_t a, fixed_uint32_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[SUB:%.*]] = sub <8 x i64> [[A]], [[B]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[SUB]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> poison, <8 x i64> [[SUB]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
 //
 fixed_uint64_t sub_inplace_u64(fixed_uint64_t a, fixed_uint64_t b) {
@@ -681,7 +681,7 @@ fixed_uint64_t sub_inplace_u64(fixed_uint64_t a, fixed_uint64_t b) {
 // CHECK-NEXT:    [[CONV2:%.*]] = fpext <32 x half> [[B]] to <32 x float>
 // CHECK-NEXT:    [[SUB:%.*]] = fsub <32 x float> [[CONV]], [[CONV2]]
 // CHECK-NEXT:    [[CONV3:%.*]] = fptrunc <32 x float> [[SUB]] to <32 x half>
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x half> @llvm.vector.insert.nxv8f16.v32f16(<vscale x 8 x half> undef, <32 x half> [[CONV3]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x half> @llvm.vector.insert.nxv8f16.v32f16(<vscale x 8 x half> poison, <32 x half> [[CONV3]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x half> [[CASTSCALABLESVE]]
 //
 fixed_float16_t sub_inplace_f16(fixed_float16_t a, fixed_float16_t b) {
@@ -693,7 +693,7 @@ fixed_float16_t sub_inplace_f16(fixed_float16_t a, fixed_float16_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <16 x float> @llvm.vector.extract.v16f32.nxv4f32(<vscale x 4 x float> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <16 x float> @llvm.vector.extract.v16f32.nxv4f32(<vscale x 4 x float> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[SUB:%.*]] = fsub <16 x float> [[A]], [[B]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v16f32(<vscale x 4 x float> undef, <16 x float> [[SUB]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v16f32(<vscale x 4 x float> poison, <16 x float> [[SUB]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x float> [[CASTSCALABLESVE]]
 //
 fixed_float32_t sub_inplace_f32(fixed_float32_t a, fixed_float32_t b) {
@@ -705,7 +705,7 @@ fixed_float32_t sub_inplace_f32(fixed_float32_t a, fixed_float32_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <8 x double> @llvm.vector.extract.v8f64.nxv2f64(<vscale x 2 x double> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <8 x double> @llvm.vector.extract.v8f64.nxv2f64(<vscale x 2 x double> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[SUB:%.*]] = fsub <8 x double> [[A]], [[B]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x double> @llvm.vector.insert.nxv2f64.v8f64(<vscale x 2 x double> undef, <8 x double> [[SUB]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x double> @llvm.vector.insert.nxv2f64.v8f64(<vscale x 2 x double> poison, <8 x double> [[SUB]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x double> [[CASTSCALABLESVE]]
 //
 fixed_float64_t sub_inplace_f64(fixed_float64_t a, fixed_float64_t b) {
@@ -718,7 +718,7 @@ fixed_float64_t sub_inplace_f64(fixed_float64_t a, fixed_float64_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <64 x i8> poison, i8 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <64 x i8> [[SPLAT_SPLATINSERT]], <64 x i8> poison, <64 x i32> zeroinitializer
 // CHECK-NEXT:    [[SUB:%.*]] = sub <64 x i8> [[A]], [[SPLAT_SPLAT]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[SUB]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> poison, <64 x i8> [[SUB]], i64 0)
 // CHECK-NEXT:    ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
 //
 fixed_int8_t sub_scalar_i8(fixed_int8_t a, int8_t b) {
@@ -731,7 +731,7 @@ fixed_int8_t sub_scalar_i8(fixed_int8_t a, int8_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i16> poison, i16 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <32 x i16> [[SPLAT_SPLATINSERT]], <32 x i16> poison, <32 x i32> zeroinitializer
 // CHECK-NEXT:    [[SUB:%.*]] = sub <32 x i16> [[A]], [[SPLAT_SPLAT]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[SUB]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> poison, <32 x i16> [[SUB]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
 //
 fixed_int16_t sub_scalar_i16(fixed_int16_t a, int16_t b) {
@@ -744,7 +744,7 @@ fixed_int16_t sub_scalar_i16(fixed_int16_t a, int16_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i32> [[SPLAT_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer
 // CHECK-NEXT:    [[SUB:%.*]] = sub <16 x i32> [[A]], [[SPLAT_SPLAT]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[SUB]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> poison, <16 x i32> [[SUB]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
 //
 fixed_int32_t sub_scalar_i32(fixed_int32_t a, int32_t b) {
@@ -757,7 +757,7 @@ fixed_int32_t sub_scalar_i32(fixed_int32_t a, int32_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <8 x i64> [[SPLAT_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[SUB:%.*]] = sub <8 x i64> [[A]], [[SPLAT_SPLAT]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[SUB]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> poison, <8 x i64> [[SUB]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
 //
 fixed_int64_t sub_scalar_i64(fixed_int64_t a, int64_t b) {
@@ -770,7 +770,7 @@ fixed_int64_t sub_scalar_i64(fixed_int64_t a, int64_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <64 x i8> poison, i8 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <64 x i8> [[SPLAT_SPLATINSERT]], <64 x i8> poison, <64 x i32> zeroinitializer
 // CHECK-NEXT:    [[SUB:%.*]] = sub <64 x i8> [[A]], [[SPLAT_SPLAT]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[SUB]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> poison, <64 x i8> [[SUB]], i64 0)
 // CHECK-NEXT:    ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
 //
 fixed_uint8_t sub_scalar_u8(fixed_uint8_t a, uint8_t b) {
@@ -783,7 +783,7 @@ fixed_uint8_t sub_scalar_u8(fixed_uint8_t a, uint8_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i16> poison, i16 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <32 x i16> [[SPLAT_SPLATINSERT]], <32 x i16> poison, <32 x i32> zeroinitializer
 // CHECK-NEXT:    [[SUB:%.*]] = sub <32 x i16> [[A]], [[SPLAT_SPLAT]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[SUB]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> poison, <32 x i16> [[SUB]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
 //
 fixed_uint16_t sub_scalar_u16(fixed_uint16_t a, uint16_t b) {
@@ -796,7 +796,7 @@ fixed_uint16_t sub_scalar_u16(fixed_uint16_t a, uint16_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i32> [[SPLAT_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer
 // CHECK-NEXT:    [[SUB:%.*]] = sub <16 x i32> [[A]], [[SPLAT_SPLAT]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[SUB]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> poison, <16 x i32> [[SUB]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
 //
 fixed_uint32_t sub_scalar_u32(fixed_uint32_t a, uint32_t b) {
@@ -809,7 +809,7 @@ fixed_uint32_t sub_scalar_u32(fixed_uint32_t a, uint32_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <8 x i64> [[SPLAT_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[SUB:%.*]] = sub <8 x i64> [[A]], [[SPLAT_SPLAT]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[SUB]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> poison, <8 x i64> [[SUB]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
 //
 fixed_uint64_t sub_scalar_u64(fixed_uint64_t a, uint64_t b) {
@@ -822,7 +822,7 @@ fixed_uint64_t sub_scalar_u64(fixed_uint64_t a, uint64_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x half> poison, half [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <32 x half> [[SPLAT_SPLATINSERT]], <32 x half> poison, <32 x i32> zeroinitializer
 // CHECK-NEXT:    [[SUB:%.*]] = fsub <32 x half> [[A]], [[SPLAT_SPLAT]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x half> @llvm.vector.insert.nxv8f16.v32f16(<vscale x 8 x half> undef, <32 x half> [[SUB]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x half> @llvm.vector.insert.nxv8f16.v32f16(<vscale x 8 x half> poison, <32 x half> [[SUB]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x half> [[CASTSCALABLESVE]]
 //
 fixed_float16_t sub_scalar_f16(fixed_float16_t a, __fp16 b) {
@@ -835,7 +835,7 @@ fixed_float16_t sub_scalar_f16(fixed_float16_t a, __fp16 b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x float> poison, float [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <16 x float> [[SPLAT_SPLATINSERT]], <16 x float> poison, <16 x i32> zeroinitializer
 // CHECK-NEXT:    [[SUB:%.*]] = fsub <16 x float> [[A]], [[SPLAT_SPLAT]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v16f32(<vscale x 4 x float> undef, <16 x float> [[SUB]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v16f32(<vscale x 4 x float> poison, <16 x float> [[SUB]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x float> [[CASTSCALABLESVE]]
 //
 fixed_float32_t sub_scalar_f32(fixed_float32_t a, float b) {
@@ -848,7 +848,7 @@ fixed_float32_t sub_scalar_f32(fixed_float32_t a, float b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x double> poison, double [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <8 x double> [[SPLAT_SPLATINSERT]], <8 x double> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[SUB:%.*]] = fsub <8 x double> [[A]], [[SPLAT_SPLAT]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x double> @llvm.vector.insert.nxv2f64.v8f64(<vscale x 2 x double> undef, <8 x double> [[SUB]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x double> @llvm.vector.insert.nxv2f64.v8f64(<vscale x 2 x double> poison, <8 x double> [[SUB]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x double> [[CASTSCALABLESVE]]
 //
 fixed_float64_t sub_scalar_f64(fixed_float64_t a, double b) {
@@ -862,7 +862,7 @@ fixed_float64_t sub_scalar_f64(fixed_float64_t a, double b) {
 // CHECK-NEXT:    [[A:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[MUL:%.*]] = mul <64 x i8> [[A]], [[B]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[MUL]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> poison, <64 x i8> [[MUL]], i64 0)
 // CHECK-NEXT:    ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
 //
 fixed_int8_t mul_i8(fixed_int8_t a, fixed_int8_t b) {
@@ -874,7 +874,7 @@ fixed_int8_t mul_i8(fixed_int8_t a, fixed_int8_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[MUL:%.*]] = mul <32 x i16> [[A]], [[B]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[MUL]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> poison, <32 x i16> [[MUL]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
 //
 fixed_int16_t mul_i16(fixed_int16_t a, fixed_int16_t b) {
@@ -886,7 +886,7 @@ fixed_int16_t mul_i16(fixed_int16_t a, fixed_int16_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[MUL:%.*]] = mul <16 x i32> [[A]], [[B]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[MUL]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> poison, <16 x i32> [[MUL]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
 //
 fixed_int32_t mul_i32(fixed_int32_t a, fixed_int32_t b) {
@@ -898,7 +898,7 @@ fixed_int32_t mul_i32(fixed_int32_t a, fixed_int32_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[MUL:%.*]] = mul <8 x i64> [[A]], [[B]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[MUL]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> poison, <8 x i64> [[MUL]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
 //
 fixed_int64_t mul_i64(fixed_int64_t a, fixed_int64_t b) {
@@ -910,7 +910,7 @@ fixed_int64_t mul_i64(fixed_int64_t a, fixed_int64_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[MUL:%.*]] = mul <64 x i8> [[A]], [[B]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[MUL]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> poison, <64 x i8> [[MUL]], i64 0)
 // CHECK-NEXT:    ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
 //
 fixed_uint8_t mul_u8(fixed_uint8_t a, fixed_uint8_t b) {
@@ -922,7 +922,7 @@ fixed_uint8_t mul_u8(fixed_uint8_t a, fixed_uint8_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[MUL:%.*]] = mul <32 x i16> [[A]], [[B]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[MUL]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> poison, <32 x i16> [[MUL]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
 //
 fixed_uint16_t mul_u16(fixed_uint16_t a, fixed_uint16_t b) {
@@ -934,7 +934,7 @@ fixed_uint16_t mul_u16(fixed_uint16_t a, fixed_uint16_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[MUL:%.*]] = mul <16 x i32> [[A]], [[B]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[MUL]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> poison, <16 x i32> [[MUL]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
 //
 fixed_uint32_t mul_u32(fixed_uint32_t a, fixed_uint32_t b) {
@@ -946,7 +946,7 @@ fixed_uint32_t mul_u32(fixed_uint32_t a, fixed_uint32_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[MUL:%.*]] = mul <8 x i64> [[A]], [[B]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[MUL]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> poison, <8 x i64> [[MUL]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
 //
 fixed_uint64_t mul_u64(fixed_uint64_t a, fixed_uint64_t b) {
@@ -961,7 +961,7 @@ fixed_uint64_t mul_u64(fixed_uint64_t a, fixed_uint64_t b) {
 // CHECK-NEXT:    [[CONV2:%.*]] = fpext <32 x half> [[B]] to <32 x float>
 // CHECK-NEXT:    [[MUL:%.*]] = fmul <32 x float> [[CONV]], [[CONV2]]
 // CHECK-NEXT:    [[CONV3:%.*]] = fptrunc <32 x float> [[MUL]] to <32 x half>
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x half> @llvm.vector.insert.nxv8f16.v32f16(<vscale x 8 x half> undef, <32 x half> [[CONV3]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x half> @llvm.vector.insert.nxv8f16.v32f16(<vscale x 8 x half> poison, <32 x half> [[CONV3]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x half> [[CASTSCALABLESVE]]
 //
 fixed_float16_t mul_f16(fixed_float16_t a, fixed_float16_t b) {
@@ -973,7 +973,7 @@ fixed_float16_t mul_f16(fixed_float16_t a, fixed_float16_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <16 x float> @llvm.vector.extract.v16f32.nxv4f32(<vscale x 4 x float> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <16 x float> @llvm.vector.extract.v16f32.nxv4f32(<vscale x 4 x float> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[MUL:%.*]] = fmul <16 x float> [[A]], [[B]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v16f32(<vscale x 4 x float> undef, <16 x float> [[MUL]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v16f32(<vscale x 4 x float> poison, <16 x float> [[MUL]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x float> [[CASTSCALABLESVE]]
 //
 fixed_float32_t mul_f32(fixed_float32_t a, fixed_float32_t b) {
@@ -985,7 +985,7 @@ fixed_float32_t mul_f32(fixed_float32_t a, fixed_float32_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <8 x double> @llvm.vector.extract.v8f64.nxv2f64(<vscale x 2 x double> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <8 x double> @llvm.vector.extract.v8f64.nxv2f64(<vscale x 2 x double> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[MUL:%.*]] = fmul <8 x double> [[A]], [[B]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x double> @llvm.vector.insert.nxv2f64.v8f64(<vscale x 2 x double> undef, <8 x double> [[MUL]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x double> @llvm.vector.insert.nxv2f64.v8f64(<vscale x 2 x double> poison, <8 x double> [[MUL]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x double> [[CASTSCALABLESVE]]
 //
 fixed_float64_t mul_f64(fixed_float64_t a, fixed_float64_t b) {
@@ -997,7 +997,7 @@ fixed_float64_t mul_f64(fixed_float64_t a, fixed_float64_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[MUL:%.*]] = mul <64 x i8> [[A]], [[B]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[MUL]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> poison, <64 x i8> [[MUL]], i64 0)
 // CHECK-NEXT:    ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
 //
 fixed_int8_t mul_inplace_i8(fixed_int8_t a, fixed_int8_t b) {
@@ -1009,7 +1009,7 @@ fixed_int8_t mul_inplace_i8(fixed_int8_t a, fixed_int8_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[MUL:%.*]] = mul <32 x i16> [[A]], [[B]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[MUL]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> poison, <32 x i16> [[MUL]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
 //
 fixed_int16_t mul_inplace_i16(fixed_int16_t a, fixed_int16_t b) {
@@ -1021,7 +1021,7 @@ fixed_int16_t mul_inplace_i16(fixed_int16_t a, fixed_int16_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[MUL:%.*]] = mul <16 x i32> [[A]], [[B]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[MUL]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> poison, <16 x i32> [[MUL]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
 //
 fixed_int32_t mul_inplace_i32(fixed_int32_t a, fixed_int32_t b) {
@@ -1033,7 +1033,7 @@ fixed_int32_t mul_inplace_i32(fixed_int32_t a, fixed_int32_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[MUL:%.*]] = mul <8 x i64> [[A]], [[B]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[MUL]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> poison, <8 x i64> [[MUL]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
 //
 fixed_int64_t mul_inplace_i64(fixed_int64_t a, fixed_int64_t b) {
@@ -1045,7 +1045,7 @@ fixed_int64_t mul_inplace_i64(fixed_int64_t a, fixed_int64_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[MUL:%.*]] = mul <64 x i8> [[A]], [[B]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[MUL]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> poison, <64 x i8> [[MUL]], i64 0)
 // CHECK-NEXT:    ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
 //
 fixed_uint8_t mul_inplace_u8(fixed_uint8_t a, fixed_uint8_t b) {
@@ -1057,7 +1057,7 @@ fixed_uint8_t mul_inplace_u8(fixed_uint8_t a, fixed_uint8_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[MUL:%.*]] = mul <32 x i16> [[A]], [[B]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[MUL]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> poison, <32 x i16> [[MUL]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
 //
 fixed_uint16_t mul_inplace_u16(fixed_uint16_t a, fixed_uint16_t b) {
@@ -1069,7 +1069,7 @@ fixed_uint16_t mul_inplace_u16(fixed_uint16_t a, fixed_uint16_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[MUL:%.*]] = mul <16 x i32> [[A]], [[B]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[MUL]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> poison, <16 x i32> [[MUL]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
 //
 fixed_uint32_t mul_inplace_u32(fixed_uint32_t a, fixed_uint32_t b) {
@@ -1081,7 +1081,7 @@ fixed_uint32_t mul_inplace_u32(fixed_uint32_t a, fixed_uint32_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[MUL:%.*]] = mul <8 x i64> [[A]], [[B]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[MUL]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> poison, <8 x i64> [[MUL]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
 //
 fixed_uint64_t mul_inplace_u64(fixed_uint64_t a, fixed_uint64_t b) {
@@ -1096,7 +1096,7 @@ fixed_uint64_t mul_inplace_u64(fixed_uint64_t a, fixed_uint64_t b) {
 // CHECK-NEXT:    [[CONV2:%.*]] = fpext <32 x half> [[B]] to <32 x float>
 // CHECK-NEXT:    [[MUL:%.*]] = fmul <32 x float> [[CONV]], [[CONV2]]
 // CHECK-NEXT:    [[CONV3:%.*]] = fptrunc <32 x float> [[MUL]] to <32 x half>
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x half> @llvm.vector.insert.nxv8f16.v32f16(<vscale x 8 x half> undef, <32 x half> [[CONV3]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x half> @llvm.vector.insert.nxv8f16.v32f16(<vscale x 8 x half> poison, <32 x half> [[CONV3]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x half> [[CASTSCALABLESVE]]
 //
 fixed_float16_t mul_inplace_f16(fixed_float16_t a, fixed_float16_t b) {
@@ -1108,7 +1108,7 @@ fixed_float16_t mul_inplace_f16(fixed_float16_t a, fixed_float16_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <16 x float> @llvm.vector.extract.v16f32.nxv4f32(<vscale x 4 x float> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <16 x float> @llvm.vector.extract.v16f32.nxv4f32(<vscale x 4 x float> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[MUL:%.*]] = fmul <16 x float> [[A]], [[B]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v16f32(<vscale x 4 x float> undef, <16 x float> [[MUL]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v16f32(<vscale x 4 x float> poison, <16 x float> [[MUL]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x float> [[CASTSCALABLESVE]]
 //
 fixed_float32_t mul_inplace_f32(fixed_float32_t a, fixed_float32_t b) {
@@ -1120,7 +1120,7 @@ fixed_float32_t mul_inplace_f32(fixed_float32_t a, fixed_float32_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <8 x double> @llvm.vector.extract.v8f64.nxv2f64(<vscale x 2 x double> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <8 x double> @llvm.vector.extract.v8f64.nxv2f64(<vscale x 2 x double> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[MUL:%.*]] = fmul <8 x double> [[A]], [[B]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x double> @llvm.vector.insert.nxv2f64.v8f64(<vscale x 2 x double> undef, <8 x double> [[MUL]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x double> @llvm.vector.insert.nxv2f64.v8f64(<vscale x 2 x double> poison, <8 x double> [[MUL]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x double> [[CASTSCALABLESVE]]
 //
 fixed_float64_t mul_inplace_f64(fixed_float64_t a, fixed_float64_t b) {
@@ -1133,7 +1133,7 @@ fixed_float64_t mul_inplace_f64(fixed_float64_t a, fixed_float64_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <64 x i8> poison, i8 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <64 x i8> [[SPLAT_SPLATINSERT]], <64 x i8> poison, <64 x i32> zeroinitializer
 // CHECK-NEXT:    [[MUL:%.*]] = mul <64 x i8> [[A]], [[SPLAT_SPLAT]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[MUL]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> poison, <64 x i8> [[MUL]], i64 0)
 // CHECK-NEXT:    ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
 //
 fixed_int8_t mul_scalar_i8(fixed_int8_t a, int8_t b) {
@@ -1146,7 +1146,7 @@ fixed_int8_t mul_scalar_i8(fixed_int8_t a, int8_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i16> poison, i16 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <32 x i16> [[SPLAT_SPLATINSERT]], <32 x i16> poison, <32 x i32> zeroinitializer
 // CHECK-NEXT:    [[MUL:%.*]] = mul <32 x i16> [[A]], [[SPLAT_SPLAT]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[MUL]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> poison, <32 x i16> [[MUL]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
 //
 fixed_int16_t mul_scalar_i16(fixed_int16_t a, int16_t b) {
@@ -1159,7 +1159,7 @@ fixed_int16_t mul_scalar_i16(fixed_int16_t a, int16_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i32> [[SPLAT_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer
 // CHECK-NEXT:    [[MUL:%.*]] = mul <16 x i32> [[A]], [[SPLAT_SPLAT]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[MUL]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> poison, <16 x i32> [[MUL]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
 //
 fixed_int32_t mul_scalar_i32(fixed_int32_t a, int32_t b) {
@@ -1172,7 +1172,7 @@ fixed_int32_t mul_scalar_i32(fixed_int32_t a, int32_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <8 x i64> [[SPLAT_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[MUL:%.*]] = mul <8 x i64> [[A]], [[SPLAT_SPLAT]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[MUL]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> poison, <8 x i64> [[MUL]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
 //
 fixed_int64_t mul_scalar_i64(fixed_int64_t a, int64_t b) {
@@ -1185,7 +1185,7 @@ fixed_int64_t mul_scalar_i64(fixed_int64_t a, int64_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <64 x i8> poison, i8 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <64 x i8> [[SPLAT_SPLATINSERT]], <64 x i8> poison, <64 x i32> zeroinitializer
 // CHECK-NEXT:    [[MUL:%.*]] = mul <64 x i8> [[A]], [[SPLAT_SPLAT]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[MUL]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> poison, <64 x i8> [[MUL]], i64 0)
 // CHECK-NEXT:    ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
 //
 fixed_uint8_t mul_scalar_u8(fixed_uint8_t a, uint8_t b) {
@@ -1198,7 +1198,7 @@ fixed_uint8_t mul_scalar_u8(fixed_uint8_t a, uint8_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i16> poison, i16 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <32 x i16> [[SPLAT_SPLATINSERT]], <32 x i16> poison, <32 x i32> zeroinitializer
 // CHECK-NEXT:    [[MUL:%.*]] = mul <32 x i16> [[A]], [[SPLAT_SPLAT]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[MUL]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> poison, <32 x i16> [[MUL]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
 //
 fixed_uint16_t mul_scalar_u16(fixed_uint16_t a, uint16_t b) {
@@ -1211,7 +1211,7 @@ fixed_uint16_t mul_scalar_u16(fixed_uint16_t a, uint16_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i32> [[SPLAT_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer
 // CHECK-NEXT:    [[MUL:%.*]] = mul <16 x i32> [[A]], [[SPLAT_SPLAT]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[MUL]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> poison, <16 x i32> [[MUL]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
 //
 fixed_uint32_t mul_scalar_u32(fixed_uint32_t a, uint32_t b) {
@@ -1224,7 +1224,7 @@ fixed_uint32_t mul_scalar_u32(fixed_uint32_t a, uint32_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <8 x i64> [[SPLAT_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[MUL:%.*]] = mul <8 x i64> [[A]], [[SPLAT_SPLAT]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[MUL]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> poison, <8 x i64> [[MUL]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
 //
 fixed_uint64_t mul_scalar_u64(fixed_uint64_t a, uint64_t b) {
@@ -1237,7 +1237,7 @@ fixed_uint64_t mul_scalar_u64(fixed_uint64_t a, uint64_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x half> poison, half [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <32 x half> [[SPLAT_SPLATINSERT]], <32 x half> poison, <32 x i32> zeroinitializer
 // CHECK-NEXT:    [[MUL:%.*]] = fmul <32 x half> [[A]], [[SPLAT_SPLAT]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x half> @llvm.vector.insert.nxv8f16.v32f16(<vscale x 8 x half> undef, <32 x half> [[MUL]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x half> @llvm.vector.insert.nxv8f16.v32f16(<vscale x 8 x half> poison, <32 x half> [[MUL]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x half> [[CASTSCALABLESVE]]
 //
 fixed_float16_t mul_scalar_f16(fixed_float16_t a, __fp16 b) {
@@ -1250,7 +1250,7 @@ fixed_float16_t mul_scalar_f16(fixed_float16_t a, __fp16 b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x float> poison, float [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <16 x float> [[SPLAT_SPLATINSERT]], <16 x float> poison, <16 x i32> zeroinitializer
 // CHECK-NEXT:    [[MUL:%.*]] = fmul <16 x float> [[A]], [[SPLAT_SPLAT]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v16f32(<vscale x 4 x float> undef, <16 x float> [[MUL]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v16f32(<vscale x 4 x float> poison, <16 x float> [[MUL]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x float> [[CASTSCALABLESVE]]
 //
 fixed_float32_t mul_scalar_f32(fixed_float32_t a, float b) {
@@ -1263,7 +1263,7 @@ fixed_float32_t mul_scalar_f32(fixed_float32_t a, float b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x double> poison, double [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <8 x double> [[SPLAT_SPLATINSERT]], <8 x double> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[MUL:%.*]] = fmul <8 x double> [[A]], [[SPLAT_SPLAT]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x double> @llvm.vector.insert.nxv2f64.v8f64(<vscale x 2 x double> undef, <8 x double> [[MUL]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x double> @llvm.vector.insert.nxv2f64.v8f64(<vscale x 2 x double> poison, <8 x double> [[MUL]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x double> [[CASTSCALABLESVE]]
 //
 fixed_float64_t mul_scalar_f64(fixed_float64_t a, double b) {
@@ -1277,7 +1277,7 @@ fixed_float64_t mul_scalar_f64(fixed_float64_t a, double b) {
 // CHECK-NEXT:    [[A:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[DIV:%.*]] = sdiv <64 x i8> [[A]], [[B]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[DIV]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> poison, <64 x i8> [[DIV]], i64 0)
 // CHECK-NEXT:    ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
 //
 fixed_int8_t div_i8(fixed_int8_t a, fixed_int8_t b) {
@@ -1289,7 +1289,7 @@ fixed_int8_t div_i8(fixed_int8_t a, fixed_int8_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[DIV:%.*]] = sdiv <32 x i16> [[A]], [[B]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[DIV]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> poison, <32 x i16> [[DIV]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
 //
 fixed_int16_t div_i16(fixed_int16_t a, fixed_int16_t b) {
@@ -1301,7 +1301,7 @@ fixed_int16_t div_i16(fixed_int16_t a, fixed_int16_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[DIV:%.*]] = sdiv <16 x i32> [[A]], [[B]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[DIV]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> poison, <16 x i32> [[DIV]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
 //
 fixed_int32_t div_i32(fixed_int32_t a, fixed_int32_t b) {
@@ -1313,7 +1313,7 @@ fixed_int32_t div_i32(fixed_int32_t a, fixed_int32_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[DIV:%.*]] = sdiv <8 x i64> [[A]], [[B]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[DIV]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> poison, <8 x i64> [[DIV]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
 //
 fixed_int64_t div_i64(fixed_int64_t a, fixed_int64_t b) {
@@ -1325,7 +1325,7 @@ fixed_int64_t div_i64(fixed_int64_t a, fixed_int64_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[DIV:%.*]] = udiv <64 x i8> [[A]], [[B]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[DIV]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> poison, <64 x i8> [[DIV]], i64 0)
 // CHECK-NEXT:    ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
 //
 fixed_uint8_t div_u8(fixed_uint8_t a, fixed_uint8_t b) {
@@ -1337,7 +1337,7 @@ fixed_uint8_t div_u8(fixed_uint8_t a, fixed_uint8_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[DIV:%.*]] = udiv <32 x i16> [[A]], [[B]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[DIV]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> poison, <32 x i16> [[DIV]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
 //
 fixed_uint16_t div_u16(fixed_uint16_t a, fixed_uint16_t b) {
@@ -1349,7 +1349,7 @@ fixed_uint16_t div_u16(fixed_uint16_t a, fixed_uint16_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[DIV:%.*]] = udiv <16 x i32> [[A]], [[B]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[DIV]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> poison, <16 x i32> [[DIV]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
 //
 fixed_uint32_t div_u32(fixed_uint32_t a, fixed_uint32_t b) {
@@ -1361,7 +1361,7 @@ fixed_uint32_t div_u32(fixed_uint32_t a, fixed_uint32_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[DIV:%.*]] = udiv <8 x i64> [[A]], [[B]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[DIV]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> poison, <8 x i64> [[DIV]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
 //
 fixed_uint64_t div_u64(fixed_uint64_t a, fixed_uint64_t b) {
@@ -1376,7 +1376,7 @@ fixed_uint64_t div_u64(fixed_uint64_t a, fixed_uint64_t b) {
 // CHECK-NEXT:    [[CONV2:%.*]] = fpext <32 x half> [[B]] to <32 x float>
 // CHECK-NEXT:    [[DIV:%.*]] = fdiv <32 x float> [[CONV]], [[CONV2]]
 // CHECK-NEXT:    [[CONV3:%.*]] = fptrunc <32 x float> [[DIV]] to <32 x half>
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x half> @llvm.vector.insert.nxv8f16.v32f16(<vscale x 8 x half> undef, <32 x half> [[CONV3]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x half> @llvm.vector.insert.nxv8f16.v32f16(<vscale x 8 x half> poison, <32 x half> [[CONV3]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x half> [[CASTSCALABLESVE]]
 //
 fixed_float16_t div_f16(fixed_float16_t a, fixed_float16_t b) {
@@ -1388,7 +1388,7 @@ fixed_float16_t div_f16(fixed_float16_t a, fixed_float16_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <16 x float> @llvm.vector.extract.v16f32.nxv4f32(<vscale x 4 x float> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <16 x float> @llvm.vector.extract.v16f32.nxv4f32(<vscale x 4 x float> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[DIV:%.*]] = fdiv <16 x float> [[A]], [[B]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v16f32(<vscale x 4 x float> undef, <16 x float> [[DIV]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v16f32(<vscale x 4 x float> poison, <16 x float> [[DIV]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x float> [[CASTSCALABLESVE]]
 //
 fixed_float32_t div_f32(fixed_float32_t a, fixed_float32_t b) {
@@ -1400,7 +1400,7 @@ fixed_float32_t div_f32(fixed_float32_t a, fixed_float32_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <8 x double> @llvm.vector.extract.v8f64.nxv2f64(<vscale x 2 x double> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <8 x double> @llvm.vector.extract.v8f64.nxv2f64(<vscale x 2 x double> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[DIV:%.*]] = fdiv <8 x double> [[A]], [[B]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x double> @llvm.vector.insert.nxv2f64.v8f64(<vscale x 2 x double> undef, <8 x double> [[DIV]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x double> @llvm.vector.insert.nxv2f64.v8f64(<vscale x 2 x double> poison, <8 x double> [[DIV]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x double> [[CASTSCALABLESVE]]
 //
 fixed_float64_t div_f64(fixed_float64_t a, fixed_float64_t b) {
@@ -1412,7 +1412,7 @@ fixed_float64_t div_f64(fixed_float64_t a, fixed_float64_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[DIV:%.*]] = sdiv <64 x i8> [[A]], [[B]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[DIV]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> poison, <64 x i8> [[DIV]], i64 0)
 // CHECK-NEXT:    ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
 //
 fixed_int8_t div_inplace_i8(fixed_int8_t a, fixed_int8_t b) {
@@ -1424,7 +1424,7 @@ fixed_int8_t div_inplace_i8(fixed_int8_t a, fixed_int8_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[DIV:%.*]] = sdiv <32 x i16> [[A]], [[B]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[DIV]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> poison, <32 x i16> [[DIV]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
 //
 fixed_int16_t div_inplace_i16(fixed_int16_t a, fixed_int16_t b) {
@@ -1436,7 +1436,7 @@ fixed_int16_t div_inplace_i16(fixed_int16_t a, fixed_int16_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[DIV:%.*]] = sdiv <16 x i32> [[A]], [[B]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[DIV]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> poison, <16 x i32> [[DIV]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
 //
 fixed_int32_t div_inplace_i32(fixed_int32_t a, fixed_int32_t b) {
@@ -1448,7 +1448,7 @@ fixed_int32_t div_inplace_i32(fixed_int32_t a, fixed_int32_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[DIV:%.*]] = sdiv <8 x i64> [[A]], [[B]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[DIV]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> poison, <8 x i64> [[DIV]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
 //
 fixed_int64_t div_inplace_i64(fixed_int64_t a, fixed_int64_t b) {
@@ -1460,7 +1460,7 @@ fixed_int64_t div_inplace_i64(fixed_int64_t a, fixed_int64_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[DIV:%.*]] = udiv <64 x i8> [[A]], [[B]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[DIV]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> poison, <64 x i8> [[DIV]], i64 0)
 // CHECK-NEXT:    ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
 //
 fixed_uint8_t div_inplace_u8(fixed_uint8_t a, fixed_uint8_t b) {
@@ -1472,7 +1472,7 @@ fixed_uint8_t div_inplace_u8(fixed_uint8_t a, fixed_uint8_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[DIV:%.*]] = udiv <32 x i16> [[A]], [[B]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[DIV]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> poison, <32 x i16> [[DIV]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
 //
 fixed_uint16_t div_inplace_u16(fixed_uint16_t a, fixed_uint16_t b) {
@@ -1484,7 +1484,7 @@ fixed_uint16_t div_inplace_u16(fixed_uint16_t a, fixed_uint16_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[DIV:%.*]] = udiv <16 x i32> [[A]], [[B]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[DIV]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> poison, <16 x i32> [[DIV]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
 //
 fixed_uint32_t div_inplace_u32(fixed_uint32_t a, fixed_uint32_t b) {
@@ -1496,7 +1496,7 @@ fixed_uint32_t div_inplace_u32(fixed_uint32_t a, fixed_uint32_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[DIV:%.*]] = udiv <8 x i64> [[A]], [[B]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[DIV]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> poison, <8 x i64> [[DIV]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
 //
 fixed_uint64_t div_inplace_u64(fixed_uint64_t a, fixed_uint64_t b) {
@@ -1511,7 +1511,7 @@ fixed_uint64_t div_inplace_u64(fixed_uint64_t a, fixed_uint64_t b) {
 // CHECK-NEXT:    [[CONV2:%.*]] = fpext <32 x half> [[B]] to <32 x float>
 // CHECK-NEXT:    [[DIV:%.*]] = fdiv <32 x float> [[CONV]], [[CONV2]]
 // CHECK-NEXT:    [[CONV3:%.*]] = fptrunc <32 x float> [[DIV]] to <32 x half>
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x half> @llvm.vector.insert.nxv8f16.v32f16(<vscale x 8 x half> undef, <32 x half> [[CONV3]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x half> @llvm.vector.insert.nxv8f16.v32f16(<vscale x 8 x half> poison, <32 x half> [[CONV3]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x half> [[CASTSCALABLESVE]]
 //
 fixed_float16_t div_inplace_f16(fixed_float16_t a, fixed_float16_t b) {
@@ -1523,7 +1523,7 @@ fixed_float16_t div_inplace_f16(fixed_float16_t a, fixed_float16_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <16 x float> @llvm.vector.extract.v16f32.nxv4f32(<vscale x 4 x float> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <16 x float> @llvm.vector.extract.v16f32.nxv4f32(<vscale x 4 x float> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[DIV:%.*]] = fdiv <16 x float> [[A]], [[B]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v16f32(<vscale x 4 x float> undef, <16 x float> [[DIV]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v16f32(<vscale x 4 x float> poison, <16 x float> [[DIV]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x float> [[CASTSCALABLESVE]]
 //
 fixed_float32_t div_inplace_f32(fixed_float32_t a, fixed_float32_t b) {
@@ -1535,7 +1535,7 @@ fixed_float32_t div_inplace_f32(fixed_float32_t a, fixed_float32_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <8 x double> @llvm.vector.extract.v8f64.nxv2f64(<vscale x 2 x double> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <8 x double> @llvm.vector.extract.v8f64.nxv2f64(<vscale x 2 x double> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[DIV:%.*]] = fdiv <8 x double> [[A]], [[B]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x double> @llvm.vector.insert.nxv2f64.v8f64(<vscale x 2 x double> undef, <8 x double> [[DIV]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x double> @llvm.vector.insert.nxv2f64.v8f64(<vscale x 2 x double> poison, <8 x double> [[DIV]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x double> [[CASTSCALABLESVE]]
 //
 fixed_float64_t div_inplace_f64(fixed_float64_t a, fixed_float64_t b) {
@@ -1548,7 +1548,7 @@ fixed_float64_t div_inplace_f64(fixed_float64_t a, fixed_float64_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <64 x i8> poison, i8 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <64 x i8> [[SPLAT_SPLATINSERT]], <64 x i8> poison, <64 x i32> zeroinitializer
 // CHECK-NEXT:    [[DIV:%.*]] = sdiv <64 x i8> [[A]], [[SPLAT_SPLAT]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[DIV]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> poison, <64 x i8> [[DIV]], i64 0)
 // CHECK-NEXT:    ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
 //
 fixed_int8_t div_scalar_i8(fixed_int8_t a, int8_t b) {
@@ -1561,7 +1561,7 @@ fixed_int8_t div_scalar_i8(fixed_int8_t a, int8_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i16> poison, i16 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <32 x i16> [[SPLAT_SPLATINSERT]], <32 x i16> poison, <32 x i32> zeroinitializer
 // CHECK-NEXT:    [[DIV:%.*]] = sdiv <32 x i16> [[A]], [[SPLAT_SPLAT]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[DIV]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> poison, <32 x i16> [[DIV]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
 //
 fixed_int16_t div_scalar_i16(fixed_int16_t a, int16_t b) {
@@ -1574,7 +1574,7 @@ fixed_int16_t div_scalar_i16(fixed_int16_t a, int16_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i32> [[SPLAT_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer
 // CHECK-NEXT:    [[DIV:%.*]] = sdiv <16 x i32> [[A]], [[SPLAT_SPLAT]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[DIV]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> poison, <16 x i32> [[DIV]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
 //
 fixed_int32_t div_scalar_i32(fixed_int32_t a, int32_t b) {
@@ -1587,7 +1587,7 @@ fixed_int32_t div_scalar_i32(fixed_int32_t a, int32_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <8 x i64> [[SPLAT_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[DIV:%.*]] = sdiv <8 x i64> [[A]], [[SPLAT_SPLAT]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[DIV]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> poison, <8 x i64> [[DIV]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
 //
 fixed_int64_t div_scalar_i64(fixed_int64_t a, int64_t b) {
@@ -1600,7 +1600,7 @@ fixed_int64_t div_scalar_i64(fixed_int64_t a, int64_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <64 x i8> poison, i8 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <64 x i8> [[SPLAT_SPLATINSERT]], <64 x i8> poison, <64 x i32> zeroinitializer
 // CHECK-NEXT:    [[DIV:%.*]] = udiv <64 x i8> [[A]], [[SPLAT_SPLAT]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[DIV]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> poison, <64 x i8> [[DIV]], i64 0)
 // CHECK-NEXT:    ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
 //
 fixed_uint8_t div_scalar_u8(fixed_uint8_t a, uint8_t b) {
@@ -1613,7 +1613,7 @@ fixed_uint8_t div_scalar_u8(fixed_uint8_t a, uint8_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i16> poison, i16 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <32 x i16> [[SPLAT_SPLATINSERT]], <32 x i16> poison, <32 x i32> zeroinitializer
 // CHECK-NEXT:    [[DIV:%.*]] = udiv <32 x i16> [[A]], [[SPLAT_SPLAT]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[DIV]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> poison, <32 x i16> [[DIV]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
 //
 fixed_uint16_t div_scalar_u16(fixed_uint16_t a, uint16_t b) {
@@ -1626,7 +1626,7 @@ fixed_uint16_t div_scalar_u16(fixed_uint16_t a, uint16_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i32> [[SPLAT_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer
 // CHECK-NEXT:    [[DIV:%.*]] = udiv <16 x i32> [[A]], [[SPLAT_SPLAT]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[DIV]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> poison, <16 x i32> [[DIV]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
 //
 fixed_uint32_t div_scalar_u32(fixed_uint32_t a, uint32_t b) {
@@ -1639,7 +1639,7 @@ fixed_uint32_t div_scalar_u32(fixed_uint32_t a, uint32_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <8 x i64> [[SPLAT_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[DIV:%.*]] = udiv <8 x i64> [[A]], [[SPLAT_SPLAT]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[DIV]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> poison, <8 x i64> [[DIV]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
 //
 fixed_uint64_t div_scalar_u64(fixed_uint64_t a, uint64_t b) {
@@ -1652,7 +1652,7 @@ fixed_uint64_t div_scalar_u64(fixed_uint64_t a, uint64_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x half> poison, half [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <32 x half> [[SPLAT_SPLATINSERT]], <32 x half> poison, <32 x i32> zeroinitializer
 // CHECK-NEXT:    [[DIV:%.*]] = fdiv <32 x half> [[A]], [[SPLAT_SPLAT]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x half> @llvm.vector.insert.nxv8f16.v32f16(<vscale x 8 x half> undef, <32 x half> [[DIV]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x half> @llvm.vector.insert.nxv8f16.v32f16(<vscale x 8 x half> poison, <32 x half> [[DIV]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x half> [[CASTSCALABLESVE]]
 //
 fixed_float16_t div_scalar_f16(fixed_float16_t a, __fp16 b) {
@@ -1665,7 +1665,7 @@ fixed_float16_t div_scalar_f16(fixed_float16_t a, __fp16 b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x float> poison, float [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <16 x float> [[SPLAT_SPLATINSERT]], <16 x float> poison, <16 x i32> zeroinitializer
 // CHECK-NEXT:    [[DIV:%.*]] = fdiv <16 x float> [[A]], [[SPLAT_SPLAT]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v16f32(<vscale x 4 x float> undef, <16 x float> [[DIV]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v16f32(<vscale x 4 x float> poison, <16 x float> [[DIV]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x float> [[CASTSCALABLESVE]]
 //
 fixed_float32_t div_scalar_f32(fixed_float32_t a, float b) {
@@ -1678,7 +1678,7 @@ fixed_float32_t div_scalar_f32(fixed_float32_t a, float b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x double> poison, double [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <8 x double> [[SPLAT_SPLATINSERT]], <8 x double> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[DIV:%.*]] = fdiv <8 x double> [[A]], [[SPLAT_SPLAT]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x double> @llvm.vector.insert.nxv2f64.v8f64(<vscale x 2 x double> undef, <8 x double> [[DIV]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x double> @llvm.vector.insert.nxv2f64.v8f64(<vscale x 2 x double> poison, <8 x double> [[DIV]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x double> [[CASTSCALABLESVE]]
 //
 fixed_float64_t div_scalar_f64(fixed_float64_t a, double b) {
@@ -1692,7 +1692,7 @@ fixed_float64_t div_scalar_f64(fixed_float64_t a, double b) {
 // CHECK-NEXT:    [[A:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[REM:%.*]] = srem <64 x i8> [[A]], [[B]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[REM]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> poison, <64 x i8> [[REM]], i64 0)
 // CHECK-NEXT:    ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
 //
 fixed_int8_t rem_i8(fixed_int8_t a, fixed_int8_t b) {
@@ -1704,7 +1704,7 @@ fixed_int8_t rem_i8(fixed_int8_t a, fixed_int8_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[REM:%.*]] = srem <32 x i16> [[A]], [[B]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[REM]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> poison, <32 x i16> [[REM]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
 //
 fixed_int16_t rem_i16(fixed_int16_t a, fixed_int16_t b) {
@@ -1716,7 +1716,7 @@ fixed_int16_t rem_i16(fixed_int16_t a, fixed_int16_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[REM:%.*]] = srem <16 x i32> [[A]], [[B]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[REM]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> poison, <16 x i32> [[REM]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
 //
 fixed_int32_t rem_i32(fixed_int32_t a, fixed_int32_t b) {
@@ -1728,7 +1728,7 @@ fixed_int32_t rem_i32(fixed_int32_t a, fixed_int32_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[REM:%.*]] = srem <8 x i64> [[A]], [[B]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[REM]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> poison, <8 x i64> [[REM]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
 //
 fixed_int64_t rem_i64(fixed_int64_t a, fixed_int64_t b) {
@@ -1740,7 +1740,7 @@ fixed_int64_t rem_i64(fixed_int64_t a, fixed_int64_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[REM:%.*]] = urem <64 x i8> [[A]], [[B]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[REM]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> poison, <64 x i8> [[REM]], i64 0)
 // CHECK-NEXT:    ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
 //
 fixed_uint8_t rem_u8(fixed_uint8_t a, fixed_uint8_t b) {
@@ -1752,7 +1752,7 @@ fixed_uint8_t rem_u8(fixed_uint8_t a, fixed_uint8_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[REM:%.*]] = urem <32 x i16> [[A]], [[B]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[REM]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> poison, <32 x i16> [[REM]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
 //
 fixed_uint16_t rem_u16(fixed_uint16_t a, fixed_uint16_t b) {
@@ -1764,7 +1764,7 @@ fixed_uint16_t rem_u16(fixed_uint16_t a, fixed_uint16_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[REM:%.*]] = urem <16 x i32> [[A]], [[B]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[REM]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> poison, <16 x i32> [[REM]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
 //
 fixed_uint32_t rem_u32(fixed_uint32_t a, fixed_uint32_t b) {
@@ -1776,7 +1776,7 @@ fixed_uint32_t rem_u32(fixed_uint32_t a, fixed_uint32_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[REM:%.*]] = urem <8 x i64> [[A]], [[B]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[REM]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> poison, <8 x i64> [[REM]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
 //
 fixed_uint64_t rem_u64(fixed_uint64_t a, fixed_uint64_t b) {
@@ -1788,7 +1788,7 @@ fixed_uint64_t rem_u64(fixed_uint64_t a, fixed_uint64_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[REM:%.*]] = srem <64 x i8> [[A]], [[B]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[REM]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> poison, <64 x i8> [[REM]], i64 0)
 // CHECK-NEXT:    ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
 //
 fixed_int8_t rem_inplace_i8(fixed_int8_t a, fixed_int8_t b) {
@@ -1800,7 +1800,7 @@ fixed_int8_t rem_inplace_i8(fixed_int8_t a, fixed_int8_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[REM:%.*]] = srem <32 x i16> [[A]], [[B]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[REM]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> poison, <32 x i16> [[REM]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
 //
 fixed_int16_t rem_inplace_i16(fixed_int16_t a, fixed_int16_t b) {
@@ -1812,7 +1812,7 @@ fixed_int16_t rem_inplace_i16(fixed_int16_t a, fixed_int16_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[REM:%.*]] = srem <16 x i32> [[A]], [[B]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[REM]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> poison, <16 x i32> [[REM]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
 //
 fixed_int32_t rem_inplace_i32(fixed_int32_t a, fixed_int32_t b) {
@@ -1824,7 +1824,7 @@ fixed_int32_t rem_inplace_i32(fixed_int32_t a, fixed_int32_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[REM:%.*]] = srem <8 x i64> [[A]], [[B]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[REM]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> poison, <8 x i64> [[REM]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
 //
 fixed_int64_t rem_inplace_i64(fixed_int64_t a, fixed_int64_t b) {
@@ -1836,7 +1836,7 @@ fixed_int64_t rem_inplace_i64(fixed_int64_t a, fixed_int64_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[REM:%.*]] = urem <64 x i8> [[A]], [[B]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[REM]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> poison, <64 x i8> [[REM]], i64 0)
 // CHECK-NEXT:    ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
 //
 fixed_uint8_t rem_inplace_u8(fixed_uint8_t a, fixed_uint8_t b) {
@@ -1848,7 +1848,7 @@ fixed_uint8_t rem_inplace_u8(fixed_uint8_t a, fixed_uint8_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[REM:%.*]] = urem <32 x i16> [[A]], [[B]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[REM]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> poison, <32 x i16> [[REM]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
 //
 fixed_uint16_t rem_inplace_u16(fixed_uint16_t a, fixed_uint16_t b) {
@@ -1860,7 +1860,7 @@ fixed_uint16_t rem_inplace_u16(fixed_uint16_t a, fixed_uint16_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[REM:%.*]] = urem <16 x i32> [[A]], [[B]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[REM]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> poison, <16 x i32> [[REM]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
 //
 fixed_uint32_t rem_inplace_u32(fixed_uint32_t a, fixed_uint32_t b) {
@@ -1872,7 +1872,7 @@ fixed_uint32_t rem_inplace_u32(fixed_uint32_t a, fixed_uint32_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[REM:%.*]] = urem <8 x i64> [[A]], [[B]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[REM]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> poison, <8 x i64> [[REM]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
 //
 fixed_uint64_t rem_inplace_u64(fixed_uint64_t a, fixed_uint64_t b) {
@@ -1885,7 +1885,7 @@ fixed_uint64_t rem_inplace_u64(fixed_uint64_t a, fixed_uint64_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <64 x i8> poison, i8 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <64 x i8> [[SPLAT_SPLATINSERT]], <64 x i8> poison, <64 x i32> zeroinitializer
 // CHECK-NEXT:    [[REM:%.*]] = srem <64 x i8> [[A]], [[SPLAT_SPLAT]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[REM]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> poison, <64 x i8> [[REM]], i64 0)
 // CHECK-NEXT:    ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
 //
 fixed_int8_t rem_scalar_i8(fixed_int8_t a, int8_t b) {
@@ -1898,7 +1898,7 @@ fixed_int8_t rem_scalar_i8(fixed_int8_t a, int8_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i16> poison, i16 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <32 x i16> [[SPLAT_SPLATINSERT]], <32 x i16> poison, <32 x i32> zeroinitializer
 // CHECK-NEXT:    [[REM:%.*]] = srem <32 x i16> [[A]], [[SPLAT_SPLAT]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[REM]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> poison, <32 x i16> [[REM]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
 //
 fixed_int16_t rem_scalar_i16(fixed_int16_t a, int16_t b) {
@@ -1911,7 +1911,7 @@ fixed_int16_t rem_scalar_i16(fixed_int16_t a, int16_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i32> [[SPLAT_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer
 // CHECK-NEXT:    [[REM:%.*]] = srem <16 x i32> [[A]], [[SPLAT_SPLAT]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[REM]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> poison, <16 x i32> [[REM]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
 //
 fixed_int32_t rem_scalar_i32(fixed_int32_t a, int32_t b) {
@@ -1924,7 +1924,7 @@ fixed_int32_t rem_scalar_i32(fixed_int32_t a, int32_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <8 x i64> [[SPLAT_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[REM:%.*]] = srem <8 x i64> [[A]], [[SPLAT_SPLAT]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[REM]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> poison, <8 x i64> [[REM]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
 //
 fixed_int64_t rem_scalar_i64(fixed_int64_t a, int64_t b) {
@@ -1937,7 +1937,7 @@ fixed_int64_t rem_scalar_i64(fixed_int64_t a, int64_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <64 x i8> poison, i8 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <64 x i8> [[SPLAT_SPLATINSERT]], <64 x i8> poison, <64 x i32> zeroinitializer
 // CHECK-NEXT:    [[REM:%.*]] = urem <64 x i8> [[A]], [[SPLAT_SPLAT]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[REM]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> poison, <64 x i8> [[REM]], i64 0)
 // CHECK-NEXT:    ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
 //
 fixed_uint8_t rem_scalar_u8(fixed_uint8_t a, uint8_t b) {
@@ -1950,7 +1950,7 @@ fixed_uint8_t rem_scalar_u8(fixed_uint8_t a, uint8_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i16> poison, i16 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <32 x i16> [[SPLAT_SPLATINSERT]], <32 x i16> poison, <32 x i32> zeroinitializer
 // CHECK-NEXT:    [[REM:%.*]] = urem <32 x i16> [[A]], [[SPLAT_SPLAT]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[REM]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> poison, <32 x i16> [[REM]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
 //
 fixed_uint16_t rem_scalar_u16(fixed_uint16_t a, uint16_t b) {
@@ -1963,7 +1963,7 @@ fixed_uint16_t rem_scalar_u16(fixed_uint16_t a, uint16_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i32> [[SPLAT_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer
 // CHECK-NEXT:    [[REM:%.*]] = urem <16 x i32> [[A]], [[SPLAT_SPLAT]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[REM]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> poison, <16 x i32> [[REM]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
 //
 fixed_uint32_t rem_scalar_u32(fixed_uint32_t a, uint32_t b) {
@@ -1976,7 +1976,7 @@ fixed_uint32_t rem_scalar_u32(fixed_uint32_t a, uint32_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <8 x i64> [[SPLAT_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[REM:%.*]] = urem <8 x i64> [[A]], [[SPLAT_SPLAT]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[REM]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> poison, <8 x i64> [[REM]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
 //
 fixed_uint64_t rem_scalar_u64(fixed_uint64_t a, uint64_t b) {
diff --git a/clang/test/CodeGen/AArch64/sve-vls-bitwise-ops.c b/clang/test/CodeGen/AArch64/sve-vls-bitwise-ops.c
index 017c71aa41143b..33c7a2d1bb630f 100644
--- a/clang/test/CodeGen/AArch64/sve-vls-bitwise-ops.c
+++ b/clang/test/CodeGen/AArch64/sve-vls-bitwise-ops.c
@@ -34,7 +34,7 @@ typedef svbool_t fixed_bool_t __attribute__((arm_sve_vector_bits(N)));
 // CHECK-NEXT:    [[B_COERCE:%.*]] = bitcast <vscale x 16 x i1> [[TMP1:%.*]] to <vscale x 2 x i8>
 // CHECK-NEXT:    [[B:%.*]] = call <8 x i8> @llvm.vector.extract.v8i8.nxv2i8(<vscale x 2 x i8> [[B_COERCE]], i64 0)
 // CHECK-NEXT:    [[AND:%.*]] = and <8 x i8> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v8i8(<vscale x 2 x i8> undef, <8 x i8> [[AND]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v8i8(<vscale x 2 x i8> poison, <8 x i8> [[AND]], i64 0)
 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <vscale x 2 x i8> [[CAST_SCALABLE]] to <vscale x 16 x i1>
 // CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
@@ -47,7 +47,7 @@ fixed_bool_t and_bool(fixed_bool_t a, fixed_bool_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[AND:%.*]] = and <64 x i8> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[AND]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> poison, <64 x i8> [[AND]], i64 0)
 // CHECK-NEXT:    ret <vscale x 16 x i8> [[CAST_SCALABLE]]
 //
 fixed_int8_t and_i8(fixed_int8_t a, fixed_int8_t b) {
@@ -59,7 +59,7 @@ fixed_int8_t and_i8(fixed_int8_t a, fixed_int8_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[AND:%.*]] = and <32 x i16> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[AND]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> poison, <32 x i16> [[AND]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i16> [[CAST_SCALABLE]]
 //
 fixed_int16_t and_i16(fixed_int16_t a, fixed_int16_t b) {
@@ -71,7 +71,7 @@ fixed_int16_t and_i16(fixed_int16_t a, fixed_int16_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[AND:%.*]] = and <16 x i32> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[AND]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> poison, <16 x i32> [[AND]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[CAST_SCALABLE]]
 //
 fixed_int32_t and_i32(fixed_int32_t a, fixed_int32_t b) {
@@ -83,7 +83,7 @@ fixed_int32_t and_i32(fixed_int32_t a, fixed_int32_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[AND:%.*]] = and <8 x i64> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[AND]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> poison, <8 x i64> [[AND]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[CAST_SCALABLE]]
 //
 fixed_int64_t and_i64(fixed_int64_t a, fixed_int64_t b) {
@@ -95,7 +95,7 @@ fixed_int64_t and_i64(fixed_int64_t a, fixed_int64_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[AND:%.*]] = and <64 x i8> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[AND]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> poison, <64 x i8> [[AND]], i64 0)
 // CHECK-NEXT:    ret <vscale x 16 x i8> [[CAST_SCALABLE]]
 //
 fixed_uint8_t and_u8(fixed_uint8_t a, fixed_uint8_t b) {
@@ -107,7 +107,7 @@ fixed_uint8_t and_u8(fixed_uint8_t a, fixed_uint8_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[AND:%.*]] = and <32 x i16> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[AND]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> poison, <32 x i16> [[AND]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i16> [[CAST_SCALABLE]]
 //
 fixed_uint16_t and_u16(fixed_uint16_t a, fixed_uint16_t b) {
@@ -119,7 +119,7 @@ fixed_uint16_t and_u16(fixed_uint16_t a, fixed_uint16_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[AND:%.*]] = and <16 x i32> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[AND]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> poison, <16 x i32> [[AND]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[CAST_SCALABLE]]
 //
 fixed_uint32_t and_u32(fixed_uint32_t a, fixed_uint32_t b) {
@@ -131,7 +131,7 @@ fixed_uint32_t and_u32(fixed_uint32_t a, fixed_uint32_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[AND:%.*]] = and <8 x i64> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[AND]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> poison, <8 x i64> [[AND]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[CAST_SCALABLE]]
 //
 fixed_uint64_t and_u64(fixed_uint64_t a, fixed_uint64_t b) {
@@ -147,7 +147,7 @@ fixed_uint64_t and_u64(fixed_uint64_t a, fixed_uint64_t b) {
 // CHECK-NEXT:    [[B_COERCE:%.*]] = bitcast <vscale x 16 x i1> [[TMP1:%.*]] to <vscale x 2 x i8>
 // CHECK-NEXT:    [[B:%.*]] = call <8 x i8> @llvm.vector.extract.v8i8.nxv2i8(<vscale x 2 x i8> [[B_COERCE]], i64 0)
 // CHECK-NEXT:    [[OR:%.*]] = or <8 x i8> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v8i8(<vscale x 2 x i8> undef, <8 x i8> [[OR]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v8i8(<vscale x 2 x i8> poison, <8 x i8> [[OR]], i64 0)
 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <vscale x 2 x i8> [[CAST_SCALABLE]] to <vscale x 16 x i1>
 // CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
@@ -160,7 +160,7 @@ fixed_bool_t or_bool(fixed_bool_t a, fixed_bool_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[OR:%.*]] = or <64 x i8> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[OR]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> poison, <64 x i8> [[OR]], i64 0)
 // CHECK-NEXT:    ret <vscale x 16 x i8> [[CAST_SCALABLE]]
 //
 fixed_int8_t or_i8(fixed_int8_t a, fixed_int8_t b) {
@@ -172,7 +172,7 @@ fixed_int8_t or_i8(fixed_int8_t a, fixed_int8_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[OR:%.*]] = or <32 x i16> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[OR]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> poison, <32 x i16> [[OR]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i16> [[CAST_SCALABLE]]
 //
 fixed_int16_t or_i16(fixed_int16_t a, fixed_int16_t b) {
@@ -184,7 +184,7 @@ fixed_int16_t or_i16(fixed_int16_t a, fixed_int16_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[OR:%.*]] = or <16 x i32> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[OR]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> poison, <16 x i32> [[OR]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[CAST_SCALABLE]]
 //
 fixed_int32_t or_i32(fixed_int32_t a, fixed_int32_t b) {
@@ -196,7 +196,7 @@ fixed_int32_t or_i32(fixed_int32_t a, fixed_int32_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[OR:%.*]] = or <8 x i64> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[OR]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> poison, <8 x i64> [[OR]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[CAST_SCALABLE]]
 //
 fixed_int64_t or_i64(fixed_int64_t a, fixed_int64_t b) {
@@ -208,7 +208,7 @@ fixed_int64_t or_i64(fixed_int64_t a, fixed_int64_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[OR:%.*]] = or <64 x i8> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[OR]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> poison, <64 x i8> [[OR]], i64 0)
 // CHECK-NEXT:    ret <vscale x 16 x i8> [[CAST_SCALABLE]]
 //
 fixed_uint8_t or_u8(fixed_uint8_t a, fixed_uint8_t b) {
@@ -220,7 +220,7 @@ fixed_uint8_t or_u8(fixed_uint8_t a, fixed_uint8_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[OR:%.*]] = or <32 x i16> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[OR]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> poison, <32 x i16> [[OR]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i16> [[CAST_SCALABLE]]
 //
 fixed_uint16_t or_u16(fixed_uint16_t a, fixed_uint16_t b) {
@@ -232,7 +232,7 @@ fixed_uint16_t or_u16(fixed_uint16_t a, fixed_uint16_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[OR:%.*]] = or <16 x i32> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[OR]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> poison, <16 x i32> [[OR]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[CAST_SCALABLE]]
 //
 fixed_uint32_t or_u32(fixed_uint32_t a, fixed_uint32_t b) {
@@ -244,7 +244,7 @@ fixed_uint32_t or_u32(fixed_uint32_t a, fixed_uint32_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[OR:%.*]] = or <8 x i64> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[OR]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> poison, <8 x i64> [[OR]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[CAST_SCALABLE]]
 //
 fixed_uint64_t or_u64(fixed_uint64_t a, fixed_uint64_t b) {
@@ -260,7 +260,7 @@ fixed_uint64_t or_u64(fixed_uint64_t a, fixed_uint64_t b) {
 // CHECK-NEXT:    [[B_COERCE:%.*]] = bitcast <vscale x 16 x i1> [[TMP1:%.*]] to <vscale x 2 x i8>
 // CHECK-NEXT:    [[B:%.*]] = call <8 x i8> @llvm.vector.extract.v8i8.nxv2i8(<vscale x 2 x i8> [[B_COERCE]], i64 0)
 // CHECK-NEXT:    [[XOR:%.*]] = xor <8 x i8> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v8i8(<vscale x 2 x i8> undef, <8 x i8> [[XOR]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v8i8(<vscale x 2 x i8> poison, <8 x i8> [[XOR]], i64 0)
 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <vscale x 2 x i8> [[CAST_SCALABLE]] to <vscale x 16 x i1>
 // CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
@@ -273,7 +273,7 @@ fixed_bool_t xor_bool(fixed_bool_t a, fixed_bool_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[XOR:%.*]] = xor <64 x i8> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[XOR]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> poison, <64 x i8> [[XOR]], i64 0)
 // CHECK-NEXT:    ret <vscale x 16 x i8> [[CAST_SCALABLE]]
 //
 fixed_int8_t xor_i8(fixed_int8_t a, fixed_int8_t b) {
@@ -285,7 +285,7 @@ fixed_int8_t xor_i8(fixed_int8_t a, fixed_int8_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[XOR:%.*]] = xor <32 x i16> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[XOR]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> poison, <32 x i16> [[XOR]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i16> [[CAST_SCALABLE]]
 //
 fixed_int16_t xor_i16(fixed_int16_t a, fixed_int16_t b) {
@@ -297,7 +297,7 @@ fixed_int16_t xor_i16(fixed_int16_t a, fixed_int16_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[XOR:%.*]] = xor <16 x i32> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[XOR]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> poison, <16 x i32> [[XOR]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[CAST_SCALABLE]]
 //
 fixed_int32_t xor_i32(fixed_int32_t a, fixed_int32_t b) {
@@ -309,7 +309,7 @@ fixed_int32_t xor_i32(fixed_int32_t a, fixed_int32_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[XOR:%.*]] = xor <8 x i64> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[XOR]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> poison, <8 x i64> [[XOR]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[CAST_SCALABLE]]
 //
 fixed_int64_t xor_i64(fixed_int64_t a, fixed_int64_t b) {
@@ -321,7 +321,7 @@ fixed_int64_t xor_i64(fixed_int64_t a, fixed_int64_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[XOR:%.*]] = xor <64 x i8> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[XOR]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> poison, <64 x i8> [[XOR]], i64 0)
 // CHECK-NEXT:    ret <vscale x 16 x i8> [[CAST_SCALABLE]]
 //
 fixed_uint8_t xor_u8(fixed_uint8_t a, fixed_uint8_t b) {
@@ -333,7 +333,7 @@ fixed_uint8_t xor_u8(fixed_uint8_t a, fixed_uint8_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[XOR:%.*]] = xor <32 x i16> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[XOR]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> poison, <32 x i16> [[XOR]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i16> [[CAST_SCALABLE]]
 //
 fixed_uint16_t xor_u16(fixed_uint16_t a, fixed_uint16_t b) {
@@ -345,7 +345,7 @@ fixed_uint16_t xor_u16(fixed_uint16_t a, fixed_uint16_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[XOR:%.*]] = xor <16 x i32> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[XOR]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> poison, <16 x i32> [[XOR]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[CAST_SCALABLE]]
 //
 fixed_uint32_t xor_u32(fixed_uint32_t a, fixed_uint32_t b) {
@@ -357,7 +357,7 @@ fixed_uint32_t xor_u32(fixed_uint32_t a, fixed_uint32_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[XOR:%.*]] = xor <8 x i64> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[XOR]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> poison, <8 x i64> [[XOR]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[CAST_SCALABLE]]
 //
 fixed_uint64_t xor_u64(fixed_uint64_t a, fixed_uint64_t b) {
@@ -371,7 +371,7 @@ fixed_uint64_t xor_u64(fixed_uint64_t a, fixed_uint64_t b) {
 // CHECK-NEXT:    [[A_COERCE:%.*]] = bitcast <vscale x 16 x i1> [[TMP0:%.*]] to <vscale x 2 x i8>
 // CHECK-NEXT:    [[A:%.*]] = call <8 x i8> @llvm.vector.extract.v8i8.nxv2i8(<vscale x 2 x i8> [[A_COERCE]], i64 0)
 // CHECK-NEXT:    [[NOT:%.*]] = xor <8 x i8> [[A]], splat (i8 -1)
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v8i8(<vscale x 2 x i8> undef, <8 x i8> [[NOT]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v8i8(<vscale x 2 x i8> poison, <8 x i8> [[NOT]], i64 0)
 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <vscale x 2 x i8> [[CAST_SCALABLE]] to <vscale x 16 x i1>
 // CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
@@ -383,7 +383,7 @@ fixed_bool_t neg_bool(fixed_bool_t a) {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[NOT:%.*]] = xor <64 x i8> [[A]], splat (i8 -1)
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[NOT]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> poison, <64 x i8> [[NOT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 16 x i8> [[CAST_SCALABLE]]
 //
 fixed_int8_t neg_i8(fixed_int8_t a) {
@@ -394,7 +394,7 @@ fixed_int8_t neg_i8(fixed_int8_t a) {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[NOT:%.*]] = xor <32 x i16> [[A]], splat (i16 -1)
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[NOT]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> poison, <32 x i16> [[NOT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i16> [[CAST_SCALABLE]]
 //
 fixed_int16_t neg_i16(fixed_int16_t a) {
@@ -405,7 +405,7 @@ fixed_int16_t neg_i16(fixed_int16_t a) {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[NOT:%.*]] = xor <16 x i32> [[A]], splat (i32 -1)
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[NOT]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> poison, <16 x i32> [[NOT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[CAST_SCALABLE]]
 //
 fixed_int32_t neg_i32(fixed_int32_t a) {
@@ -416,7 +416,7 @@ fixed_int32_t neg_i32(fixed_int32_t a) {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[NOT:%.*]] = xor <8 x i64> [[A]], splat (i64 -1)
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[NOT]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> poison, <8 x i64> [[NOT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[CAST_SCALABLE]]
 //
 fixed_int64_t neg_i64(fixed_int64_t a) {
@@ -427,7 +427,7 @@ fixed_int64_t neg_i64(fixed_int64_t a) {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[NOT:%.*]] = xor <64 x i8> [[A]], splat (i8 -1)
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[NOT]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> poison, <64 x i8> [[NOT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 16 x i8> [[CAST_SCALABLE]]
 //
 fixed_uint8_t neg_u8(fixed_uint8_t a) {
@@ -438,7 +438,7 @@ fixed_uint8_t neg_u8(fixed_uint8_t a) {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[NOT:%.*]] = xor <32 x i16> [[A]], splat (i16 -1)
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[NOT]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> poison, <32 x i16> [[NOT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i16> [[CAST_SCALABLE]]
 //
 fixed_uint16_t neg_u16(fixed_uint16_t a) {
@@ -449,7 +449,7 @@ fixed_uint16_t neg_u16(fixed_uint16_t a) {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[NOT:%.*]] = xor <16 x i32> [[A]], splat (i32 -1)
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[NOT]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> poison, <16 x i32> [[NOT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[CAST_SCALABLE]]
 //
 fixed_uint32_t neg_u32(fixed_uint32_t a) {
@@ -460,7 +460,7 @@ fixed_uint32_t neg_u32(fixed_uint32_t a) {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[NOT:%.*]] = xor <8 x i64> [[A]], splat (i64 -1)
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[NOT]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> poison, <8 x i64> [[NOT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[CAST_SCALABLE]]
 //
 fixed_uint64_t neg_u64(fixed_uint64_t a) {
diff --git a/clang/test/CodeGen/AArch64/sve-vls-compare-ops.c b/clang/test/CodeGen/AArch64/sve-vls-compare-ops.c
index fe81adfdb08269..3216ea859fbc8e 100644
--- a/clang/test/CodeGen/AArch64/sve-vls-compare-ops.c
+++ b/clang/test/CodeGen/AArch64/sve-vls-compare-ops.c
@@ -35,7 +35,7 @@ typedef svbool_t fixed_bool_t __attribute__((arm_sve_vector_bits(N)));
 // CHECK-NEXT:    [[B:%.*]] = call <8 x i8> @llvm.vector.extract.v8i8.nxv2i8(<vscale x 2 x i8> [[B_COERCE]], i64 0)
 // CHECK-NEXT:    [[CMP:%.*]] = icmp eq <8 x i8> [[A]], [[B]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i8>
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v8i8(<vscale x 2 x i8> undef, <8 x i8> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v8i8(<vscale x 2 x i8> poison, <8 x i8> [[SEXT]], i64 0)
 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <vscale x 2 x i8> [[CASTSCALABLESVE]] to <vscale x 16 x i1>
 // CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
@@ -49,7 +49,7 @@ fixed_bool_t eq_bool(fixed_bool_t a, fixed_bool_t b) {
 // CHECK-NEXT:    [[B:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CMP:%.*]] = icmp eq <64 x i8> [[A]], [[B]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <64 x i1> [[CMP]] to <64 x i8>
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> poison, <64 x i8> [[SEXT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
 //
 fixed_int8_t eq_i8(fixed_int8_t a, fixed_int8_t b) {
@@ -62,7 +62,7 @@ fixed_int8_t eq_i8(fixed_int8_t a, fixed_int8_t b) {
 // CHECK-NEXT:    [[B:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CMP:%.*]] = icmp eq <32 x i16> [[A]], [[B]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <32 x i1> [[CMP]] to <32 x i16>
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> poison, <32 x i16> [[SEXT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
 //
 fixed_int16_t eq_i16(fixed_int16_t a, fixed_int16_t b) {
@@ -75,7 +75,7 @@ fixed_int16_t eq_i16(fixed_int16_t a, fixed_int16_t b) {
 // CHECK-NEXT:    [[B:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CMP:%.*]] = icmp eq <16 x i32> [[A]], [[B]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <16 x i1> [[CMP]] to <16 x i32>
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> poison, <16 x i32> [[SEXT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
 //
 fixed_int32_t eq_i32(fixed_int32_t a, fixed_int32_t b) {
@@ -88,7 +88,7 @@ fixed_int32_t eq_i32(fixed_int32_t a, fixed_int32_t b) {
 // CHECK-NEXT:    [[B:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CMP:%.*]] = icmp eq <8 x i64> [[A]], [[B]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i64>
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> poison, <8 x i64> [[SEXT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
 //
 fixed_int64_t eq_i64(fixed_int64_t a, fixed_int64_t b) {
@@ -101,7 +101,7 @@ fixed_int64_t eq_i64(fixed_int64_t a, fixed_int64_t b) {
 // CHECK-NEXT:    [[B:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CMP:%.*]] = icmp eq <64 x i8> [[A]], [[B]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <64 x i1> [[CMP]] to <64 x i8>
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> poison, <64 x i8> [[SEXT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
 //
 fixed_int8_t eq_u8(fixed_uint8_t a, fixed_uint8_t b) {
@@ -114,7 +114,7 @@ fixed_int8_t eq_u8(fixed_uint8_t a, fixed_uint8_t b) {
 // CHECK-NEXT:    [[B:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CMP:%.*]] = icmp eq <32 x i16> [[A]], [[B]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <32 x i1> [[CMP]] to <32 x i16>
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> poison, <32 x i16> [[SEXT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
 //
 fixed_int16_t eq_u16(fixed_uint16_t a, fixed_uint16_t b) {
@@ -127,7 +127,7 @@ fixed_int16_t eq_u16(fixed_uint16_t a, fixed_uint16_t b) {
 // CHECK-NEXT:    [[B:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CMP:%.*]] = icmp eq <16 x i32> [[A]], [[B]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <16 x i1> [[CMP]] to <16 x i32>
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> poison, <16 x i32> [[SEXT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
 //
 fixed_int32_t eq_u32(fixed_uint32_t a, fixed_uint32_t b) {
@@ -140,7 +140,7 @@ fixed_int32_t eq_u32(fixed_uint32_t a, fixed_uint32_t b) {
 // CHECK-NEXT:    [[B:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CMP:%.*]] = icmp eq <8 x i64> [[A]], [[B]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i64>
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> poison, <8 x i64> [[SEXT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
 //
 fixed_int64_t eq_u64(fixed_uint64_t a, fixed_uint64_t b) {
@@ -156,7 +156,7 @@ fixed_int64_t eq_u64(fixed_uint64_t a, fixed_uint64_t b) {
 // CHECK-NEXT:    [[CMP:%.*]] = fcmp oeq <32 x float> [[CONV]], [[CONV2]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <32 x i1> [[CMP]] to <32 x i32>
 // CHECK-NEXT:    [[CONV3:%.*]] = trunc <32 x i32> [[SEXT]] to <32 x i16>
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[CONV3]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> poison, <32 x i16> [[CONV3]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
 //
 fixed_int16_t eq_f16(fixed_float16_t a, fixed_float16_t b) {
@@ -169,7 +169,7 @@ fixed_int16_t eq_f16(fixed_float16_t a, fixed_float16_t b) {
 // CHECK-NEXT:    [[B:%.*]] = call <16 x float> @llvm.vector.extract.v16f32.nxv4f32(<vscale x 4 x float> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CMP:%.*]] = fcmp oeq <16 x float> [[A]], [[B]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <16 x i1> [[CMP]] to <16 x i32>
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> poison, <16 x i32> [[SEXT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
 //
 fixed_int32_t eq_f32(fixed_float32_t a, fixed_float32_t b) {
@@ -182,7 +182,7 @@ fixed_int32_t eq_f32(fixed_float32_t a, fixed_float32_t b) {
 // CHECK-NEXT:    [[B:%.*]] = call <8 x double> @llvm.vector.extract.v8f64.nxv2f64(<vscale x 2 x double> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CMP:%.*]] = fcmp oeq <8 x double> [[A]], [[B]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i64>
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> poison, <8 x i64> [[SEXT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
 //
 fixed_int64_t eq_f64(fixed_float64_t a, fixed_float64_t b) {
@@ -199,7 +199,7 @@ fixed_int64_t eq_f64(fixed_float64_t a, fixed_float64_t b) {
 // CHECK-NEXT:    [[B:%.*]] = call <8 x i8> @llvm.vector.extract.v8i8.nxv2i8(<vscale x 2 x i8> [[B_COERCE]], i64 0)
 // CHECK-NEXT:    [[CMP:%.*]] = icmp ne <8 x i8> [[A]], [[B]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i8>
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v8i8(<vscale x 2 x i8> undef, <8 x i8> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v8i8(<vscale x 2 x i8> poison, <8 x i8> [[SEXT]], i64 0)
 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <vscale x 2 x i8> [[CASTSCALABLESVE]] to <vscale x 16 x i1>
 // CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
@@ -213,7 +213,7 @@ fixed_bool_t neq_bool(fixed_bool_t a, fixed_bool_t b) {
 // CHECK-NEXT:    [[B:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CMP:%.*]] = icmp ne <64 x i8> [[A]], [[B]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <64 x i1> [[CMP]] to <64 x i8>
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> poison, <64 x i8> [[SEXT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
 //
 fixed_int8_t neq_i8(fixed_int8_t a, fixed_int8_t b) {
@@ -226,7 +226,7 @@ fixed_int8_t neq_i8(fixed_int8_t a, fixed_int8_t b) {
 // CHECK-NEXT:    [[B:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CMP:%.*]] = icmp ne <32 x i16> [[A]], [[B]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <32 x i1> [[CMP]] to <32 x i16>
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> poison, <32 x i16> [[SEXT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
 //
 fixed_int16_t neq_i16(fixed_int16_t a, fixed_int16_t b) {
@@ -239,7 +239,7 @@ fixed_int16_t neq_i16(fixed_int16_t a, fixed_int16_t b) {
 // CHECK-NEXT:    [[B:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CMP:%.*]] = icmp ne <16 x i32> [[A]], [[B]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <16 x i1> [[CMP]] to <16 x i32>
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> poison, <16 x i32> [[SEXT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
 //
 fixed_int32_t neq_i32(fixed_int32_t a, fixed_int32_t b) {
@@ -252,7 +252,7 @@ fixed_int32_t neq_i32(fixed_int32_t a, fixed_int32_t b) {
 // CHECK-NEXT:    [[B:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CMP:%.*]] = icmp ne <8 x i64> [[A]], [[B]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i64>
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> poison, <8 x i64> [[SEXT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
 //
 fixed_int64_t neq_i64(fixed_int64_t a, fixed_int64_t b) {
@@ -265,7 +265,7 @@ fixed_int64_t neq_i64(fixed_int64_t a, fixed_int64_t b) {
 // CHECK-NEXT:    [[B:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CMP:%.*]] = icmp ne <64 x i8> [[A]], [[B]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <64 x i1> [[CMP]] to <64 x i8>
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> poison, <64 x i8> [[SEXT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
 //
 fixed_int8_t neq_u8(fixed_uint8_t a, fixed_uint8_t b) {
@@ -278,7 +278,7 @@ fixed_int8_t neq_u8(fixed_uint8_t a, fixed_uint8_t b) {
 // CHECK-NEXT:    [[B:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CMP:%.*]] = icmp ne <32 x i16> [[A]], [[B]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <32 x i1> [[CMP]] to <32 x i16>
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> poison, <32 x i16> [[SEXT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
 //
 fixed_int16_t neq_u16(fixed_uint16_t a, fixed_uint16_t b) {
@@ -291,7 +291,7 @@ fixed_int16_t neq_u16(fixed_uint16_t a, fixed_uint16_t b) {
 // CHECK-NEXT:    [[B:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CMP:%.*]] = icmp ne <16 x i32> [[A]], [[B]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <16 x i1> [[CMP]] to <16 x i32>
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> poison, <16 x i32> [[SEXT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
 //
 fixed_int32_t neq_u32(fixed_uint32_t a, fixed_uint32_t b) {
@@ -304,7 +304,7 @@ fixed_int32_t neq_u32(fixed_uint32_t a, fixed_uint32_t b) {
 // CHECK-NEXT:    [[B:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CMP:%.*]] = icmp ne <8 x i64> [[A]], [[B]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i64>
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> poison, <8 x i64> [[SEXT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
 //
 fixed_int64_t neq_u64(fixed_uint64_t a, fixed_uint64_t b) {
@@ -320,7 +320,7 @@ fixed_int64_t neq_u64(fixed_uint64_t a, fixed_uint64_t b) {
 // CHECK-NEXT:    [[CMP:%.*]] = fcmp une <32 x float> [[CONV]], [[CONV2]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <32 x i1> [[CMP]] to <32 x i32>
 // CHECK-NEXT:    [[CONV3:%.*]] = trunc <32 x i32> [[SEXT]] to <32 x i16>
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[CONV3]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> poison, <32 x i16> [[CONV3]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
 //
 fixed_int16_t neq_f16(fixed_float16_t a, fixed_float16_t b) {
@@ -333,7 +333,7 @@ fixed_int16_t neq_f16(fixed_float16_t a, fixed_float16_t b) {
 // CHECK-NEXT:    [[B:%.*]] = call <16 x float> @llvm.vector.extract.v16f32.nxv4f32(<vscale x 4 x float> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CMP:%.*]] = fcmp une <16 x float> [[A]], [[B]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <16 x i1> [[CMP]] to <16 x i32>
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> poison, <16 x i32> [[SEXT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
 //
 fixed_int32_t neq_f32(fixed_float32_t a, fixed_float32_t b) {
@@ -346,7 +346,7 @@ fixed_int32_t neq_f32(fixed_float32_t a, fixed_float32_t b) {
 // CHECK-NEXT:    [[B:%.*]] = call <8 x double> @llvm.vector.extract.v8f64.nxv2f64(<vscale x 2 x double> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CMP:%.*]] = fcmp une <8 x double> [[A]], [[B]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i64>
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> poison, <8 x i64> [[SEXT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
 //
 fixed_int64_t neq_f64(fixed_float64_t a, fixed_float64_t b) {
@@ -363,7 +363,7 @@ fixed_int64_t neq_f64(fixed_float64_t a, fixed_float64_t b) {
 // CHECK-NEXT:    [[B:%.*]] = call <8 x i8> @llvm.vector.extract.v8i8.nxv2i8(<vscale x 2 x i8> [[B_COERCE]], i64 0)
 // CHECK-NEXT:    [[CMP:%.*]] = icmp ult <8 x i8> [[A]], [[B]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i8>
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v8i8(<vscale x 2 x i8> undef, <8 x i8> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v8i8(<vscale x 2 x i8> poison, <8 x i8> [[SEXT]], i64 0)
 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <vscale x 2 x i8> [[CASTSCALABLESVE]] to <vscale x 16 x i1>
 // CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
@@ -377,7 +377,7 @@ fixed_bool_t lt_bool(fixed_bool_t a, fixed_bool_t b) {
 // CHECK-NEXT:    [[B:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CMP:%.*]] = icmp slt <64 x i8> [[A]], [[B]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <64 x i1> [[CMP]] to <64 x i8>
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> poison, <64 x i8> [[SEXT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
 //
 fixed_int8_t lt_i8(fixed_int8_t a, fixed_int8_t b) {
@@ -390,7 +390,7 @@ fixed_int8_t lt_i8(fixed_int8_t a, fixed_int8_t b) {
 // CHECK-NEXT:    [[B:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CMP:%.*]] = icmp slt <32 x i16> [[A]], [[B]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <32 x i1> [[CMP]] to <32 x i16>
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> poison, <32 x i16> [[SEXT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
 //
 fixed_int16_t lt_i16(fixed_int16_t a, fixed_int16_t b) {
@@ -403,7 +403,7 @@ fixed_int16_t lt_i16(fixed_int16_t a, fixed_int16_t b) {
 // CHECK-NEXT:    [[B:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CMP:%.*]] = icmp slt <16 x i32> [[A]], [[B]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <16 x i1> [[CMP]] to <16 x i32>
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> poison, <16 x i32> [[SEXT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
 //
 fixed_int32_t lt_i32(fixed_int32_t a, fixed_int32_t b) {
@@ -416,7 +416,7 @@ fixed_int32_t lt_i32(fixed_int32_t a, fixed_int32_t b) {
 // CHECK-NEXT:    [[B:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CMP:%.*]] = icmp slt <8 x i64> [[A]], [[B]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i64>
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> poison, <8 x i64> [[SEXT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
 //
 fixed_int64_t lt_i64(fixed_int64_t a, fixed_int64_t b) {
@@ -429,7 +429,7 @@ fixed_int64_t lt_i64(fixed_int64_t a, fixed_int64_t b) {
 // CHECK-NEXT:    [[B:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CMP:%.*]] = icmp ult <64 x i8> [[A]], [[B]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <64 x i1> [[CMP]] to <64 x i8>
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> poison, <64 x i8> [[SEXT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
 //
 fixed_int8_t lt_u8(fixed_uint8_t a, fixed_uint8_t b) {
@@ -442,7 +442,7 @@ fixed_int8_t lt_u8(fixed_uint8_t a, fixed_uint8_t b) {
 // CHECK-NEXT:    [[B:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CMP:%.*]] = icmp ult <32 x i16> [[A]], [[B]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <32 x i1> [[CMP]] to <32 x i16>
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> poison, <32 x i16> [[SEXT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
 //
 fixed_int16_t lt_u16(fixed_uint16_t a, fixed_uint16_t b) {
@@ -455,7 +455,7 @@ fixed_int16_t lt_u16(fixed_uint16_t a, fixed_uint16_t b) {
 // CHECK-NEXT:    [[B:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CMP:%.*]] = icmp ult <16 x i32> [[A]], [[B]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <16 x i1> [[CMP]] to <16 x i32>
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> poison, <16 x i32> [[SEXT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
 //
 fixed_int32_t lt_u32(fixed_uint32_t a, fixed_uint32_t b) {
@@ -468,7 +468,7 @@ fixed_int32_t lt_u32(fixed_uint32_t a, fixed_uint32_t b) {
 // CHECK-NEXT:    [[B:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CMP:%.*]] = icmp ult <8 x i64> [[A]], [[B]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i64>
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> poison, <8 x i64> [[SEXT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
 //
 fixed_int64_t lt_u64(fixed_uint64_t a, fixed_uint64_t b) {
@@ -484,7 +484,7 @@ fixed_int64_t lt_u64(fixed_uint64_t a, fixed_uint64_t b) {
 // CHECK-NEXT:    [[CMP:%.*]] = fcmp olt <32 x float> [[CONV]], [[CONV2]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <32 x i1> [[CMP]] to <32 x i32>
 // CHECK-NEXT:    [[CONV3:%.*]] = trunc <32 x i32> [[SEXT]] to <32 x i16>
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[CONV3]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> poison, <32 x i16> [[CONV3]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
 //
 fixed_int16_t lt_f16(fixed_float16_t a, fixed_float16_t b) {
@@ -497,7 +497,7 @@ fixed_int16_t lt_f16(fixed_float16_t a, fixed_float16_t b) {
 // CHECK-NEXT:    [[B:%.*]] = call <16 x float> @llvm.vector.extract.v16f32.nxv4f32(<vscale x 4 x float> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CMP:%.*]] = fcmp olt <16 x float> [[A]], [[B]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <16 x i1> [[CMP]] to <16 x i32>
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> poison, <16 x i32> [[SEXT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
 //
 fixed_int32_t lt_f32(fixed_float32_t a, fixed_float32_t b) {
@@ -510,7 +510,7 @@ fixed_int32_t lt_f32(fixed_float32_t a, fixed_float32_t b) {
 // CHECK-NEXT:    [[B:%.*]] = call <8 x double> @llvm.vector.extract.v8f64.nxv2f64(<vscale x 2 x double> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CMP:%.*]] = fcmp olt <8 x double> [[A]], [[B]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i64>
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> poison, <8 x i64> [[SEXT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
 //
 fixed_int64_t lt_f64(fixed_float64_t a, fixed_float64_t b) {
@@ -527,7 +527,7 @@ fixed_int64_t lt_f64(fixed_float64_t a, fixed_float64_t b) {
 // CHECK-NEXT:    [[B:%.*]] = call <8 x i8> @llvm.vector.extract.v8i8.nxv2i8(<vscale x 2 x i8> [[B_COERCE]], i64 0)
 // CHECK-NEXT:    [[CMP:%.*]] = icmp ule <8 x i8> [[A]], [[B]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i8>
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v8i8(<vscale x 2 x i8> undef, <8 x i8> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v8i8(<vscale x 2 x i8> poison, <8 x i8> [[SEXT]], i64 0)
 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <vscale x 2 x i8> [[CASTSCALABLESVE]] to <vscale x 16 x i1>
 // CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
@@ -541,7 +541,7 @@ fixed_bool_t leq_bool(fixed_bool_t a, fixed_bool_t b) {
 // CHECK-NEXT:    [[B:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CMP:%.*]] = icmp sle <64 x i8> [[A]], [[B]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <64 x i1> [[CMP]] to <64 x i8>
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> poison, <64 x i8> [[SEXT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
 //
 fixed_int8_t leq_i8(fixed_int8_t a, fixed_int8_t b) {
@@ -554,7 +554,7 @@ fixed_int8_t leq_i8(fixed_int8_t a, fixed_int8_t b) {
 // CHECK-NEXT:    [[B:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CMP:%.*]] = icmp sle <32 x i16> [[A]], [[B]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <32 x i1> [[CMP]] to <32 x i16>
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> poison, <32 x i16> [[SEXT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
 //
 fixed_int16_t leq_i16(fixed_int16_t a, fixed_int16_t b) {
@@ -567,7 +567,7 @@ fixed_int16_t leq_i16(fixed_int16_t a, fixed_int16_t b) {
 // CHECK-NEXT:    [[B:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CMP:%.*]] = icmp sle <16 x i32> [[A]], [[B]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <16 x i1> [[CMP]] to <16 x i32>
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> poison, <16 x i32> [[SEXT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
 //
 fixed_int32_t leq_i32(fixed_int32_t a, fixed_int32_t b) {
@@ -580,7 +580,7 @@ fixed_int32_t leq_i32(fixed_int32_t a, fixed_int32_t b) {
 // CHECK-NEXT:    [[B:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CMP:%.*]] = icmp sle <8 x i64> [[A]], [[B]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i64>
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> poison, <8 x i64> [[SEXT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
 //
 fixed_int64_t leq_i64(fixed_int64_t a, fixed_int64_t b) {
@@ -593,7 +593,7 @@ fixed_int64_t leq_i64(fixed_int64_t a, fixed_int64_t b) {
 // CHECK-NEXT:    [[B:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CMP:%.*]] = icmp ule <64 x i8> [[A]], [[B]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <64 x i1> [[CMP]] to <64 x i8>
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> poison, <64 x i8> [[SEXT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
 //
 fixed_int8_t leq_u8(fixed_uint8_t a, fixed_uint8_t b) {
@@ -606,7 +606,7 @@ fixed_int8_t leq_u8(fixed_uint8_t a, fixed_uint8_t b) {
 // CHECK-NEXT:    [[B:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CMP:%.*]] = icmp ule <32 x i16> [[A]], [[B]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <32 x i1> [[CMP]] to <32 x i16>
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> poison, <32 x i16> [[SEXT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
 //
 fixed_int16_t leq_u16(fixed_uint16_t a, fixed_uint16_t b) {
@@ -619,7 +619,7 @@ fixed_int16_t leq_u16(fixed_uint16_t a, fixed_uint16_t b) {
 // CHECK-NEXT:    [[B:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CMP:%.*]] = icmp ule <16 x i32> [[A]], [[B]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <16 x i1> [[CMP]] to <16 x i32>
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> poison, <16 x i32> [[SEXT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
 //
 fixed_int32_t leq_u32(fixed_uint32_t a, fixed_uint32_t b) {
@@ -632,7 +632,7 @@ fixed_int32_t leq_u32(fixed_uint32_t a, fixed_uint32_t b) {
 // CHECK-NEXT:    [[B:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CMP:%.*]] = icmp ule <8 x i64> [[A]], [[B]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i64>
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> poison, <8 x i64> [[SEXT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
 //
 fixed_int64_t leq_u64(fixed_uint64_t a, fixed_uint64_t b) {
@@ -648,7 +648,7 @@ fixed_int64_t leq_u64(fixed_uint64_t a, fixed_uint64_t b) {
 // CHECK-NEXT:    [[CMP:%.*]] = fcmp ole <32 x float> [[CONV]], [[CONV2]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <32 x i1> [[CMP]] to <32 x i32>
 // CHECK-NEXT:    [[CONV3:%.*]] = trunc <32 x i32> [[SEXT]] to <32 x i16>
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[CONV3]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> poison, <32 x i16> [[CONV3]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
 //
 fixed_int16_t leq_f16(fixed_float16_t a, fixed_float16_t b) {
@@ -661,7 +661,7 @@ fixed_int16_t leq_f16(fixed_float16_t a, fixed_float16_t b) {
 // CHECK-NEXT:    [[B:%.*]] = call <16 x float> @llvm.vector.extract.v16f32.nxv4f32(<vscale x 4 x float> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CMP:%.*]] = fcmp ole <16 x float> [[A]], [[B]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <16 x i1> [[CMP]] to <16 x i32>
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> poison, <16 x i32> [[SEXT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
 //
 fixed_int32_t leq_f32(fixed_float32_t a, fixed_float32_t b) {
@@ -674,7 +674,7 @@ fixed_int32_t leq_f32(fixed_float32_t a, fixed_float32_t b) {
 // CHECK-NEXT:    [[B:%.*]] = call <8 x double> @llvm.vector.extract.v8f64.nxv2f64(<vscale x 2 x double> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CMP:%.*]] = fcmp ole <8 x double> [[A]], [[B]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i64>
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> poison, <8 x i64> [[SEXT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
 //
 fixed_int64_t leq_f64(fixed_float64_t a, fixed_float64_t b) {
@@ -691,7 +691,7 @@ fixed_int64_t leq_f64(fixed_float64_t a, fixed_float64_t b) {
 // CHECK-NEXT:    [[B:%.*]] = call <8 x i8> @llvm.vector.extract.v8i8.nxv2i8(<vscale x 2 x i8> [[B_COERCE]], i64 0)
 // CHECK-NEXT:    [[CMP:%.*]] = icmp ugt <8 x i8> [[A]], [[B]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i8>
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v8i8(<vscale x 2 x i8> undef, <8 x i8> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v8i8(<vscale x 2 x i8> poison, <8 x i8> [[SEXT]], i64 0)
 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <vscale x 2 x i8> [[CASTSCALABLESVE]] to <vscale x 16 x i1>
 // CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
@@ -705,7 +705,7 @@ fixed_bool_t gt_bool(fixed_bool_t a, fixed_bool_t b) {
 // CHECK-NEXT:    [[B:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CMP:%.*]] = icmp sgt <64 x i8> [[A]], [[B]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <64 x i1> [[CMP]] to <64 x i8>
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> poison, <64 x i8> [[SEXT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
 //
 fixed_int8_t gt_i8(fixed_int8_t a, fixed_int8_t b) {
@@ -718,7 +718,7 @@ fixed_int8_t gt_i8(fixed_int8_t a, fixed_int8_t b) {
 // CHECK-NEXT:    [[B:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CMP:%.*]] = icmp sgt <32 x i16> [[A]], [[B]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <32 x i1> [[CMP]] to <32 x i16>
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> poison, <32 x i16> [[SEXT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
 //
 fixed_int16_t gt_i16(fixed_int16_t a, fixed_int16_t b) {
@@ -731,7 +731,7 @@ fixed_int16_t gt_i16(fixed_int16_t a, fixed_int16_t b) {
 // CHECK-NEXT:    [[B:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CMP:%.*]] = icmp sgt <16 x i32> [[A]], [[B]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <16 x i1> [[CMP]] to <16 x i32>
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> poison, <16 x i32> [[SEXT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
 //
 fixed_int32_t gt_i32(fixed_int32_t a, fixed_int32_t b) {
@@ -744,7 +744,7 @@ fixed_int32_t gt_i32(fixed_int32_t a, fixed_int32_t b) {
 // CHECK-NEXT:    [[B:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CMP:%.*]] = icmp sgt <8 x i64> [[A]], [[B]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i64>
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> poison, <8 x i64> [[SEXT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
 //
 fixed_int64_t gt_i64(fixed_int64_t a, fixed_int64_t b) {
@@ -757,7 +757,7 @@ fixed_int64_t gt_i64(fixed_int64_t a, fixed_int64_t b) {
 // CHECK-NEXT:    [[B:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CMP:%.*]] = icmp ugt <64 x i8> [[A]], [[B]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <64 x i1> [[CMP]] to <64 x i8>
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> poison, <64 x i8> [[SEXT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
 //
 fixed_int8_t gt_u8(fixed_uint8_t a, fixed_uint8_t b) {
@@ -770,7 +770,7 @@ fixed_int8_t gt_u8(fixed_uint8_t a, fixed_uint8_t b) {
 // CHECK-NEXT:    [[B:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CMP:%.*]] = icmp ugt <32 x i16> [[A]], [[B]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <32 x i1> [[CMP]] to <32 x i16>
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> poison, <32 x i16> [[SEXT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
 //
 fixed_int16_t gt_u16(fixed_uint16_t a, fixed_uint16_t b) {
@@ -783,7 +783,7 @@ fixed_int16_t gt_u16(fixed_uint16_t a, fixed_uint16_t b) {
 // CHECK-NEXT:    [[B:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CMP:%.*]] = icmp ugt <16 x i32> [[A]], [[B]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <16 x i1> [[CMP]] to <16 x i32>
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> poison, <16 x i32> [[SEXT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
 //
 fixed_int32_t gt_u32(fixed_uint32_t a, fixed_uint32_t b) {
@@ -796,7 +796,7 @@ fixed_int32_t gt_u32(fixed_uint32_t a, fixed_uint32_t b) {
 // CHECK-NEXT:    [[B:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CMP:%.*]] = icmp ugt <8 x i64> [[A]], [[B]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i64>
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> poison, <8 x i64> [[SEXT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
 //
 fixed_int64_t gt_u64(fixed_uint64_t a, fixed_uint64_t b) {
@@ -812,7 +812,7 @@ fixed_int64_t gt_u64(fixed_uint64_t a, fixed_uint64_t b) {
 // CHECK-NEXT:    [[CMP:%.*]] = fcmp ogt <32 x float> [[CONV]], [[CONV2]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <32 x i1> [[CMP]] to <32 x i32>
 // CHECK-NEXT:    [[CONV3:%.*]] = trunc <32 x i32> [[SEXT]] to <32 x i16>
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[CONV3]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> poison, <32 x i16> [[CONV3]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
 //
 fixed_int16_t gt_f16(fixed_float16_t a, fixed_float16_t b) {
@@ -825,7 +825,7 @@ fixed_int16_t gt_f16(fixed_float16_t a, fixed_float16_t b) {
 // CHECK-NEXT:    [[B:%.*]] = call <16 x float> @llvm.vector.extract.v16f32.nxv4f32(<vscale x 4 x float> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CMP:%.*]] = fcmp ogt <16 x float> [[A]], [[B]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <16 x i1> [[CMP]] to <16 x i32>
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> poison, <16 x i32> [[SEXT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
 //
 fixed_int32_t gt_f32(fixed_float32_t a, fixed_float32_t b) {
@@ -838,7 +838,7 @@ fixed_int32_t gt_f32(fixed_float32_t a, fixed_float32_t b) {
 // CHECK-NEXT:    [[B:%.*]] = call <8 x double> @llvm.vector.extract.v8f64.nxv2f64(<vscale x 2 x double> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CMP:%.*]] = fcmp ogt <8 x double> [[A]], [[B]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i64>
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> poison, <8 x i64> [[SEXT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
 //
 fixed_int64_t gt_f64(fixed_float64_t a, fixed_float64_t b) {
@@ -855,7 +855,7 @@ fixed_int64_t gt_f64(fixed_float64_t a, fixed_float64_t b) {
 // CHECK-NEXT:    [[B:%.*]] = call <8 x i8> @llvm.vector.extract.v8i8.nxv2i8(<vscale x 2 x i8> [[B_COERCE]], i64 0)
 // CHECK-NEXT:    [[CMP:%.*]] = icmp uge <8 x i8> [[A]], [[B]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i8>
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v8i8(<vscale x 2 x i8> undef, <8 x i8> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v8i8(<vscale x 2 x i8> poison, <8 x i8> [[SEXT]], i64 0)
 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <vscale x 2 x i8> [[CASTSCALABLESVE]] to <vscale x 16 x i1>
 // CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
@@ -869,7 +869,7 @@ fixed_bool_t geq_bool(fixed_bool_t a, fixed_bool_t b) {
 // CHECK-NEXT:    [[B:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CMP:%.*]] = icmp sge <64 x i8> [[A]], [[B]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <64 x i1> [[CMP]] to <64 x i8>
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> poison, <64 x i8> [[SEXT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
 //
 fixed_int8_t geq_i8(fixed_int8_t a, fixed_int8_t b) {
@@ -882,7 +882,7 @@ fixed_int8_t geq_i8(fixed_int8_t a, fixed_int8_t b) {
 // CHECK-NEXT:    [[B:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CMP:%.*]] = icmp sge <32 x i16> [[A]], [[B]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <32 x i1> [[CMP]] to <32 x i16>
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> poison, <32 x i16> [[SEXT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
 //
 fixed_int16_t geq_i16(fixed_int16_t a, fixed_int16_t b) {
@@ -895,7 +895,7 @@ fixed_int16_t geq_i16(fixed_int16_t a, fixed_int16_t b) {
 // CHECK-NEXT:    [[B:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CMP:%.*]] = icmp sge <16 x i32> [[A]], [[B]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <16 x i1> [[CMP]] to <16 x i32>
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> poison, <16 x i32> [[SEXT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
 //
 fixed_int32_t geq_i32(fixed_int32_t a, fixed_int32_t b) {
@@ -908,7 +908,7 @@ fixed_int32_t geq_i32(fixed_int32_t a, fixed_int32_t b) {
 // CHECK-NEXT:    [[B:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CMP:%.*]] = icmp sge <8 x i64> [[A]], [[B]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i64>
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> poison, <8 x i64> [[SEXT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
 //
 fixed_int64_t geq_i64(fixed_int64_t a, fixed_int64_t b) {
@@ -921,7 +921,7 @@ fixed_int64_t geq_i64(fixed_int64_t a, fixed_int64_t b) {
 // CHECK-NEXT:    [[B:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CMP:%.*]] = icmp uge <64 x i8> [[A]], [[B]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <64 x i1> [[CMP]] to <64 x i8>
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> poison, <64 x i8> [[SEXT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
 //
 fixed_int8_t geq_u8(fixed_uint8_t a, fixed_uint8_t b) {
@@ -934,7 +934,7 @@ fixed_int8_t geq_u8(fixed_uint8_t a, fixed_uint8_t b) {
 // CHECK-NEXT:    [[B:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CMP:%.*]] = icmp uge <32 x i16> [[A]], [[B]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <32 x i1> [[CMP]] to <32 x i16>
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> poison, <32 x i16> [[SEXT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
 //
 fixed_int16_t geq_u16(fixed_uint16_t a, fixed_uint16_t b) {
@@ -947,7 +947,7 @@ fixed_int16_t geq_u16(fixed_uint16_t a, fixed_uint16_t b) {
 // CHECK-NEXT:    [[B:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CMP:%.*]] = icmp uge <16 x i32> [[A]], [[B]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <16 x i1> [[CMP]] to <16 x i32>
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> poison, <16 x i32> [[SEXT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
 //
 fixed_int32_t geq_u32(fixed_uint32_t a, fixed_uint32_t b) {
@@ -960,7 +960,7 @@ fixed_int32_t geq_u32(fixed_uint32_t a, fixed_uint32_t b) {
 // CHECK-NEXT:    [[B:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CMP:%.*]] = icmp uge <8 x i64> [[A]], [[B]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i64>
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> poison, <8 x i64> [[SEXT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
 //
 fixed_int64_t geq_u64(fixed_uint64_t a, fixed_uint64_t b) {
@@ -976,7 +976,7 @@ fixed_int64_t geq_u64(fixed_uint64_t a, fixed_uint64_t b) {
 // CHECK-NEXT:    [[CMP:%.*]] = fcmp oge <32 x float> [[CONV]], [[CONV2]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <32 x i1> [[CMP]] to <32 x i32>
 // CHECK-NEXT:    [[CONV3:%.*]] = trunc <32 x i32> [[SEXT]] to <32 x i16>
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[CONV3]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> poison, <32 x i16> [[CONV3]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
 //
 fixed_int16_t geq_f16(fixed_float16_t a, fixed_float16_t b) {
@@ -989,7 +989,7 @@ fixed_int16_t geq_f16(fixed_float16_t a, fixed_float16_t b) {
 // CHECK-NEXT:    [[B:%.*]] = call <16 x float> @llvm.vector.extract.v16f32.nxv4f32(<vscale x 4 x float> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CMP:%.*]] = fcmp oge <16 x float> [[A]], [[B]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <16 x i1> [[CMP]] to <16 x i32>
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> poison, <16 x i32> [[SEXT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
 //
 fixed_int32_t geq_f32(fixed_float32_t a, fixed_float32_t b) {
@@ -1002,7 +1002,7 @@ fixed_int32_t geq_f32(fixed_float32_t a, fixed_float32_t b) {
 // CHECK-NEXT:    [[B:%.*]] = call <8 x double> @llvm.vector.extract.v8f64.nxv2f64(<vscale x 2 x double> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CMP:%.*]] = fcmp oge <8 x double> [[A]], [[B]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i64>
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> poison, <8 x i64> [[SEXT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
 //
 fixed_int64_t geq_f64(fixed_float64_t a, fixed_float64_t b) {
diff --git a/clang/test/CodeGen/AArch64/sve-vls-shift-ops.c b/clang/test/CodeGen/AArch64/sve-vls-shift-ops.c
index 05b8f3535df703..109cbb9b47a8ed 100644
--- a/clang/test/CodeGen/AArch64/sve-vls-shift-ops.c
+++ b/clang/test/CodeGen/AArch64/sve-vls-shift-ops.c
@@ -30,7 +30,7 @@ typedef svbool_t fixed_bool_t __attribute__((arm_sve_vector_bits(N)));
 // CHECK-NEXT:    [[A:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[SHL:%.*]] = shl <64 x i8> [[A]], [[B]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[SHL]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> poison, <64 x i8> [[SHL]], i64 0)
 // CHECK-NEXT:    ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
 //
 fixed_int8_t lshift_i8(fixed_int8_t a, fixed_int8_t b) {
@@ -42,7 +42,7 @@ fixed_int8_t lshift_i8(fixed_int8_t a, fixed_int8_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[SHR:%.*]] = ashr <64 x i8> [[A]], [[B]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[SHR]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> poison, <64 x i8> [[SHR]], i64 0)
 // CHECK-NEXT:    ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
 //
 fixed_int8_t rshift_i8(fixed_int8_t a, fixed_int8_t b) {
@@ -54,7 +54,7 @@ fixed_int8_t rshift_i8(fixed_int8_t a, fixed_int8_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[SHL:%.*]] = shl <64 x i8> [[A]], [[B]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[SHL]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> poison, <64 x i8> [[SHL]], i64 0)
 // CHECK-NEXT:    ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
 //
 fixed_uint8_t lshift_u8(fixed_uint8_t a, fixed_uint8_t b) {
@@ -66,7 +66,7 @@ fixed_uint8_t lshift_u8(fixed_uint8_t a, fixed_uint8_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[SHR:%.*]] = lshr <64 x i8> [[A]], [[B]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[SHR]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> poison, <64 x i8> [[SHR]], i64 0)
 // CHECK-NEXT:    ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
 //
 fixed_uint8_t rshift_u8(fixed_uint8_t a, fixed_uint8_t b) {
@@ -78,7 +78,7 @@ fixed_uint8_t rshift_u8(fixed_uint8_t a, fixed_uint8_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[SHL:%.*]] = shl <32 x i16> [[A]], [[B]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[SHL]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> poison, <32 x i16> [[SHL]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
 //
 fixed_int16_t lshift_i16(fixed_int16_t a, fixed_int16_t b) {
@@ -90,7 +90,7 @@ fixed_int16_t lshift_i16(fixed_int16_t a, fixed_int16_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[SHR:%.*]] = ashr <32 x i16> [[A]], [[B]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[SHR]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> poison, <32 x i16> [[SHR]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
 //
 fixed_int16_t rshift_i16(fixed_int16_t a, fixed_int16_t b) {
@@ -102,7 +102,7 @@ fixed_int16_t rshift_i16(fixed_int16_t a, fixed_int16_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[SHL:%.*]] = shl <32 x i16> [[A]], [[B]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[SHL]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> poison, <32 x i16> [[SHL]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
 //
 fixed_uint16_t lshift_u16(fixed_uint16_t a, fixed_uint16_t b) {
@@ -114,7 +114,7 @@ fixed_uint16_t lshift_u16(fixed_uint16_t a, fixed_uint16_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[SHR:%.*]] = lshr <32 x i16> [[A]], [[B]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[SHR]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> poison, <32 x i16> [[SHR]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
 //
 fixed_uint16_t rshift_u16(fixed_uint16_t a, fixed_uint16_t b) {
@@ -126,7 +126,7 @@ fixed_uint16_t rshift_u16(fixed_uint16_t a, fixed_uint16_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[SHL:%.*]] = shl <16 x i32> [[A]], [[B]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[SHL]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> poison, <16 x i32> [[SHL]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
 //
 fixed_int32_t lshift_i32(fixed_int32_t a, fixed_int32_t b) {
@@ -138,7 +138,7 @@ fixed_int32_t lshift_i32(fixed_int32_t a, fixed_int32_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[SHR:%.*]] = ashr <16 x i32> [[A]], [[B]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[SHR]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> poison, <16 x i32> [[SHR]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
 //
 fixed_int32_t rshift_i32(fixed_int32_t a, fixed_int32_t b) {
@@ -150,7 +150,7 @@ fixed_int32_t rshift_i32(fixed_int32_t a, fixed_int32_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[SHL:%.*]] = shl <16 x i32> [[A]], [[B]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[SHL]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> poison, <16 x i32> [[SHL]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
 //
 fixed_uint32_t lshift_u32(fixed_uint32_t a, fixed_uint32_t b) {
@@ -162,7 +162,7 @@ fixed_uint32_t lshift_u32(fixed_uint32_t a, fixed_uint32_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[SHR:%.*]] = lshr <16 x i32> [[A]], [[B]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[SHR]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> poison, <16 x i32> [[SHR]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
 //
 fixed_uint32_t rshift_u32(fixed_uint32_t a, fixed_uint32_t b) {
@@ -174,7 +174,7 @@ fixed_uint32_t rshift_u32(fixed_uint32_t a, fixed_uint32_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[SHL:%.*]] = shl <8 x i64> [[A]], [[B]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[SHL]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> poison, <8 x i64> [[SHL]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
 //
 fixed_int64_t lshift_i64(fixed_int64_t a, fixed_int64_t b) {
@@ -186,7 +186,7 @@ fixed_int64_t lshift_i64(fixed_int64_t a, fixed_int64_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[SHR:%.*]] = ashr <8 x i64> [[A]], [[B]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[SHR]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> poison, <8 x i64> [[SHR]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
 //
 fixed_int64_t rshift_i64(fixed_int64_t a, fixed_int64_t b) {
@@ -198,7 +198,7 @@ fixed_int64_t rshift_i64(fixed_int64_t a, fixed_int64_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[SHL:%.*]] = shl <8 x i64> [[A]], [[B]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[SHL]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> poison, <8 x i64> [[SHL]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
 //
 fixed_uint64_t lshift_u64(fixed_uint64_t a, fixed_uint64_t b) {
@@ -210,7 +210,7 @@ fixed_uint64_t lshift_u64(fixed_uint64_t a, fixed_uint64_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[SHR:%.*]] = lshr <8 x i64> [[A]], [[B]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[SHR]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> poison, <8 x i64> [[SHR]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
 //
 fixed_uint64_t rshift_u64(fixed_uint64_t a, fixed_uint64_t b) {
@@ -225,7 +225,7 @@ fixed_uint64_t rshift_u64(fixed_uint64_t a, fixed_uint64_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <64 x i32> [[SPLAT_SPLATINSERT]], <64 x i32> poison, <64 x i32> zeroinitializer
 // CHECK-NEXT:    [[SH_PROM:%.*]] = trunc <64 x i32> [[SPLAT_SPLAT]] to <64 x i8>
 // CHECK-NEXT:    [[SHL:%.*]] = shl <64 x i8> [[A]], [[SH_PROM]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[SHL]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> poison, <64 x i8> [[SHL]], i64 0)
 // CHECK-NEXT:    ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
 //
 fixed_int8_t lshift_i8_rsplat(fixed_int8_t a, int8_t b) {
@@ -238,7 +238,7 @@ fixed_int8_t lshift_i8_rsplat(fixed_int8_t a, int8_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <64 x i8> poison, i8 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <64 x i8> [[SPLAT_SPLATINSERT]], <64 x i8> poison, <64 x i32> zeroinitializer
 // CHECK-NEXT:    [[SHL:%.*]] = shl <64 x i8> [[SPLAT_SPLAT]], [[A]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[SHL]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> poison, <64 x i8> [[SHL]], i64 0)
 // CHECK-NEXT:    ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
 //
 fixed_int8_t lshift_i8_lsplat(fixed_int8_t a, int8_t b) {
@@ -253,7 +253,7 @@ fixed_int8_t lshift_i8_lsplat(fixed_int8_t a, int8_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <64 x i32> [[SPLAT_SPLATINSERT]], <64 x i32> poison, <64 x i32> zeroinitializer
 // CHECK-NEXT:    [[SH_PROM:%.*]] = trunc <64 x i32> [[SPLAT_SPLAT]] to <64 x i8>
 // CHECK-NEXT:    [[SHR:%.*]] = ashr <64 x i8> [[A]], [[SH_PROM]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[SHR]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> poison, <64 x i8> [[SHR]], i64 0)
 // CHECK-NEXT:    ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
 //
 fixed_int8_t rshift_i8_rsplat(fixed_int8_t a, int8_t b) {
@@ -266,7 +266,7 @@ fixed_int8_t rshift_i8_rsplat(fixed_int8_t a, int8_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <64 x i8> poison, i8 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <64 x i8> [[SPLAT_SPLATINSERT]], <64 x i8> poison, <64 x i32> zeroinitializer
 // CHECK-NEXT:    [[SHR:%.*]] = ashr <64 x i8> [[SPLAT_SPLAT]], [[A]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[SHR]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> poison, <64 x i8> [[SHR]], i64 0)
 // CHECK-NEXT:    ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
 //
 fixed_int8_t rshift_i8_lsplat(fixed_int8_t a, int8_t b) {
@@ -281,7 +281,7 @@ fixed_int8_t rshift_i8_lsplat(fixed_int8_t a, int8_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <64 x i32> [[SPLAT_SPLATINSERT]], <64 x i32> poison, <64 x i32> zeroinitializer
 // CHECK-NEXT:    [[SH_PROM:%.*]] = trunc <64 x i32> [[SPLAT_SPLAT]] to <64 x i8>
 // CHECK-NEXT:    [[SHL:%.*]] = shl <64 x i8> [[A]], [[SH_PROM]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[SHL]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> poison, <64 x i8> [[SHL]], i64 0)
 // CHECK-NEXT:    ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
 //
 fixed_uint8_t lshift_u8_rsplat(fixed_uint8_t a, uint8_t b) {
@@ -294,7 +294,7 @@ fixed_uint8_t lshift_u8_rsplat(fixed_uint8_t a, uint8_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <64 x i8> poison, i8 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <64 x i8> [[SPLAT_SPLATINSERT]], <64 x i8> poison, <64 x i32> zeroinitializer
 // CHECK-NEXT:    [[SHL:%.*]] = shl <64 x i8> [[SPLAT_SPLAT]], [[A]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[SHL]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> poison, <64 x i8> [[SHL]], i64 0)
 // CHECK-NEXT:    ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
 //
 fixed_uint8_t lshift_u8_lsplat(fixed_uint8_t a, uint8_t b) {
@@ -309,7 +309,7 @@ fixed_uint8_t lshift_u8_lsplat(fixed_uint8_t a, uint8_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <64 x i32> [[SPLAT_SPLATINSERT]], <64 x i32> poison, <64 x i32> zeroinitializer
 // CHECK-NEXT:    [[SH_PROM:%.*]] = trunc <64 x i32> [[SPLAT_SPLAT]] to <64 x i8>
 // CHECK-NEXT:    [[SHR:%.*]] = lshr <64 x i8> [[A]], [[SH_PROM]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[SHR]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> poison, <64 x i8> [[SHR]], i64 0)
 // CHECK-NEXT:    ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
 //
 fixed_uint8_t rshift_u8_rsplat(fixed_uint8_t a, uint8_t b) {
@@ -322,7 +322,7 @@ fixed_uint8_t rshift_u8_rsplat(fixed_uint8_t a, uint8_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <64 x i8> poison, i8 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <64 x i8> [[SPLAT_SPLATINSERT]], <64 x i8> poison, <64 x i32> zeroinitializer
 // CHECK-NEXT:    [[SHR:%.*]] = lshr <64 x i8> [[SPLAT_SPLAT]], [[A]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[SHR]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> poison, <64 x i8> [[SHR]], i64 0)
 // CHECK-NEXT:    ret <vscale x 16 x i8> [[CASTSCALABLESVE]]
 //
 fixed_uint8_t rshift_u8_lsplat(fixed_uint8_t a, uint8_t b) {
@@ -337,7 +337,7 @@ fixed_uint8_t rshift_u8_lsplat(fixed_uint8_t a, uint8_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <32 x i32> [[SPLAT_SPLATINSERT]], <32 x i32> poison, <32 x i32> zeroinitializer
 // CHECK-NEXT:    [[SH_PROM:%.*]] = trunc <32 x i32> [[SPLAT_SPLAT]] to <32 x i16>
 // CHECK-NEXT:    [[SHL:%.*]] = shl <32 x i16> [[A]], [[SH_PROM]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[SHL]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> poison, <32 x i16> [[SHL]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
 //
 fixed_int16_t lshift_i16_rsplat(fixed_int16_t a, int16_t b) {
@@ -350,7 +350,7 @@ fixed_int16_t lshift_i16_rsplat(fixed_int16_t a, int16_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i16> poison, i16 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <32 x i16> [[SPLAT_SPLATINSERT]], <32 x i16> poison, <32 x i32> zeroinitializer
 // CHECK-NEXT:    [[SHL:%.*]] = shl <32 x i16> [[SPLAT_SPLAT]], [[A]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[SHL]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> poison, <32 x i16> [[SHL]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
 //
 fixed_int16_t lshift_i16_lsplat(fixed_int16_t a, int16_t b) {
@@ -365,7 +365,7 @@ fixed_int16_t lshift_i16_lsplat(fixed_int16_t a, int16_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <32 x i32> [[SPLAT_SPLATINSERT]], <32 x i32> poison, <32 x i32> zeroinitializer
 // CHECK-NEXT:    [[SH_PROM:%.*]] = trunc <32 x i32> [[SPLAT_SPLAT]] to <32 x i16>
 // CHECK-NEXT:    [[SHR:%.*]] = ashr <32 x i16> [[A]], [[SH_PROM]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[SHR]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> poison, <32 x i16> [[SHR]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
 //
 fixed_int16_t rshift_i16_rsplat(fixed_int16_t a, int16_t b) {
@@ -378,7 +378,7 @@ fixed_int16_t rshift_i16_rsplat(fixed_int16_t a, int16_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i16> poison, i16 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <32 x i16> [[SPLAT_SPLATINSERT]], <32 x i16> poison, <32 x i32> zeroinitializer
 // CHECK-NEXT:    [[SHR:%.*]] = ashr <32 x i16> [[SPLAT_SPLAT]], [[A]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[SHR]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> poison, <32 x i16> [[SHR]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
 //
 fixed_int16_t rshift_i16_lsplat(fixed_int16_t a, int16_t b) {
@@ -393,7 +393,7 @@ fixed_int16_t rshift_i16_lsplat(fixed_int16_t a, int16_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <32 x i32> [[SPLAT_SPLATINSERT]], <32 x i32> poison, <32 x i32> zeroinitializer
 // CHECK-NEXT:    [[SH_PROM:%.*]] = trunc <32 x i32> [[SPLAT_SPLAT]] to <32 x i16>
 // CHECK-NEXT:    [[SHL:%.*]] = shl <32 x i16> [[A]], [[SH_PROM]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[SHL]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> poison, <32 x i16> [[SHL]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
 //
 fixed_uint16_t lshift_u16_rsplat(fixed_uint16_t a, uint16_t b) {
@@ -406,7 +406,7 @@ fixed_uint16_t lshift_u16_rsplat(fixed_uint16_t a, uint16_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i16> poison, i16 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <32 x i16> [[SPLAT_SPLATINSERT]], <32 x i16> poison, <32 x i32> zeroinitializer
 // CHECK-NEXT:    [[SHL:%.*]] = shl <32 x i16> [[SPLAT_SPLAT]], [[A]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[SHL]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> poison, <32 x i16> [[SHL]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
 //
 fixed_uint16_t lshift_u16_lsplat(fixed_uint16_t a, uint16_t b) {
@@ -421,7 +421,7 @@ fixed_uint16_t lshift_u16_lsplat(fixed_uint16_t a, uint16_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <32 x i32> [[SPLAT_SPLATINSERT]], <32 x i32> poison, <32 x i32> zeroinitializer
 // CHECK-NEXT:    [[SH_PROM:%.*]] = trunc <32 x i32> [[SPLAT_SPLAT]] to <32 x i16>
 // CHECK-NEXT:    [[SHR:%.*]] = lshr <32 x i16> [[A]], [[SH_PROM]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[SHR]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> poison, <32 x i16> [[SHR]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
 //
 fixed_uint16_t rshift_u16_rsplat(fixed_uint16_t a, uint16_t b) {
@@ -434,7 +434,7 @@ fixed_uint16_t rshift_u16_rsplat(fixed_uint16_t a, uint16_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i16> poison, i16 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <32 x i16> [[SPLAT_SPLATINSERT]], <32 x i16> poison, <32 x i32> zeroinitializer
 // CHECK-NEXT:    [[SHR:%.*]] = lshr <32 x i16> [[SPLAT_SPLAT]], [[A]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[SHR]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> poison, <32 x i16> [[SHR]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i16> [[CASTSCALABLESVE]]
 //
 fixed_uint16_t rshift_u16_lsplat(fixed_uint16_t a, uint16_t b) {
@@ -447,7 +447,7 @@ fixed_uint16_t rshift_u16_lsplat(fixed_uint16_t a, uint16_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i32> [[SPLAT_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer
 // CHECK-NEXT:    [[SHL:%.*]] = shl <16 x i32> [[A]], [[SPLAT_SPLAT]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[SHL]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> poison, <16 x i32> [[SHL]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
 //
 fixed_int32_t lshift_i32_rsplat(fixed_int32_t a, int32_t b) {
@@ -460,7 +460,7 @@ fixed_int32_t lshift_i32_rsplat(fixed_int32_t a, int32_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i32> [[SPLAT_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer
 // CHECK-NEXT:    [[SHL:%.*]] = shl <16 x i32> [[SPLAT_SPLAT]], [[A]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[SHL]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> poison, <16 x i32> [[SHL]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
 //
 fixed_int32_t lshift_i32_lsplat(fixed_int32_t a, int32_t b) {
@@ -473,7 +473,7 @@ fixed_int32_t lshift_i32_lsplat(fixed_int32_t a, int32_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i32> [[SPLAT_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer
 // CHECK-NEXT:    [[SHR:%.*]] = ashr <16 x i32> [[A]], [[SPLAT_SPLAT]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[SHR]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> poison, <16 x i32> [[SHR]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
 //
 fixed_int32_t rshift_i32_rsplat(fixed_int32_t a, int32_t b) {
@@ -486,7 +486,7 @@ fixed_int32_t rshift_i32_rsplat(fixed_int32_t a, int32_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i32> [[SPLAT_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer
 // CHECK-NEXT:    [[SHR:%.*]] = ashr <16 x i32> [[SPLAT_SPLAT]], [[A]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[SHR]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> poison, <16 x i32> [[SHR]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
 //
 fixed_int32_t rshift_i32_lsplat(fixed_int32_t a, int32_t b) {
@@ -499,7 +499,7 @@ fixed_int32_t rshift_i32_lsplat(fixed_int32_t a, int32_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i32> [[SPLAT_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer
 // CHECK-NEXT:    [[SHL:%.*]] = shl <16 x i32> [[A]], [[SPLAT_SPLAT]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[SHL]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> poison, <16 x i32> [[SHL]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
 //
 fixed_uint32_t lshift_u32_rsplat(fixed_uint32_t a, uint32_t b) {
@@ -512,7 +512,7 @@ fixed_uint32_t lshift_u32_rsplat(fixed_uint32_t a, uint32_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i32> [[SPLAT_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer
 // CHECK-NEXT:    [[SHL:%.*]] = shl <16 x i32> [[SPLAT_SPLAT]], [[A]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[SHL]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> poison, <16 x i32> [[SHL]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
 //
 fixed_uint32_t lshift_u32_lsplat(fixed_uint32_t a, uint32_t b) {
@@ -525,7 +525,7 @@ fixed_uint32_t lshift_u32_lsplat(fixed_uint32_t a, uint32_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i32> [[SPLAT_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer
 // CHECK-NEXT:    [[SHR:%.*]] = lshr <16 x i32> [[A]], [[SPLAT_SPLAT]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[SHR]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> poison, <16 x i32> [[SHR]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
 //
 fixed_uint32_t rshift_u32_rsplat(fixed_uint32_t a, uint32_t b) {
@@ -538,7 +538,7 @@ fixed_uint32_t rshift_u32_rsplat(fixed_uint32_t a, uint32_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i32> [[SPLAT_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer
 // CHECK-NEXT:    [[SHR:%.*]] = lshr <16 x i32> [[SPLAT_SPLAT]], [[A]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[SHR]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> poison, <16 x i32> [[SHR]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
 //
 fixed_uint32_t rshift_u32_lsplat(fixed_uint32_t a, uint32_t b) {
@@ -551,7 +551,7 @@ fixed_uint32_t rshift_u32_lsplat(fixed_uint32_t a, uint32_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <8 x i64> [[SPLAT_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[SHL:%.*]] = shl <8 x i64> [[A]], [[SPLAT_SPLAT]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[SHL]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> poison, <8 x i64> [[SHL]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
 //
 fixed_int64_t lshift_i64_rsplat(fixed_int64_t a, int64_t b) {
@@ -564,7 +564,7 @@ fixed_int64_t lshift_i64_rsplat(fixed_int64_t a, int64_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <8 x i64> [[SPLAT_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[SHL:%.*]] = shl <8 x i64> [[SPLAT_SPLAT]], [[A]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[SHL]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> poison, <8 x i64> [[SHL]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
 //
 fixed_int64_t lshift_i64_lsplat(fixed_int64_t a, int64_t b) {
@@ -577,7 +577,7 @@ fixed_int64_t lshift_i64_lsplat(fixed_int64_t a, int64_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <8 x i64> [[SPLAT_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[SHR:%.*]] = ashr <8 x i64> [[A]], [[SPLAT_SPLAT]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[SHR]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> poison, <8 x i64> [[SHR]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
 //
 fixed_int64_t rshift_i64_rsplat(fixed_int64_t a, int64_t b) {
@@ -590,7 +590,7 @@ fixed_int64_t rshift_i64_rsplat(fixed_int64_t a, int64_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <8 x i64> [[SPLAT_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[SHR:%.*]] = ashr <8 x i64> [[SPLAT_SPLAT]], [[A]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[SHR]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> poison, <8 x i64> [[SHR]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
 //
 fixed_int64_t rshift_i64_lsplat(fixed_int64_t a, int64_t b) {
@@ -603,7 +603,7 @@ fixed_int64_t rshift_i64_lsplat(fixed_int64_t a, int64_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <8 x i64> [[SPLAT_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[SHL:%.*]] = shl <8 x i64> [[A]], [[SPLAT_SPLAT]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[SHL]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> poison, <8 x i64> [[SHL]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
 //
 fixed_uint64_t lshift_u64_rsplat(fixed_uint64_t a, uint64_t b) {
@@ -616,7 +616,7 @@ fixed_uint64_t lshift_u64_rsplat(fixed_uint64_t a, uint64_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <8 x i64> [[SPLAT_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[SHL:%.*]] = shl <8 x i64> [[SPLAT_SPLAT]], [[A]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[SHL]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> poison, <8 x i64> [[SHL]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
 //
 fixed_uint64_t lshift_u64_lsplat(fixed_uint64_t a, uint64_t b) {
@@ -629,7 +629,7 @@ fixed_uint64_t lshift_u64_lsplat(fixed_uint64_t a, uint64_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <8 x i64> [[SPLAT_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[SHR:%.*]] = lshr <8 x i64> [[A]], [[SPLAT_SPLAT]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[SHR]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> poison, <8 x i64> [[SHR]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
 //
 fixed_uint64_t rshift_u64_rsplat(fixed_uint64_t a, uint64_t b) {
@@ -642,7 +642,7 @@ fixed_uint64_t rshift_u64_rsplat(fixed_uint64_t a, uint64_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <8 x i64> [[SPLAT_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[SHR:%.*]] = lshr <8 x i64> [[SPLAT_SPLAT]], [[A]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[SHR]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> poison, <8 x i64> [[SHR]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
 //
 fixed_uint64_t rshift_u64_lsplat(fixed_uint64_t a, uint64_t b) {
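
A minimal C sketch of the kind of source these tests exercise -- not part of
the patch; the 512-bit width, the file name, and the compile command are
illustrative assumptions. Returning a fixed-length SVE vector is lowered
through an llvm.vector.insert into a scalable placeholder vector, and that
placeholder operand is what changes from `undef` to `poison` here:

    // repro.c -- hypothetical standalone example, compile with e.g.
    //   clang -target aarch64-linux-gnu -march=armv8-a+sve \
    //         -msve-vector-bits=512 -O2 -S -emit-llvm repro.c
    #include <arm_sve.h>

    typedef svint32_t fixed_int32_t __attribute__((arm_sve_vector_bits(512)));

    // At 512 bits the fixed-length value is a <16 x i32>; returning it as the
    // ABI scalable type emits llvm.vector.insert.nxv4i32.v16i32 into a
    // placeholder <vscale x 4 x i32>, now poison rather than undef.
    fixed_int32_t add_i32(fixed_int32_t a, fixed_int32_t b) {
      return a + b;
    }
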
diff --git a/clang/test/CodeGen/RISCV/attr-rvv-vector-bits-cast.c b/clang/test/CodeGen/RISCV/attr-rvv-vector-bits-cast.c
index 6fd39309818259..3fa104b78f2126 100644
--- a/clang/test/CodeGen/RISCV/attr-rvv-vector-bits-cast.c
+++ b/clang/test/CodeGen/RISCV/attr-rvv-vector-bits-cast.c
@@ -140,7 +140,7 @@ gnu_int32m1_t from_vint32m1_t__to_gnu_int32m1_t(vint32m1_t type) {
 // CHECK-LABEL: @to_fixed_int32m1_t__from_gnu_int32m1_t(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TYPE:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA8]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = tail call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[TYPE]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = tail call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> poison, <8 x i32> [[TYPE]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i32> [[CAST_SCALABLE]]
 //
 fixed_int32m1_t to_fixed_int32m1_t__from_gnu_int32m1_t(gnu_int32m1_t type) {
diff --git a/clang/test/CodeGen/RISCV/attr-rvv-vector-bits-codegen.c b/clang/test/CodeGen/RISCV/attr-rvv-vector-bits-codegen.c
index a6b3f3732f281f..bfdb39b6ac82d4 100644
--- a/clang/test/CodeGen/RISCV/attr-rvv-vector-bits-codegen.c
+++ b/clang/test/CodeGen/RISCV/attr-rvv-vector-bits-codegen.c
@@ -69,7 +69,7 @@ fixed_bool32_t global_bool32;
 // CHECK-NEXT:    [[CAST_FIXED:%.*]] = call <256 x i8> @llvm.vector.extract.v256i8.nxv64i8(<vscale x 64 x i8> [[TMP7]], i64 0)
 // CHECK-NEXT:    store <256 x i8> [[CAST_FIXED]], ptr [[RETVAL]], align 8
 // CHECK-NEXT:    [[TMP8:%.*]] = load <256 x i8>, ptr [[RETVAL]], align 8
-// CHECK-NEXT:    [[CAST_SCALABLE2:%.*]] = call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.v256i8(<vscale x 64 x i8> undef, <256 x i8> [[TMP8]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE2:%.*]] = call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.v256i8(<vscale x 64 x i8> poison, <256 x i8> [[TMP8]], i64 0)
 // CHECK-NEXT:    ret <vscale x 64 x i8> [[CAST_SCALABLE2]]
 //
 fixed_int8m8_t test_bool1(vbool1_t m, vint8m8_t vec) {
@@ -99,7 +99,7 @@ fixed_int8m8_t test_bool1(vbool1_t m, vint8m8_t vec) {
 // CHECK-NEXT:    [[CAST_FIXED:%.*]] = call <64 x i16> @llvm.vector.extract.v64i16.nxv16i16(<vscale x 16 x i16> [[TMP7]], i64 0)
 // CHECK-NEXT:    store <64 x i16> [[CAST_FIXED]], ptr [[RETVAL]], align 8
 // CHECK-NEXT:    [[TMP8:%.*]] = load <64 x i16>, ptr [[RETVAL]], align 8
-// CHECK-NEXT:    [[CAST_SCALABLE2:%.*]] = call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.v64i16(<vscale x 16 x i16> undef, <64 x i16> [[TMP8]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE2:%.*]] = call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.v64i16(<vscale x 16 x i16> poison, <64 x i16> [[TMP8]], i64 0)
 // CHECK-NEXT:    ret <vscale x 16 x i16> [[CAST_SCALABLE2]]
 //
 fixed_int16m4_t test_bool4(vbool4_t m, vint16m4_t vec) {
@@ -130,7 +130,7 @@ fixed_int16m4_t test_bool4(vbool4_t m, vint16m4_t vec) {
 // CHECK-NEXT:    [[CAST_FIXED:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[TMP7]], i64 0)
 // CHECK-NEXT:    store <8 x i32> [[CAST_FIXED]], ptr [[RETVAL]], align 8
 // CHECK-NEXT:    [[TMP8:%.*]] = load <8 x i32>, ptr [[RETVAL]], align 8
-// CHECK-NEXT:    [[CAST_SCALABLE1:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[TMP8]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE1:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> poison, <8 x i32> [[TMP8]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i32> [[CAST_SCALABLE1]]
 //
 fixed_int32m1_t test_bool32(vbool32_t m, vint32m1_t vec) {
@@ -147,7 +147,7 @@ fixed_int32m1_t test_bool32(vbool32_t m, vint32m1_t vec) {
 // CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 8
 // CHECK-NEXT:    store <8 x i32> [[TMP1]], ptr [[RETVAL]], align 8
 // CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i32>, ptr [[RETVAL]], align 8
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[TMP2]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> poison, <8 x i32> [[TMP2]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i32> [[CAST_SCALABLE]]
 //
 fixed_int32m1_t test_ptr_to_global() {
@@ -168,7 +168,7 @@ fixed_int32m1_t test_ptr_to_global() {
 // CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i32>, ptr [[ARRAYIDX]], align 8
 // CHECK-NEXT:    store <8 x i32> [[TMP1]], ptr [[RETVAL]], align 8
 // CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i32>, ptr [[RETVAL]], align 8
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[TMP2]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> poison, <8 x i32> [[TMP2]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i32> [[CAST_SCALABLE]]
 //
 fixed_int32m1_t array_arg(fixed_int32m1_t arr[]) {
@@ -186,7 +186,7 @@ fixed_int32m1_t array_arg(fixed_int32m1_t arr[]) {
 // CHECK-NEXT:    [[TMP1:%.*]] = load <32 x i8>, ptr [[TMP0]], align 8
 // CHECK-NEXT:    store <32 x i8> [[TMP1]], ptr [[RETVAL]], align 8
 // CHECK-NEXT:    [[TMP2:%.*]] = load <32 x i8>, ptr [[RETVAL]], align 8
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[TMP2]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> poison, <32 x i8> [[TMP2]], i64 0)
 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <vscale x 8 x i8> [[CAST_SCALABLE]] to <vscale x 64 x i1>
 // CHECK-NEXT:    ret <vscale x 64 x i1> [[TMP3]]
 //
@@ -208,7 +208,7 @@ fixed_bool1_t address_of_array_idx_bool1() {
 // CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr [[TMP0]], align 8
 // CHECK-NEXT:    store <8 x i8> [[TMP1]], ptr [[RETVAL]], align 8
 // CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i8>, ptr [[RETVAL]], align 8
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v8i8(<vscale x 2 x i8> undef, <8 x i8> [[TMP2]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v8i8(<vscale x 2 x i8> poison, <8 x i8> [[TMP2]], i64 0)
 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <vscale x 2 x i8> [[CAST_SCALABLE]] to <vscale x 16 x i1>
 // CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP3]]
 //
@@ -253,7 +253,7 @@ fixed_bool32_t address_of_array_idx_bool32() {
 // CHECK-NEXT:    [[CAST_FIXED:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[TMP2]], i64 0)
 // CHECK-NEXT:    store <8 x i32> [[CAST_FIXED]], ptr [[RETVAL]], align 8
 // CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i32>, ptr [[RETVAL]], align 8
-// CHECK-NEXT:    [[CAST_SCALABLE1:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[TMP3]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE1:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> poison, <8 x i32> [[TMP3]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i32> [[CAST_SCALABLE1]]
 //
 fixed_int32m1_t test_cast(vint32m1_t vec) {
@@ -269,7 +269,7 @@ fixed_int32m1_t test_cast(vint32m1_t vec) {
 // CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr [[TMP0]], align 8
 // CHECK-NEXT:    store <16 x i32> [[TMP1]], ptr [[RETVAL]], align 8
 // CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr [[RETVAL]], align 8
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[TMP2]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> poison, <16 x i32> [[TMP2]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[CAST_SCALABLE]]
 //
 fixed_int32m2_t test_ptr_to_global_m2() {
@@ -290,7 +290,7 @@ fixed_int32m2_t test_ptr_to_global_m2() {
 // CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr [[ARRAYIDX]], align 8
 // CHECK-NEXT:    store <16 x i32> [[TMP1]], ptr [[RETVAL]], align 8
 // CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr [[RETVAL]], align 8
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[TMP2]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> poison, <16 x i32> [[TMP2]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[CAST_SCALABLE]]
 //
 fixed_int32m2_t array_arg_m2(fixed_int32m2_t arr[]) {
@@ -309,7 +309,7 @@ fixed_int32m2_t array_arg_m2(fixed_int32m2_t arr[]) {
 // CHECK-NEXT:    [[CAST_FIXED:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[TMP2]], i64 0)
 // CHECK-NEXT:    store <16 x i32> [[CAST_FIXED]], ptr [[RETVAL]], align 8
 // CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i32>, ptr [[RETVAL]], align 8
-// CHECK-NEXT:    [[CAST_SCALABLE1:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[TMP3]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE1:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> poison, <16 x i32> [[TMP3]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[CAST_SCALABLE1]]
 //
 fixed_int32m2_t test_cast_m2(vint32m2_t vec) {
diff --git a/clang/test/CodeGen/RISCV/rvv-vls-arith-ops.c b/clang/test/CodeGen/RISCV/rvv-vls-arith-ops.c
index 76fcf38a0d98f0..0f72541e431db2 100644
--- a/clang/test/CodeGen/RISCV/rvv-vls-arith-ops.c
+++ b/clang/test/CodeGen/RISCV/rvv-vls-arith-ops.c
@@ -39,7 +39,7 @@ typedef vfloat64m1_t fixed_float64m1_t __attribute__((riscv_rvv_vector_bits(__ri
 // CHECK-NEXT:    [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[ADD:%.*]] = add <32 x i8> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[ADD]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> poison, <32 x i8> [[ADD]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i8> [[CAST_SCALABLE]]
 //
 fixed_int8m1_t add_i8(fixed_int8m1_t a, fixed_int8m1_t b) {
@@ -51,7 +51,7 @@ fixed_int8m1_t add_i8(fixed_int8m1_t a, fixed_int8m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[ADD:%.*]] = add <16 x i16> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[ADD]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> poison, <16 x i16> [[ADD]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i16> [[CAST_SCALABLE]]
 //
 fixed_int16m1_t add_i16(fixed_int16m1_t a, fixed_int16m1_t b) {
@@ -63,7 +63,7 @@ fixed_int16m1_t add_i16(fixed_int16m1_t a, fixed_int16m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[ADD:%.*]] = add <8 x i32> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[ADD]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> poison, <8 x i32> [[ADD]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i32> [[CAST_SCALABLE]]
 //
 fixed_int32m1_t add_i32(fixed_int32m1_t a, fixed_int32m1_t b) {
@@ -75,7 +75,7 @@ fixed_int32m1_t add_i32(fixed_int32m1_t a, fixed_int32m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[ADD:%.*]] = add <4 x i64> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[ADD]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> poison, <4 x i64> [[ADD]], i64 0)
 // CHECK-NEXT:    ret <vscale x 1 x i64> [[CAST_SCALABLE]]
 //
 fixed_int64m1_t add_i64(fixed_int64m1_t a, fixed_int64m1_t b) {
@@ -87,7 +87,7 @@ fixed_int64m1_t add_i64(fixed_int64m1_t a, fixed_int64m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[ADD:%.*]] = add <32 x i8> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[ADD]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> poison, <32 x i8> [[ADD]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i8> [[CAST_SCALABLE]]
 //
 fixed_uint8m1_t add_u8(fixed_uint8m1_t a, fixed_uint8m1_t b) {
@@ -99,7 +99,7 @@ fixed_uint8m1_t add_u8(fixed_uint8m1_t a, fixed_uint8m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[ADD:%.*]] = add <16 x i16> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[ADD]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> poison, <16 x i16> [[ADD]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i16> [[CAST_SCALABLE]]
 //
 fixed_uint16m1_t add_u16(fixed_uint16m1_t a, fixed_uint16m1_t b) {
@@ -111,7 +111,7 @@ fixed_uint16m1_t add_u16(fixed_uint16m1_t a, fixed_uint16m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[ADD:%.*]] = add <8 x i32> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[ADD]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> poison, <8 x i32> [[ADD]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i32> [[CAST_SCALABLE]]
 //
 fixed_uint32m1_t add_u32(fixed_uint32m1_t a, fixed_uint32m1_t b) {
@@ -123,7 +123,7 @@ fixed_uint32m1_t add_u32(fixed_uint32m1_t a, fixed_uint32m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[ADD:%.*]] = add <4 x i64> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[ADD]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> poison, <4 x i64> [[ADD]], i64 0)
 // CHECK-NEXT:    ret <vscale x 1 x i64> [[CAST_SCALABLE]]
 //
 fixed_uint64m1_t add_u64(fixed_uint64m1_t a, fixed_uint64m1_t b) {
@@ -135,7 +135,7 @@ fixed_uint64m1_t add_u64(fixed_uint64m1_t a, fixed_uint64m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <8 x float> @llvm.vector.extract.v8f32.nxv2f32(<vscale x 2 x float> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <8 x float> @llvm.vector.extract.v8f32.nxv2f32(<vscale x 2 x float> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[ADD:%.*]] = fadd <8 x float> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x float> @llvm.vector.insert.nxv2f32.v8f32(<vscale x 2 x float> undef, <8 x float> [[ADD]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x float> @llvm.vector.insert.nxv2f32.v8f32(<vscale x 2 x float> poison, <8 x float> [[ADD]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x float> [[CAST_SCALABLE]]
 //
 fixed_float32m1_t add_f32(fixed_float32m1_t a, fixed_float32m1_t b) {
@@ -147,7 +147,7 @@ fixed_float32m1_t add_f32(fixed_float32m1_t a, fixed_float32m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <4 x double> @llvm.vector.extract.v4f64.nxv1f64(<vscale x 1 x double> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <4 x double> @llvm.vector.extract.v4f64.nxv1f64(<vscale x 1 x double> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[ADD:%.*]] = fadd <4 x double> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x double> @llvm.vector.insert.nxv1f64.v4f64(<vscale x 1 x double> undef, <4 x double> [[ADD]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x double> @llvm.vector.insert.nxv1f64.v4f64(<vscale x 1 x double> poison, <4 x double> [[ADD]], i64 0)
 // CHECK-NEXT:    ret <vscale x 1 x double> [[CAST_SCALABLE]]
 //
 fixed_float64m1_t add_f64(fixed_float64m1_t a, fixed_float64m1_t b) {
@@ -159,7 +159,7 @@ fixed_float64m1_t add_f64(fixed_float64m1_t a, fixed_float64m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[ADD:%.*]] = add <32 x i8> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[ADD]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> poison, <32 x i8> [[ADD]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i8> [[CAST_SCALABLE]]
 //
 fixed_int8m1_t add_inplace_i8(fixed_int8m1_t a, fixed_int8m1_t b) {
@@ -171,7 +171,7 @@ fixed_int8m1_t add_inplace_i8(fixed_int8m1_t a, fixed_int8m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[ADD:%.*]] = add <16 x i16> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[ADD]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> poison, <16 x i16> [[ADD]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i16> [[CAST_SCALABLE]]
 //
 fixed_int16m1_t add_inplace_i16(fixed_int16m1_t a, fixed_int16m1_t b) {
@@ -183,7 +183,7 @@ fixed_int16m1_t add_inplace_i16(fixed_int16m1_t a, fixed_int16m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[ADD:%.*]] = add <8 x i32> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[ADD]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> poison, <8 x i32> [[ADD]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i32> [[CAST_SCALABLE]]
 //
 fixed_int32m1_t add_inplace_i32(fixed_int32m1_t a, fixed_int32m1_t b) {
@@ -195,7 +195,7 @@ fixed_int32m1_t add_inplace_i32(fixed_int32m1_t a, fixed_int32m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[ADD:%.*]] = add <4 x i64> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[ADD]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> poison, <4 x i64> [[ADD]], i64 0)
 // CHECK-NEXT:    ret <vscale x 1 x i64> [[CAST_SCALABLE]]
 //
 fixed_int64m1_t add_inplace_i64(fixed_int64m1_t a, fixed_int64m1_t b) {
@@ -207,7 +207,7 @@ fixed_int64m1_t add_inplace_i64(fixed_int64m1_t a, fixed_int64m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[ADD:%.*]] = add <32 x i8> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[ADD]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> poison, <32 x i8> [[ADD]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i8> [[CAST_SCALABLE]]
 //
 fixed_uint8m1_t add_inplace_u8(fixed_uint8m1_t a, fixed_uint8m1_t b) {
@@ -219,7 +219,7 @@ fixed_uint8m1_t add_inplace_u8(fixed_uint8m1_t a, fixed_uint8m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[ADD:%.*]] = add <16 x i16> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[ADD]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> poison, <16 x i16> [[ADD]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i16> [[CAST_SCALABLE]]
 //
 fixed_uint16m1_t add_inplace_u16(fixed_uint16m1_t a, fixed_uint16m1_t b) {
@@ -231,7 +231,7 @@ fixed_uint16m1_t add_inplace_u16(fixed_uint16m1_t a, fixed_uint16m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[ADD:%.*]] = add <8 x i32> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[ADD]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> poison, <8 x i32> [[ADD]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i32> [[CAST_SCALABLE]]
 //
 fixed_uint32m1_t add_inplace_u32(fixed_uint32m1_t a, fixed_uint32m1_t b) {
@@ -243,7 +243,7 @@ fixed_uint32m1_t add_inplace_u32(fixed_uint32m1_t a, fixed_uint32m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[ADD:%.*]] = add <4 x i64> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[ADD]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> poison, <4 x i64> [[ADD]], i64 0)
 // CHECK-NEXT:    ret <vscale x 1 x i64> [[CAST_SCALABLE]]
 //
 fixed_uint64m1_t add_inplace_u64(fixed_uint64m1_t a, fixed_uint64m1_t b) {
@@ -255,7 +255,7 @@ fixed_uint64m1_t add_inplace_u64(fixed_uint64m1_t a, fixed_uint64m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <8 x float> @llvm.vector.extract.v8f32.nxv2f32(<vscale x 2 x float> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <8 x float> @llvm.vector.extract.v8f32.nxv2f32(<vscale x 2 x float> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[ADD:%.*]] = fadd <8 x float> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x float> @llvm.vector.insert.nxv2f32.v8f32(<vscale x 2 x float> undef, <8 x float> [[ADD]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x float> @llvm.vector.insert.nxv2f32.v8f32(<vscale x 2 x float> poison, <8 x float> [[ADD]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x float> [[CAST_SCALABLE]]
 //
 fixed_float32m1_t add_inplace_f32(fixed_float32m1_t a, fixed_float32m1_t b) {
@@ -267,7 +267,7 @@ fixed_float32m1_t add_inplace_f32(fixed_float32m1_t a, fixed_float32m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <4 x double> @llvm.vector.extract.v4f64.nxv1f64(<vscale x 1 x double> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <4 x double> @llvm.vector.extract.v4f64.nxv1f64(<vscale x 1 x double> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[ADD:%.*]] = fadd <4 x double> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x double> @llvm.vector.insert.nxv1f64.v4f64(<vscale x 1 x double> undef, <4 x double> [[ADD]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x double> @llvm.vector.insert.nxv1f64.v4f64(<vscale x 1 x double> poison, <4 x double> [[ADD]], i64 0)
 // CHECK-NEXT:    ret <vscale x 1 x double> [[CAST_SCALABLE]]
 //
 fixed_float64m1_t add_inplace_f64(fixed_float64m1_t a, fixed_float64m1_t b) {
@@ -280,7 +280,7 @@ fixed_float64m1_t add_inplace_f64(fixed_float64m1_t a, fixed_float64m1_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i8> poison, i8 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <32 x i8> [[SPLAT_SPLATINSERT]], <32 x i8> poison, <32 x i32> zeroinitializer
 // CHECK-NEXT:    [[ADD:%.*]] = add <32 x i8> [[A]], [[SPLAT_SPLAT]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[ADD]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> poison, <32 x i8> [[ADD]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i8> [[CAST_SCALABLE]]
 //
 fixed_int8m1_t add_scalar_i8(fixed_int8m1_t a, int8_t b) {
@@ -293,7 +293,7 @@ fixed_int8m1_t add_scalar_i8(fixed_int8m1_t a, int8_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i16> poison, i16 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i16> [[SPLAT_SPLATINSERT]], <16 x i16> poison, <16 x i32> zeroinitializer
 // CHECK-NEXT:    [[ADD:%.*]] = add <16 x i16> [[A]], [[SPLAT_SPLAT]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[ADD]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> poison, <16 x i16> [[ADD]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i16> [[CAST_SCALABLE]]
 //
 fixed_int16m1_t add_scalar_i16(fixed_int16m1_t a, int16_t b) {
@@ -306,7 +306,7 @@ fixed_int16m1_t add_scalar_i16(fixed_int16m1_t a, int16_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <8 x i32> [[SPLAT_SPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[ADD:%.*]] = add <8 x i32> [[A]], [[SPLAT_SPLAT]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[ADD]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> poison, <8 x i32> [[ADD]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i32> [[CAST_SCALABLE]]
 //
 fixed_int32m1_t add_scalar_i32(fixed_int32m1_t a, int32_t b) {
@@ -319,7 +319,7 @@ fixed_int32m1_t add_scalar_i32(fixed_int32m1_t a, int32_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <4 x i64> [[SPLAT_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
 // CHECK-NEXT:    [[ADD:%.*]] = add <4 x i64> [[A]], [[SPLAT_SPLAT]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[ADD]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> poison, <4 x i64> [[ADD]], i64 0)
 // CHECK-NEXT:    ret <vscale x 1 x i64> [[CAST_SCALABLE]]
 //
 fixed_int64m1_t add_scalar_i64(fixed_int64m1_t a, int64_t b) {
@@ -332,7 +332,7 @@ fixed_int64m1_t add_scalar_i64(fixed_int64m1_t a, int64_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i8> poison, i8 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <32 x i8> [[SPLAT_SPLATINSERT]], <32 x i8> poison, <32 x i32> zeroinitializer
 // CHECK-NEXT:    [[ADD:%.*]] = add <32 x i8> [[A]], [[SPLAT_SPLAT]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[ADD]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> poison, <32 x i8> [[ADD]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i8> [[CAST_SCALABLE]]
 //
 fixed_uint8m1_t add_scalar_u8(fixed_uint8m1_t a, uint8_t b) {
@@ -345,7 +345,7 @@ fixed_uint8m1_t add_scalar_u8(fixed_uint8m1_t a, uint8_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i16> poison, i16 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i16> [[SPLAT_SPLATINSERT]], <16 x i16> poison, <16 x i32> zeroinitializer
 // CHECK-NEXT:    [[ADD:%.*]] = add <16 x i16> [[A]], [[SPLAT_SPLAT]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[ADD]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> poison, <16 x i16> [[ADD]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i16> [[CAST_SCALABLE]]
 //
 fixed_uint16m1_t add_scalar_u16(fixed_uint16m1_t a, uint16_t b) {
@@ -358,7 +358,7 @@ fixed_uint16m1_t add_scalar_u16(fixed_uint16m1_t a, uint16_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <8 x i32> [[SPLAT_SPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[ADD:%.*]] = add <8 x i32> [[A]], [[SPLAT_SPLAT]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[ADD]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> poison, <8 x i32> [[ADD]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i32> [[CAST_SCALABLE]]
 //
 fixed_uint32m1_t add_scalar_u32(fixed_uint32m1_t a, uint32_t b) {
@@ -371,7 +371,7 @@ fixed_uint32m1_t add_scalar_u32(fixed_uint32m1_t a, uint32_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <4 x i64> [[SPLAT_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
 // CHECK-NEXT:    [[ADD:%.*]] = add <4 x i64> [[A]], [[SPLAT_SPLAT]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[ADD]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> poison, <4 x i64> [[ADD]], i64 0)
 // CHECK-NEXT:    ret <vscale x 1 x i64> [[CAST_SCALABLE]]
 //
 fixed_uint64m1_t add_scalar_u64(fixed_uint64m1_t a, uint64_t b) {
@@ -384,7 +384,7 @@ fixed_uint64m1_t add_scalar_u64(fixed_uint64m1_t a, uint64_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x float> poison, float [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <8 x float> [[SPLAT_SPLATINSERT]], <8 x float> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[ADD:%.*]] = fadd <8 x float> [[A]], [[SPLAT_SPLAT]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x float> @llvm.vector.insert.nxv2f32.v8f32(<vscale x 2 x float> undef, <8 x float> [[ADD]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x float> @llvm.vector.insert.nxv2f32.v8f32(<vscale x 2 x float> poison, <8 x float> [[ADD]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x float> [[CAST_SCALABLE]]
 //
 fixed_float32m1_t add_scalar_f32(fixed_float32m1_t a, float b) {
@@ -397,7 +397,7 @@ fixed_float32m1_t add_scalar_f32(fixed_float32m1_t a, float b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <4 x double> poison, double [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <4 x double> [[SPLAT_SPLATINSERT]], <4 x double> poison, <4 x i32> zeroinitializer
 // CHECK-NEXT:    [[ADD:%.*]] = fadd <4 x double> [[A]], [[SPLAT_SPLAT]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x double> @llvm.vector.insert.nxv1f64.v4f64(<vscale x 1 x double> undef, <4 x double> [[ADD]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x double> @llvm.vector.insert.nxv1f64.v4f64(<vscale x 1 x double> poison, <4 x double> [[ADD]], i64 0)
 // CHECK-NEXT:    ret <vscale x 1 x double> [[CAST_SCALABLE]]
 //
 fixed_float64m1_t add_scalar_f64(fixed_float64m1_t a, double b) {
@@ -411,7 +411,7 @@ fixed_float64m1_t add_scalar_f64(fixed_float64m1_t a, double b) {
 // CHECK-NEXT:    [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[SUB:%.*]] = sub <32 x i8> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[SUB]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> poison, <32 x i8> [[SUB]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i8> [[CAST_SCALABLE]]
 //
 fixed_int8m1_t sub_i8(fixed_int8m1_t a, fixed_int8m1_t b) {
@@ -423,7 +423,7 @@ fixed_int8m1_t sub_i8(fixed_int8m1_t a, fixed_int8m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[SUB:%.*]] = sub <16 x i16> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[SUB]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> poison, <16 x i16> [[SUB]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i16> [[CAST_SCALABLE]]
 //
 fixed_int16m1_t sub_i16(fixed_int16m1_t a, fixed_int16m1_t b) {
@@ -435,7 +435,7 @@ fixed_int16m1_t sub_i16(fixed_int16m1_t a, fixed_int16m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[SUB:%.*]] = sub <8 x i32> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[SUB]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> poison, <8 x i32> [[SUB]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i32> [[CAST_SCALABLE]]
 //
 fixed_int32m1_t sub_i32(fixed_int32m1_t a, fixed_int32m1_t b) {
@@ -447,7 +447,7 @@ fixed_int32m1_t sub_i32(fixed_int32m1_t a, fixed_int32m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[SUB:%.*]] = sub <4 x i64> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[SUB]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> poison, <4 x i64> [[SUB]], i64 0)
 // CHECK-NEXT:    ret <vscale x 1 x i64> [[CAST_SCALABLE]]
 //
 fixed_int64m1_t sub_i64(fixed_int64m1_t a, fixed_int64m1_t b) {
@@ -459,7 +459,7 @@ fixed_int64m1_t sub_i64(fixed_int64m1_t a, fixed_int64m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[SUB:%.*]] = sub <32 x i8> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[SUB]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> poison, <32 x i8> [[SUB]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i8> [[CAST_SCALABLE]]
 //
 fixed_uint8m1_t sub_u8(fixed_uint8m1_t a, fixed_uint8m1_t b) {
@@ -471,7 +471,7 @@ fixed_uint8m1_t sub_u8(fixed_uint8m1_t a, fixed_uint8m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[SUB:%.*]] = sub <16 x i16> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[SUB]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> poison, <16 x i16> [[SUB]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i16> [[CAST_SCALABLE]]
 //
 fixed_uint16m1_t sub_u16(fixed_uint16m1_t a, fixed_uint16m1_t b) {
@@ -483,7 +483,7 @@ fixed_uint16m1_t sub_u16(fixed_uint16m1_t a, fixed_uint16m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[SUB:%.*]] = sub <8 x i32> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[SUB]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> poison, <8 x i32> [[SUB]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i32> [[CAST_SCALABLE]]
 //
 fixed_uint32m1_t sub_u32(fixed_uint32m1_t a, fixed_uint32m1_t b) {
@@ -495,7 +495,7 @@ fixed_uint32m1_t sub_u32(fixed_uint32m1_t a, fixed_uint32m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[SUB:%.*]] = sub <4 x i64> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[SUB]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> poison, <4 x i64> [[SUB]], i64 0)
 // CHECK-NEXT:    ret <vscale x 1 x i64> [[CAST_SCALABLE]]
 //
 fixed_uint64m1_t sub_u64(fixed_uint64m1_t a, fixed_uint64m1_t b) {
@@ -507,7 +507,7 @@ fixed_uint64m1_t sub_u64(fixed_uint64m1_t a, fixed_uint64m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <8 x float> @llvm.vector.extract.v8f32.nxv2f32(<vscale x 2 x float> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <8 x float> @llvm.vector.extract.v8f32.nxv2f32(<vscale x 2 x float> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[SUB:%.*]] = fsub <8 x float> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x float> @llvm.vector.insert.nxv2f32.v8f32(<vscale x 2 x float> undef, <8 x float> [[SUB]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x float> @llvm.vector.insert.nxv2f32.v8f32(<vscale x 2 x float> poison, <8 x float> [[SUB]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x float> [[CAST_SCALABLE]]
 //
 fixed_float32m1_t sub_f32(fixed_float32m1_t a, fixed_float32m1_t b) {
@@ -519,7 +519,7 @@ fixed_float32m1_t sub_f32(fixed_float32m1_t a, fixed_float32m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <4 x double> @llvm.vector.extract.v4f64.nxv1f64(<vscale x 1 x double> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <4 x double> @llvm.vector.extract.v4f64.nxv1f64(<vscale x 1 x double> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[SUB:%.*]] = fsub <4 x double> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x double> @llvm.vector.insert.nxv1f64.v4f64(<vscale x 1 x double> undef, <4 x double> [[SUB]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x double> @llvm.vector.insert.nxv1f64.v4f64(<vscale x 1 x double> poison, <4 x double> [[SUB]], i64 0)
 // CHECK-NEXT:    ret <vscale x 1 x double> [[CAST_SCALABLE]]
 //
 fixed_float64m1_t sub_f64(fixed_float64m1_t a, fixed_float64m1_t b) {
@@ -531,7 +531,7 @@ fixed_float64m1_t sub_f64(fixed_float64m1_t a, fixed_float64m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[SUB:%.*]] = sub <32 x i8> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[SUB]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> poison, <32 x i8> [[SUB]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i8> [[CAST_SCALABLE]]
 //
 fixed_int8m1_t sub_inplace_i8(fixed_int8m1_t a, fixed_int8m1_t b) {
@@ -543,7 +543,7 @@ fixed_int8m1_t sub_inplace_i8(fixed_int8m1_t a, fixed_int8m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[SUB:%.*]] = sub <16 x i16> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[SUB]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> poison, <16 x i16> [[SUB]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i16> [[CAST_SCALABLE]]
 //
 fixed_int16m1_t sub_inplace_i16(fixed_int16m1_t a, fixed_int16m1_t b) {
@@ -555,7 +555,7 @@ fixed_int16m1_t sub_inplace_i16(fixed_int16m1_t a, fixed_int16m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[SUB:%.*]] = sub <8 x i32> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[SUB]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> poison, <8 x i32> [[SUB]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i32> [[CAST_SCALABLE]]
 //
 fixed_int32m1_t sub_inplace_i32(fixed_int32m1_t a, fixed_int32m1_t b) {
@@ -567,7 +567,7 @@ fixed_int32m1_t sub_inplace_i32(fixed_int32m1_t a, fixed_int32m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[SUB:%.*]] = sub <4 x i64> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[SUB]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> poison, <4 x i64> [[SUB]], i64 0)
 // CHECK-NEXT:    ret <vscale x 1 x i64> [[CAST_SCALABLE]]
 //
 fixed_int64m1_t sub_inplace_i64(fixed_int64m1_t a, fixed_int64m1_t b) {
@@ -579,7 +579,7 @@ fixed_int64m1_t sub_inplace_i64(fixed_int64m1_t a, fixed_int64m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[SUB:%.*]] = sub <32 x i8> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[SUB]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> poison, <32 x i8> [[SUB]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i8> [[CAST_SCALABLE]]
 //
 fixed_uint8m1_t sub_inplace_u8(fixed_uint8m1_t a, fixed_uint8m1_t b) {
@@ -591,7 +591,7 @@ fixed_uint8m1_t sub_inplace_u8(fixed_uint8m1_t a, fixed_uint8m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[SUB:%.*]] = sub <16 x i16> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[SUB]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> poison, <16 x i16> [[SUB]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i16> [[CAST_SCALABLE]]
 //
 fixed_uint16m1_t sub_inplace_u16(fixed_uint16m1_t a, fixed_uint16m1_t b) {
@@ -603,7 +603,7 @@ fixed_uint16m1_t sub_inplace_u16(fixed_uint16m1_t a, fixed_uint16m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[SUB:%.*]] = sub <8 x i32> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[SUB]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> poison, <8 x i32> [[SUB]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i32> [[CAST_SCALABLE]]
 //
 fixed_uint32m1_t sub_inplace_u32(fixed_uint32m1_t a, fixed_uint32m1_t b) {
@@ -615,7 +615,7 @@ fixed_uint32m1_t sub_inplace_u32(fixed_uint32m1_t a, fixed_uint32m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[SUB:%.*]] = sub <4 x i64> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[SUB]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> poison, <4 x i64> [[SUB]], i64 0)
 // CHECK-NEXT:    ret <vscale x 1 x i64> [[CAST_SCALABLE]]
 //
 fixed_uint64m1_t sub_inplace_u64(fixed_uint64m1_t a, fixed_uint64m1_t b) {
@@ -627,7 +627,7 @@ fixed_uint64m1_t sub_inplace_u64(fixed_uint64m1_t a, fixed_uint64m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <8 x float> @llvm.vector.extract.v8f32.nxv2f32(<vscale x 2 x float> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <8 x float> @llvm.vector.extract.v8f32.nxv2f32(<vscale x 2 x float> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[SUB:%.*]] = fsub <8 x float> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x float> @llvm.vector.insert.nxv2f32.v8f32(<vscale x 2 x float> undef, <8 x float> [[SUB]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x float> @llvm.vector.insert.nxv2f32.v8f32(<vscale x 2 x float> poison, <8 x float> [[SUB]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x float> [[CAST_SCALABLE]]
 //
 fixed_float32m1_t sub_inplace_f32(fixed_float32m1_t a, fixed_float32m1_t b) {
@@ -639,7 +639,7 @@ fixed_float32m1_t sub_inplace_f32(fixed_float32m1_t a, fixed_float32m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <4 x double> @llvm.vector.extract.v4f64.nxv1f64(<vscale x 1 x double> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <4 x double> @llvm.vector.extract.v4f64.nxv1f64(<vscale x 1 x double> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[SUB:%.*]] = fsub <4 x double> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x double> @llvm.vector.insert.nxv1f64.v4f64(<vscale x 1 x double> undef, <4 x double> [[SUB]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x double> @llvm.vector.insert.nxv1f64.v4f64(<vscale x 1 x double> poison, <4 x double> [[SUB]], i64 0)
 // CHECK-NEXT:    ret <vscale x 1 x double> [[CAST_SCALABLE]]
 //
 fixed_float64m1_t sub_inplace_f64(fixed_float64m1_t a, fixed_float64m1_t b) {
@@ -652,7 +652,7 @@ fixed_float64m1_t sub_inplace_f64(fixed_float64m1_t a, fixed_float64m1_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i8> poison, i8 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <32 x i8> [[SPLAT_SPLATINSERT]], <32 x i8> poison, <32 x i32> zeroinitializer
 // CHECK-NEXT:    [[SUB:%.*]] = sub <32 x i8> [[A]], [[SPLAT_SPLAT]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[SUB]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> poison, <32 x i8> [[SUB]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i8> [[CAST_SCALABLE]]
 //
 fixed_int8m1_t sub_scalar_i8(fixed_int8m1_t a, int8_t b) {
@@ -665,7 +665,7 @@ fixed_int8m1_t sub_scalar_i8(fixed_int8m1_t a, int8_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i16> poison, i16 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i16> [[SPLAT_SPLATINSERT]], <16 x i16> poison, <16 x i32> zeroinitializer
 // CHECK-NEXT:    [[SUB:%.*]] = sub <16 x i16> [[A]], [[SPLAT_SPLAT]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[SUB]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> poison, <16 x i16> [[SUB]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i16> [[CAST_SCALABLE]]
 //
 fixed_int16m1_t sub_scalar_i16(fixed_int16m1_t a, int16_t b) {
@@ -678,7 +678,7 @@ fixed_int16m1_t sub_scalar_i16(fixed_int16m1_t a, int16_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <8 x i32> [[SPLAT_SPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[SUB:%.*]] = sub <8 x i32> [[A]], [[SPLAT_SPLAT]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[SUB]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> poison, <8 x i32> [[SUB]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i32> [[CAST_SCALABLE]]
 //
 fixed_int32m1_t sub_scalar_i32(fixed_int32m1_t a, int32_t b) {
@@ -691,7 +691,7 @@ fixed_int32m1_t sub_scalar_i32(fixed_int32m1_t a, int32_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <4 x i64> [[SPLAT_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
 // CHECK-NEXT:    [[SUB:%.*]] = sub <4 x i64> [[A]], [[SPLAT_SPLAT]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[SUB]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> poison, <4 x i64> [[SUB]], i64 0)
 // CHECK-NEXT:    ret <vscale x 1 x i64> [[CAST_SCALABLE]]
 //
 fixed_int64m1_t sub_scalar_i64(fixed_int64m1_t a, int64_t b) {
@@ -704,7 +704,7 @@ fixed_int64m1_t sub_scalar_i64(fixed_int64m1_t a, int64_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i8> poison, i8 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <32 x i8> [[SPLAT_SPLATINSERT]], <32 x i8> poison, <32 x i32> zeroinitializer
 // CHECK-NEXT:    [[SUB:%.*]] = sub <32 x i8> [[A]], [[SPLAT_SPLAT]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[SUB]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> poison, <32 x i8> [[SUB]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i8> [[CAST_SCALABLE]]
 //
 fixed_uint8m1_t sub_scalar_u8(fixed_uint8m1_t a, uint8_t b) {
@@ -717,7 +717,7 @@ fixed_uint8m1_t sub_scalar_u8(fixed_uint8m1_t a, uint8_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i16> poison, i16 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i16> [[SPLAT_SPLATINSERT]], <16 x i16> poison, <16 x i32> zeroinitializer
 // CHECK-NEXT:    [[SUB:%.*]] = sub <16 x i16> [[A]], [[SPLAT_SPLAT]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[SUB]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> poison, <16 x i16> [[SUB]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i16> [[CAST_SCALABLE]]
 //
 fixed_uint16m1_t sub_scalar_u16(fixed_uint16m1_t a, uint16_t b) {
@@ -730,7 +730,7 @@ fixed_uint16m1_t sub_scalar_u16(fixed_uint16m1_t a, uint16_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <8 x i32> [[SPLAT_SPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[SUB:%.*]] = sub <8 x i32> [[A]], [[SPLAT_SPLAT]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[SUB]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> poison, <8 x i32> [[SUB]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i32> [[CAST_SCALABLE]]
 //
 fixed_uint32m1_t sub_scalar_u32(fixed_uint32m1_t a, uint32_t b) {
@@ -743,7 +743,7 @@ fixed_uint32m1_t sub_scalar_u32(fixed_uint32m1_t a, uint32_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <4 x i64> [[SPLAT_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
 // CHECK-NEXT:    [[SUB:%.*]] = sub <4 x i64> [[A]], [[SPLAT_SPLAT]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[SUB]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> poison, <4 x i64> [[SUB]], i64 0)
 // CHECK-NEXT:    ret <vscale x 1 x i64> [[CAST_SCALABLE]]
 //
 fixed_uint64m1_t sub_scalar_u64(fixed_uint64m1_t a, uint64_t b) {
@@ -756,7 +756,7 @@ fixed_uint64m1_t sub_scalar_u64(fixed_uint64m1_t a, uint64_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x float> poison, float [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <8 x float> [[SPLAT_SPLATINSERT]], <8 x float> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[SUB:%.*]] = fsub <8 x float> [[A]], [[SPLAT_SPLAT]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x float> @llvm.vector.insert.nxv2f32.v8f32(<vscale x 2 x float> undef, <8 x float> [[SUB]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x float> @llvm.vector.insert.nxv2f32.v8f32(<vscale x 2 x float> poison, <8 x float> [[SUB]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x float> [[CAST_SCALABLE]]
 //
 fixed_float32m1_t sub_scalar_f32(fixed_float32m1_t a, float b) {
@@ -769,7 +769,7 @@ fixed_float32m1_t sub_scalar_f32(fixed_float32m1_t a, float b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <4 x double> poison, double [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <4 x double> [[SPLAT_SPLATINSERT]], <4 x double> poison, <4 x i32> zeroinitializer
 // CHECK-NEXT:    [[SUB:%.*]] = fsub <4 x double> [[A]], [[SPLAT_SPLAT]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x double> @llvm.vector.insert.nxv1f64.v4f64(<vscale x 1 x double> undef, <4 x double> [[SUB]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x double> @llvm.vector.insert.nxv1f64.v4f64(<vscale x 1 x double> poison, <4 x double> [[SUB]], i64 0)
 // CHECK-NEXT:    ret <vscale x 1 x double> [[CAST_SCALABLE]]
 //
 fixed_float64m1_t sub_scalar_f64(fixed_float64m1_t a, double b) {
@@ -783,7 +783,7 @@ fixed_float64m1_t sub_scalar_f64(fixed_float64m1_t a, double b) {
 // CHECK-NEXT:    [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[MUL:%.*]] = mul <32 x i8> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[MUL]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> poison, <32 x i8> [[MUL]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i8> [[CAST_SCALABLE]]
 //
 fixed_int8m1_t mul_i8(fixed_int8m1_t a, fixed_int8m1_t b) {
@@ -795,7 +795,7 @@ fixed_int8m1_t mul_i8(fixed_int8m1_t a, fixed_int8m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[MUL:%.*]] = mul <16 x i16> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[MUL]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> poison, <16 x i16> [[MUL]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i16> [[CAST_SCALABLE]]
 //
 fixed_int16m1_t mul_i16(fixed_int16m1_t a, fixed_int16m1_t b) {
@@ -807,7 +807,7 @@ fixed_int16m1_t mul_i16(fixed_int16m1_t a, fixed_int16m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[MUL:%.*]] = mul <8 x i32> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[MUL]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> poison, <8 x i32> [[MUL]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i32> [[CAST_SCALABLE]]
 //
 fixed_int32m1_t mul_i32(fixed_int32m1_t a, fixed_int32m1_t b) {
@@ -819,7 +819,7 @@ fixed_int32m1_t mul_i32(fixed_int32m1_t a, fixed_int32m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[MUL:%.*]] = mul <4 x i64> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[MUL]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> poison, <4 x i64> [[MUL]], i64 0)
 // CHECK-NEXT:    ret <vscale x 1 x i64> [[CAST_SCALABLE]]
 //
 fixed_int64m1_t mul_i64(fixed_int64m1_t a, fixed_int64m1_t b) {
@@ -831,7 +831,7 @@ fixed_int64m1_t mul_i64(fixed_int64m1_t a, fixed_int64m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[MUL:%.*]] = mul <32 x i8> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[MUL]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> poison, <32 x i8> [[MUL]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i8> [[CAST_SCALABLE]]
 //
 fixed_uint8m1_t mul_u8(fixed_uint8m1_t a, fixed_uint8m1_t b) {
@@ -843,7 +843,7 @@ fixed_uint8m1_t mul_u8(fixed_uint8m1_t a, fixed_uint8m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[MUL:%.*]] = mul <16 x i16> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[MUL]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> poison, <16 x i16> [[MUL]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i16> [[CAST_SCALABLE]]
 //
 fixed_uint16m1_t mul_u16(fixed_uint16m1_t a, fixed_uint16m1_t b) {
@@ -855,7 +855,7 @@ fixed_uint16m1_t mul_u16(fixed_uint16m1_t a, fixed_uint16m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[MUL:%.*]] = mul <8 x i32> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[MUL]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> poison, <8 x i32> [[MUL]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i32> [[CAST_SCALABLE]]
 //
 fixed_uint32m1_t mul_u32(fixed_uint32m1_t a, fixed_uint32m1_t b) {
@@ -867,7 +867,7 @@ fixed_uint32m1_t mul_u32(fixed_uint32m1_t a, fixed_uint32m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[MUL:%.*]] = mul <4 x i64> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[MUL]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> poison, <4 x i64> [[MUL]], i64 0)
 // CHECK-NEXT:    ret <vscale x 1 x i64> [[CAST_SCALABLE]]
 //
 fixed_uint64m1_t mul_u64(fixed_uint64m1_t a, fixed_uint64m1_t b) {
@@ -879,7 +879,7 @@ fixed_uint64m1_t mul_u64(fixed_uint64m1_t a, fixed_uint64m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <8 x float> @llvm.vector.extract.v8f32.nxv2f32(<vscale x 2 x float> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <8 x float> @llvm.vector.extract.v8f32.nxv2f32(<vscale x 2 x float> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[MUL:%.*]] = fmul <8 x float> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x float> @llvm.vector.insert.nxv2f32.v8f32(<vscale x 2 x float> undef, <8 x float> [[MUL]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x float> @llvm.vector.insert.nxv2f32.v8f32(<vscale x 2 x float> poison, <8 x float> [[MUL]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x float> [[CAST_SCALABLE]]
 //
 fixed_float32m1_t mul_f32(fixed_float32m1_t a, fixed_float32m1_t b) {
@@ -891,7 +891,7 @@ fixed_float32m1_t mul_f32(fixed_float32m1_t a, fixed_float32m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <4 x double> @llvm.vector.extract.v4f64.nxv1f64(<vscale x 1 x double> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <4 x double> @llvm.vector.extract.v4f64.nxv1f64(<vscale x 1 x double> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[MUL:%.*]] = fmul <4 x double> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x double> @llvm.vector.insert.nxv1f64.v4f64(<vscale x 1 x double> undef, <4 x double> [[MUL]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x double> @llvm.vector.insert.nxv1f64.v4f64(<vscale x 1 x double> poison, <4 x double> [[MUL]], i64 0)
 // CHECK-NEXT:    ret <vscale x 1 x double> [[CAST_SCALABLE]]
 //
 fixed_float64m1_t mul_f64(fixed_float64m1_t a, fixed_float64m1_t b) {
@@ -903,7 +903,7 @@ fixed_float64m1_t mul_f64(fixed_float64m1_t a, fixed_float64m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[MUL:%.*]] = mul <32 x i8> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[MUL]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> poison, <32 x i8> [[MUL]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i8> [[CAST_SCALABLE]]
 //
 fixed_int8m1_t mul_inplace_i8(fixed_int8m1_t a, fixed_int8m1_t b) {
@@ -915,7 +915,7 @@ fixed_int8m1_t mul_inplace_i8(fixed_int8m1_t a, fixed_int8m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[MUL:%.*]] = mul <16 x i16> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[MUL]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> poison, <16 x i16> [[MUL]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i16> [[CAST_SCALABLE]]
 //
 fixed_int16m1_t mul_inplace_i16(fixed_int16m1_t a, fixed_int16m1_t b) {
@@ -927,7 +927,7 @@ fixed_int16m1_t mul_inplace_i16(fixed_int16m1_t a, fixed_int16m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[MUL:%.*]] = mul <8 x i32> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[MUL]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> poison, <8 x i32> [[MUL]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i32> [[CAST_SCALABLE]]
 //
 fixed_int32m1_t mul_inplace_i32(fixed_int32m1_t a, fixed_int32m1_t b) {
@@ -939,7 +939,7 @@ fixed_int32m1_t mul_inplace_i32(fixed_int32m1_t a, fixed_int32m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[MUL:%.*]] = mul <4 x i64> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[MUL]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> poison, <4 x i64> [[MUL]], i64 0)
 // CHECK-NEXT:    ret <vscale x 1 x i64> [[CAST_SCALABLE]]
 //
 fixed_int64m1_t mul_inplace_i64(fixed_int64m1_t a, fixed_int64m1_t b) {
@@ -951,7 +951,7 @@ fixed_int64m1_t mul_inplace_i64(fixed_int64m1_t a, fixed_int64m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[MUL:%.*]] = mul <32 x i8> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[MUL]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> poison, <32 x i8> [[MUL]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i8> [[CAST_SCALABLE]]
 //
 fixed_uint8m1_t mul_inplace_u8(fixed_uint8m1_t a, fixed_uint8m1_t b) {
@@ -963,7 +963,7 @@ fixed_uint8m1_t mul_inplace_u8(fixed_uint8m1_t a, fixed_uint8m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[MUL:%.*]] = mul <16 x i16> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[MUL]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> poison, <16 x i16> [[MUL]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i16> [[CAST_SCALABLE]]
 //
 fixed_uint16m1_t mul_inplace_u16(fixed_uint16m1_t a, fixed_uint16m1_t b) {
@@ -975,7 +975,7 @@ fixed_uint16m1_t mul_inplace_u16(fixed_uint16m1_t a, fixed_uint16m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[MUL:%.*]] = mul <8 x i32> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[MUL]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> poison, <8 x i32> [[MUL]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i32> [[CAST_SCALABLE]]
 //
 fixed_uint32m1_t mul_inplace_u32(fixed_uint32m1_t a, fixed_uint32m1_t b) {
@@ -987,7 +987,7 @@ fixed_uint32m1_t mul_inplace_u32(fixed_uint32m1_t a, fixed_uint32m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[MUL:%.*]] = mul <4 x i64> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[MUL]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> poison, <4 x i64> [[MUL]], i64 0)
 // CHECK-NEXT:    ret <vscale x 1 x i64> [[CAST_SCALABLE]]
 //
 fixed_uint64m1_t mul_inplace_u64(fixed_uint64m1_t a, fixed_uint64m1_t b) {
@@ -999,7 +999,7 @@ fixed_uint64m1_t mul_inplace_u64(fixed_uint64m1_t a, fixed_uint64m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <8 x float> @llvm.vector.extract.v8f32.nxv2f32(<vscale x 2 x float> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <8 x float> @llvm.vector.extract.v8f32.nxv2f32(<vscale x 2 x float> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[MUL:%.*]] = fmul <8 x float> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x float> @llvm.vector.insert.nxv2f32.v8f32(<vscale x 2 x float> undef, <8 x float> [[MUL]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x float> @llvm.vector.insert.nxv2f32.v8f32(<vscale x 2 x float> poison, <8 x float> [[MUL]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x float> [[CAST_SCALABLE]]
 //
 fixed_float32m1_t mul_inplace_f32(fixed_float32m1_t a, fixed_float32m1_t b) {
@@ -1011,7 +1011,7 @@ fixed_float32m1_t mul_inplace_f32(fixed_float32m1_t a, fixed_float32m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <4 x double> @llvm.vector.extract.v4f64.nxv1f64(<vscale x 1 x double> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <4 x double> @llvm.vector.extract.v4f64.nxv1f64(<vscale x 1 x double> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[MUL:%.*]] = fmul <4 x double> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x double> @llvm.vector.insert.nxv1f64.v4f64(<vscale x 1 x double> undef, <4 x double> [[MUL]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x double> @llvm.vector.insert.nxv1f64.v4f64(<vscale x 1 x double> poison, <4 x double> [[MUL]], i64 0)
 // CHECK-NEXT:    ret <vscale x 1 x double> [[CAST_SCALABLE]]
 //
 fixed_float64m1_t mul_inplace_f64(fixed_float64m1_t a, fixed_float64m1_t b) {
@@ -1024,7 +1024,7 @@ fixed_float64m1_t mul_inplace_f64(fixed_float64m1_t a, fixed_float64m1_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i8> poison, i8 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <32 x i8> [[SPLAT_SPLATINSERT]], <32 x i8> poison, <32 x i32> zeroinitializer
 // CHECK-NEXT:    [[MUL:%.*]] = mul <32 x i8> [[A]], [[SPLAT_SPLAT]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[MUL]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> poison, <32 x i8> [[MUL]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i8> [[CAST_SCALABLE]]
 //
 fixed_int8m1_t mul_scalar_i8(fixed_int8m1_t a, int8_t b) {
@@ -1037,7 +1037,7 @@ fixed_int8m1_t mul_scalar_i8(fixed_int8m1_t a, int8_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i16> poison, i16 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i16> [[SPLAT_SPLATINSERT]], <16 x i16> poison, <16 x i32> zeroinitializer
 // CHECK-NEXT:    [[MUL:%.*]] = mul <16 x i16> [[A]], [[SPLAT_SPLAT]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[MUL]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> poison, <16 x i16> [[MUL]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i16> [[CAST_SCALABLE]]
 //
 fixed_int16m1_t mul_scalar_i16(fixed_int16m1_t a, int16_t b) {
@@ -1050,7 +1050,7 @@ fixed_int16m1_t mul_scalar_i16(fixed_int16m1_t a, int16_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <8 x i32> [[SPLAT_SPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[MUL:%.*]] = mul <8 x i32> [[A]], [[SPLAT_SPLAT]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[MUL]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> poison, <8 x i32> [[MUL]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i32> [[CAST_SCALABLE]]
 //
 fixed_int32m1_t mul_scalar_i32(fixed_int32m1_t a, int32_t b) {
@@ -1063,7 +1063,7 @@ fixed_int32m1_t mul_scalar_i32(fixed_int32m1_t a, int32_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <4 x i64> [[SPLAT_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
 // CHECK-NEXT:    [[MUL:%.*]] = mul <4 x i64> [[A]], [[SPLAT_SPLAT]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[MUL]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> poison, <4 x i64> [[MUL]], i64 0)
 // CHECK-NEXT:    ret <vscale x 1 x i64> [[CAST_SCALABLE]]
 //
 fixed_int64m1_t mul_scalar_i64(fixed_int64m1_t a, int64_t b) {
@@ -1076,7 +1076,7 @@ fixed_int64m1_t mul_scalar_i64(fixed_int64m1_t a, int64_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i8> poison, i8 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <32 x i8> [[SPLAT_SPLATINSERT]], <32 x i8> poison, <32 x i32> zeroinitializer
 // CHECK-NEXT:    [[MUL:%.*]] = mul <32 x i8> [[A]], [[SPLAT_SPLAT]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[MUL]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> poison, <32 x i8> [[MUL]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i8> [[CAST_SCALABLE]]
 //
 fixed_uint8m1_t mul_scalar_u8(fixed_uint8m1_t a, uint8_t b) {
@@ -1089,7 +1089,7 @@ fixed_uint8m1_t mul_scalar_u8(fixed_uint8m1_t a, uint8_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i16> poison, i16 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i16> [[SPLAT_SPLATINSERT]], <16 x i16> poison, <16 x i32> zeroinitializer
 // CHECK-NEXT:    [[MUL:%.*]] = mul <16 x i16> [[A]], [[SPLAT_SPLAT]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[MUL]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> poison, <16 x i16> [[MUL]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i16> [[CAST_SCALABLE]]
 //
 fixed_uint16m1_t mul_scalar_u16(fixed_uint16m1_t a, uint16_t b) {
@@ -1102,7 +1102,7 @@ fixed_uint16m1_t mul_scalar_u16(fixed_uint16m1_t a, uint16_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <8 x i32> [[SPLAT_SPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[MUL:%.*]] = mul <8 x i32> [[A]], [[SPLAT_SPLAT]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[MUL]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> poison, <8 x i32> [[MUL]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i32> [[CAST_SCALABLE]]
 //
 fixed_uint32m1_t mul_scalar_u32(fixed_uint32m1_t a, uint32_t b) {
@@ -1115,7 +1115,7 @@ fixed_uint32m1_t mul_scalar_u32(fixed_uint32m1_t a, uint32_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <4 x i64> [[SPLAT_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
 // CHECK-NEXT:    [[MUL:%.*]] = mul <4 x i64> [[A]], [[SPLAT_SPLAT]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[MUL]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> poison, <4 x i64> [[MUL]], i64 0)
 // CHECK-NEXT:    ret <vscale x 1 x i64> [[CAST_SCALABLE]]
 //
 fixed_uint64m1_t mul_scalar_u64(fixed_uint64m1_t a, uint64_t b) {
@@ -1128,7 +1128,7 @@ fixed_uint64m1_t mul_scalar_u64(fixed_uint64m1_t a, uint64_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x float> poison, float [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <8 x float> [[SPLAT_SPLATINSERT]], <8 x float> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[MUL:%.*]] = fmul <8 x float> [[A]], [[SPLAT_SPLAT]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x float> @llvm.vector.insert.nxv2f32.v8f32(<vscale x 2 x float> undef, <8 x float> [[MUL]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x float> @llvm.vector.insert.nxv2f32.v8f32(<vscale x 2 x float> poison, <8 x float> [[MUL]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x float> [[CAST_SCALABLE]]
 //
 fixed_float32m1_t mul_scalar_f32(fixed_float32m1_t a, float b) {
@@ -1141,7 +1141,7 @@ fixed_float32m1_t mul_scalar_f32(fixed_float32m1_t a, float b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <4 x double> poison, double [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <4 x double> [[SPLAT_SPLATINSERT]], <4 x double> poison, <4 x i32> zeroinitializer
 // CHECK-NEXT:    [[MUL:%.*]] = fmul <4 x double> [[A]], [[SPLAT_SPLAT]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x double> @llvm.vector.insert.nxv1f64.v4f64(<vscale x 1 x double> undef, <4 x double> [[MUL]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x double> @llvm.vector.insert.nxv1f64.v4f64(<vscale x 1 x double> poison, <4 x double> [[MUL]], i64 0)
 // CHECK-NEXT:    ret <vscale x 1 x double> [[CAST_SCALABLE]]
 //
 fixed_float64m1_t mul_scalar_f64(fixed_float64m1_t a, double b) {
@@ -1155,7 +1155,7 @@ fixed_float64m1_t mul_scalar_f64(fixed_float64m1_t a, double b) {
 // CHECK-NEXT:    [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[DIV:%.*]] = sdiv <32 x i8> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[DIV]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> poison, <32 x i8> [[DIV]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i8> [[CAST_SCALABLE]]
 //
 fixed_int8m1_t div_i8(fixed_int8m1_t a, fixed_int8m1_t b) {
@@ -1167,7 +1167,7 @@ fixed_int8m1_t div_i8(fixed_int8m1_t a, fixed_int8m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[DIV:%.*]] = sdiv <16 x i16> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[DIV]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> poison, <16 x i16> [[DIV]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i16> [[CAST_SCALABLE]]
 //
 fixed_int16m1_t div_i16(fixed_int16m1_t a, fixed_int16m1_t b) {
@@ -1179,7 +1179,7 @@ fixed_int16m1_t div_i16(fixed_int16m1_t a, fixed_int16m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[DIV:%.*]] = sdiv <8 x i32> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[DIV]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> poison, <8 x i32> [[DIV]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i32> [[CAST_SCALABLE]]
 //
 fixed_int32m1_t div_i32(fixed_int32m1_t a, fixed_int32m1_t b) {
@@ -1191,7 +1191,7 @@ fixed_int32m1_t div_i32(fixed_int32m1_t a, fixed_int32m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[DIV:%.*]] = sdiv <4 x i64> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[DIV]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> poison, <4 x i64> [[DIV]], i64 0)
 // CHECK-NEXT:    ret <vscale x 1 x i64> [[CAST_SCALABLE]]
 //
 fixed_int64m1_t div_i64(fixed_int64m1_t a, fixed_int64m1_t b) {
@@ -1203,7 +1203,7 @@ fixed_int64m1_t div_i64(fixed_int64m1_t a, fixed_int64m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[DIV:%.*]] = udiv <32 x i8> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[DIV]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> poison, <32 x i8> [[DIV]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i8> [[CAST_SCALABLE]]
 //
 fixed_uint8m1_t div_u8(fixed_uint8m1_t a, fixed_uint8m1_t b) {
@@ -1215,7 +1215,7 @@ fixed_uint8m1_t div_u8(fixed_uint8m1_t a, fixed_uint8m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[DIV:%.*]] = udiv <16 x i16> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[DIV]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> poison, <16 x i16> [[DIV]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i16> [[CAST_SCALABLE]]
 //
 fixed_uint16m1_t div_u16(fixed_uint16m1_t a, fixed_uint16m1_t b) {
@@ -1227,7 +1227,7 @@ fixed_uint16m1_t div_u16(fixed_uint16m1_t a, fixed_uint16m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[DIV:%.*]] = udiv <8 x i32> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[DIV]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> poison, <8 x i32> [[DIV]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i32> [[CAST_SCALABLE]]
 //
 fixed_uint32m1_t div_u32(fixed_uint32m1_t a, fixed_uint32m1_t b) {
@@ -1239,7 +1239,7 @@ fixed_uint32m1_t div_u32(fixed_uint32m1_t a, fixed_uint32m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[DIV:%.*]] = udiv <4 x i64> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[DIV]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> poison, <4 x i64> [[DIV]], i64 0)
 // CHECK-NEXT:    ret <vscale x 1 x i64> [[CAST_SCALABLE]]
 //
 fixed_uint64m1_t div_u64(fixed_uint64m1_t a, fixed_uint64m1_t b) {
@@ -1251,7 +1251,7 @@ fixed_uint64m1_t div_u64(fixed_uint64m1_t a, fixed_uint64m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <8 x float> @llvm.vector.extract.v8f32.nxv2f32(<vscale x 2 x float> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <8 x float> @llvm.vector.extract.v8f32.nxv2f32(<vscale x 2 x float> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[DIV:%.*]] = fdiv <8 x float> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x float> @llvm.vector.insert.nxv2f32.v8f32(<vscale x 2 x float> undef, <8 x float> [[DIV]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x float> @llvm.vector.insert.nxv2f32.v8f32(<vscale x 2 x float> poison, <8 x float> [[DIV]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x float> [[CAST_SCALABLE]]
 //
 fixed_float32m1_t div_f32(fixed_float32m1_t a, fixed_float32m1_t b) {
@@ -1263,7 +1263,7 @@ fixed_float32m1_t div_f32(fixed_float32m1_t a, fixed_float32m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <4 x double> @llvm.vector.extract.v4f64.nxv1f64(<vscale x 1 x double> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <4 x double> @llvm.vector.extract.v4f64.nxv1f64(<vscale x 1 x double> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[DIV:%.*]] = fdiv <4 x double> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x double> @llvm.vector.insert.nxv1f64.v4f64(<vscale x 1 x double> undef, <4 x double> [[DIV]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x double> @llvm.vector.insert.nxv1f64.v4f64(<vscale x 1 x double> poison, <4 x double> [[DIV]], i64 0)
 // CHECK-NEXT:    ret <vscale x 1 x double> [[CAST_SCALABLE]]
 //
 fixed_float64m1_t div_f64(fixed_float64m1_t a, fixed_float64m1_t b) {
@@ -1275,7 +1275,7 @@ fixed_float64m1_t div_f64(fixed_float64m1_t a, fixed_float64m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[DIV:%.*]] = sdiv <32 x i8> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[DIV]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> poison, <32 x i8> [[DIV]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i8> [[CAST_SCALABLE]]
 //
 fixed_int8m1_t div_inplace_i8(fixed_int8m1_t a, fixed_int8m1_t b) {
@@ -1287,7 +1287,7 @@ fixed_int8m1_t div_inplace_i8(fixed_int8m1_t a, fixed_int8m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[DIV:%.*]] = sdiv <16 x i16> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[DIV]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> poison, <16 x i16> [[DIV]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i16> [[CAST_SCALABLE]]
 //
 fixed_int16m1_t div_inplace_i16(fixed_int16m1_t a, fixed_int16m1_t b) {
@@ -1299,7 +1299,7 @@ fixed_int16m1_t div_inplace_i16(fixed_int16m1_t a, fixed_int16m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[DIV:%.*]] = sdiv <8 x i32> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[DIV]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> poison, <8 x i32> [[DIV]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i32> [[CAST_SCALABLE]]
 //
 fixed_int32m1_t div_inplace_i32(fixed_int32m1_t a, fixed_int32m1_t b) {
@@ -1311,7 +1311,7 @@ fixed_int32m1_t div_inplace_i32(fixed_int32m1_t a, fixed_int32m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[DIV:%.*]] = sdiv <4 x i64> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[DIV]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> poison, <4 x i64> [[DIV]], i64 0)
 // CHECK-NEXT:    ret <vscale x 1 x i64> [[CAST_SCALABLE]]
 //
 fixed_int64m1_t div_inplace_i64(fixed_int64m1_t a, fixed_int64m1_t b) {
@@ -1323,7 +1323,7 @@ fixed_int64m1_t div_inplace_i64(fixed_int64m1_t a, fixed_int64m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[DIV:%.*]] = udiv <32 x i8> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[DIV]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> poison, <32 x i8> [[DIV]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i8> [[CAST_SCALABLE]]
 //
 fixed_uint8m1_t div_inplace_u8(fixed_uint8m1_t a, fixed_uint8m1_t b) {
@@ -1335,7 +1335,7 @@ fixed_uint8m1_t div_inplace_u8(fixed_uint8m1_t a, fixed_uint8m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[DIV:%.*]] = udiv <16 x i16> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[DIV]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> poison, <16 x i16> [[DIV]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i16> [[CAST_SCALABLE]]
 //
 fixed_uint16m1_t div_inplace_u16(fixed_uint16m1_t a, fixed_uint16m1_t b) {
@@ -1347,7 +1347,7 @@ fixed_uint16m1_t div_inplace_u16(fixed_uint16m1_t a, fixed_uint16m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[DIV:%.*]] = udiv <8 x i32> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[DIV]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> poison, <8 x i32> [[DIV]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i32> [[CAST_SCALABLE]]
 //
 fixed_uint32m1_t div_inplace_u32(fixed_uint32m1_t a, fixed_uint32m1_t b) {
@@ -1359,7 +1359,7 @@ fixed_uint32m1_t div_inplace_u32(fixed_uint32m1_t a, fixed_uint32m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[DIV:%.*]] = udiv <4 x i64> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[DIV]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> poison, <4 x i64> [[DIV]], i64 0)
 // CHECK-NEXT:    ret <vscale x 1 x i64> [[CAST_SCALABLE]]
 //
 fixed_uint64m1_t div_inplace_u64(fixed_uint64m1_t a, fixed_uint64m1_t b) {
@@ -1371,7 +1371,7 @@ fixed_uint64m1_t div_inplace_u64(fixed_uint64m1_t a, fixed_uint64m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <8 x float> @llvm.vector.extract.v8f32.nxv2f32(<vscale x 2 x float> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <8 x float> @llvm.vector.extract.v8f32.nxv2f32(<vscale x 2 x float> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[DIV:%.*]] = fdiv <8 x float> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x float> @llvm.vector.insert.nxv2f32.v8f32(<vscale x 2 x float> undef, <8 x float> [[DIV]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x float> @llvm.vector.insert.nxv2f32.v8f32(<vscale x 2 x float> poison, <8 x float> [[DIV]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x float> [[CAST_SCALABLE]]
 //
 fixed_float32m1_t div_inplace_f32(fixed_float32m1_t a, fixed_float32m1_t b) {
@@ -1383,7 +1383,7 @@ fixed_float32m1_t div_inplace_f32(fixed_float32m1_t a, fixed_float32m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <4 x double> @llvm.vector.extract.v4f64.nxv1f64(<vscale x 1 x double> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <4 x double> @llvm.vector.extract.v4f64.nxv1f64(<vscale x 1 x double> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[DIV:%.*]] = fdiv <4 x double> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x double> @llvm.vector.insert.nxv1f64.v4f64(<vscale x 1 x double> undef, <4 x double> [[DIV]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x double> @llvm.vector.insert.nxv1f64.v4f64(<vscale x 1 x double> poison, <4 x double> [[DIV]], i64 0)
 // CHECK-NEXT:    ret <vscale x 1 x double> [[CAST_SCALABLE]]
 //
 fixed_float64m1_t div_inplace_f64(fixed_float64m1_t a, fixed_float64m1_t b) {
@@ -1396,7 +1396,7 @@ fixed_float64m1_t div_inplace_f64(fixed_float64m1_t a, fixed_float64m1_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i8> poison, i8 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <32 x i8> [[SPLAT_SPLATINSERT]], <32 x i8> poison, <32 x i32> zeroinitializer
 // CHECK-NEXT:    [[DIV:%.*]] = sdiv <32 x i8> [[A]], [[SPLAT_SPLAT]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[DIV]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> poison, <32 x i8> [[DIV]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i8> [[CAST_SCALABLE]]
 //
 fixed_int8m1_t div_scalar_i8(fixed_int8m1_t a, int8_t b) {
@@ -1409,7 +1409,7 @@ fixed_int8m1_t div_scalar_i8(fixed_int8m1_t a, int8_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i16> poison, i16 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i16> [[SPLAT_SPLATINSERT]], <16 x i16> poison, <16 x i32> zeroinitializer
 // CHECK-NEXT:    [[DIV:%.*]] = sdiv <16 x i16> [[A]], [[SPLAT_SPLAT]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[DIV]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> poison, <16 x i16> [[DIV]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i16> [[CAST_SCALABLE]]
 //
 fixed_int16m1_t div_scalar_i16(fixed_int16m1_t a, int16_t b) {
@@ -1422,7 +1422,7 @@ fixed_int16m1_t div_scalar_i16(fixed_int16m1_t a, int16_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <8 x i32> [[SPLAT_SPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[DIV:%.*]] = sdiv <8 x i32> [[A]], [[SPLAT_SPLAT]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[DIV]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> poison, <8 x i32> [[DIV]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i32> [[CAST_SCALABLE]]
 //
 fixed_int32m1_t div_scalar_i32(fixed_int32m1_t a, int32_t b) {
@@ -1435,7 +1435,7 @@ fixed_int32m1_t div_scalar_i32(fixed_int32m1_t a, int32_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <4 x i64> [[SPLAT_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
 // CHECK-NEXT:    [[DIV:%.*]] = sdiv <4 x i64> [[A]], [[SPLAT_SPLAT]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[DIV]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> poison, <4 x i64> [[DIV]], i64 0)
 // CHECK-NEXT:    ret <vscale x 1 x i64> [[CAST_SCALABLE]]
 //
 fixed_int64m1_t div_scalar_i64(fixed_int64m1_t a, int64_t b) {
@@ -1448,7 +1448,7 @@ fixed_int64m1_t div_scalar_i64(fixed_int64m1_t a, int64_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i8> poison, i8 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <32 x i8> [[SPLAT_SPLATINSERT]], <32 x i8> poison, <32 x i32> zeroinitializer
 // CHECK-NEXT:    [[DIV:%.*]] = udiv <32 x i8> [[A]], [[SPLAT_SPLAT]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[DIV]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> poison, <32 x i8> [[DIV]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i8> [[CAST_SCALABLE]]
 //
 fixed_uint8m1_t div_scalar_u8(fixed_uint8m1_t a, uint8_t b) {
@@ -1461,7 +1461,7 @@ fixed_uint8m1_t div_scalar_u8(fixed_uint8m1_t a, uint8_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i16> poison, i16 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i16> [[SPLAT_SPLATINSERT]], <16 x i16> poison, <16 x i32> zeroinitializer
 // CHECK-NEXT:    [[DIV:%.*]] = udiv <16 x i16> [[A]], [[SPLAT_SPLAT]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[DIV]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> poison, <16 x i16> [[DIV]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i16> [[CAST_SCALABLE]]
 //
 fixed_uint16m1_t div_scalar_u16(fixed_uint16m1_t a, uint16_t b) {
@@ -1474,7 +1474,7 @@ fixed_uint16m1_t div_scalar_u16(fixed_uint16m1_t a, uint16_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <8 x i32> [[SPLAT_SPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[DIV:%.*]] = udiv <8 x i32> [[A]], [[SPLAT_SPLAT]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[DIV]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> poison, <8 x i32> [[DIV]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i32> [[CAST_SCALABLE]]
 //
 fixed_uint32m1_t div_scalar_u32(fixed_uint32m1_t a, uint32_t b) {
@@ -1487,7 +1487,7 @@ fixed_uint32m1_t div_scalar_u32(fixed_uint32m1_t a, uint32_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <4 x i64> [[SPLAT_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
 // CHECK-NEXT:    [[DIV:%.*]] = udiv <4 x i64> [[A]], [[SPLAT_SPLAT]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[DIV]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> poison, <4 x i64> [[DIV]], i64 0)
 // CHECK-NEXT:    ret <vscale x 1 x i64> [[CAST_SCALABLE]]
 //
 fixed_uint64m1_t div_scalar_u64(fixed_uint64m1_t a, uint64_t b) {
@@ -1500,7 +1500,7 @@ fixed_uint64m1_t div_scalar_u64(fixed_uint64m1_t a, uint64_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x float> poison, float [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <8 x float> [[SPLAT_SPLATINSERT]], <8 x float> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[DIV:%.*]] = fdiv <8 x float> [[A]], [[SPLAT_SPLAT]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x float> @llvm.vector.insert.nxv2f32.v8f32(<vscale x 2 x float> undef, <8 x float> [[DIV]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x float> @llvm.vector.insert.nxv2f32.v8f32(<vscale x 2 x float> poison, <8 x float> [[DIV]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x float> [[CAST_SCALABLE]]
 //
 fixed_float32m1_t div_scalar_f32(fixed_float32m1_t a, float b) {
@@ -1513,7 +1513,7 @@ fixed_float32m1_t div_scalar_f32(fixed_float32m1_t a, float b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <4 x double> poison, double [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <4 x double> [[SPLAT_SPLATINSERT]], <4 x double> poison, <4 x i32> zeroinitializer
 // CHECK-NEXT:    [[DIV:%.*]] = fdiv <4 x double> [[A]], [[SPLAT_SPLAT]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x double> @llvm.vector.insert.nxv1f64.v4f64(<vscale x 1 x double> undef, <4 x double> [[DIV]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x double> @llvm.vector.insert.nxv1f64.v4f64(<vscale x 1 x double> poison, <4 x double> [[DIV]], i64 0)
 // CHECK-NEXT:    ret <vscale x 1 x double> [[CAST_SCALABLE]]
 //
 fixed_float64m1_t div_scalar_f64(fixed_float64m1_t a, double b) {
@@ -1527,7 +1527,7 @@ fixed_float64m1_t div_scalar_f64(fixed_float64m1_t a, double b) {
 // CHECK-NEXT:    [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[REM:%.*]] = srem <32 x i8> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[REM]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> poison, <32 x i8> [[REM]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i8> [[CAST_SCALABLE]]
 //
 fixed_int8m1_t rem_i8(fixed_int8m1_t a, fixed_int8m1_t b) {
@@ -1539,7 +1539,7 @@ fixed_int8m1_t rem_i8(fixed_int8m1_t a, fixed_int8m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[REM:%.*]] = srem <16 x i16> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[REM]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> poison, <16 x i16> [[REM]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i16> [[CAST_SCALABLE]]
 //
 fixed_int16m1_t rem_i16(fixed_int16m1_t a, fixed_int16m1_t b) {
@@ -1551,7 +1551,7 @@ fixed_int16m1_t rem_i16(fixed_int16m1_t a, fixed_int16m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[REM:%.*]] = srem <8 x i32> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[REM]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> poison, <8 x i32> [[REM]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i32> [[CAST_SCALABLE]]
 //
 fixed_int32m1_t rem_i32(fixed_int32m1_t a, fixed_int32m1_t b) {
@@ -1563,7 +1563,7 @@ fixed_int32m1_t rem_i32(fixed_int32m1_t a, fixed_int32m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[REM:%.*]] = srem <4 x i64> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[REM]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> poison, <4 x i64> [[REM]], i64 0)
 // CHECK-NEXT:    ret <vscale x 1 x i64> [[CAST_SCALABLE]]
 //
 fixed_int64m1_t rem_i64(fixed_int64m1_t a, fixed_int64m1_t b) {
@@ -1575,7 +1575,7 @@ fixed_int64m1_t rem_i64(fixed_int64m1_t a, fixed_int64m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[REM:%.*]] = urem <32 x i8> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[REM]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> poison, <32 x i8> [[REM]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i8> [[CAST_SCALABLE]]
 //
 fixed_uint8m1_t rem_u8(fixed_uint8m1_t a, fixed_uint8m1_t b) {
@@ -1587,7 +1587,7 @@ fixed_uint8m1_t rem_u8(fixed_uint8m1_t a, fixed_uint8m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[REM:%.*]] = urem <16 x i16> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[REM]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> poison, <16 x i16> [[REM]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i16> [[CAST_SCALABLE]]
 //
 fixed_uint16m1_t rem_u16(fixed_uint16m1_t a, fixed_uint16m1_t b) {
@@ -1599,7 +1599,7 @@ fixed_uint16m1_t rem_u16(fixed_uint16m1_t a, fixed_uint16m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[REM:%.*]] = urem <8 x i32> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[REM]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> poison, <8 x i32> [[REM]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i32> [[CAST_SCALABLE]]
 //
 fixed_uint32m1_t rem_u32(fixed_uint32m1_t a, fixed_uint32m1_t b) {
@@ -1611,7 +1611,7 @@ fixed_uint32m1_t rem_u32(fixed_uint32m1_t a, fixed_uint32m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[REM:%.*]] = urem <4 x i64> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[REM]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> poison, <4 x i64> [[REM]], i64 0)
 // CHECK-NEXT:    ret <vscale x 1 x i64> [[CAST_SCALABLE]]
 //
 fixed_uint64m1_t rem_u64(fixed_uint64m1_t a, fixed_uint64m1_t b) {
@@ -1623,7 +1623,7 @@ fixed_uint64m1_t rem_u64(fixed_uint64m1_t a, fixed_uint64m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[REM:%.*]] = srem <32 x i8> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[REM]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> poison, <32 x i8> [[REM]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i8> [[CAST_SCALABLE]]
 //
 fixed_int8m1_t rem_inplace_i8(fixed_int8m1_t a, fixed_int8m1_t b) {
@@ -1635,7 +1635,7 @@ fixed_int8m1_t rem_inplace_i8(fixed_int8m1_t a, fixed_int8m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[REM:%.*]] = srem <16 x i16> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[REM]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> poison, <16 x i16> [[REM]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i16> [[CAST_SCALABLE]]
 //
 fixed_int16m1_t rem_inplace_i16(fixed_int16m1_t a, fixed_int16m1_t b) {
@@ -1647,7 +1647,7 @@ fixed_int16m1_t rem_inplace_i16(fixed_int16m1_t a, fixed_int16m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[REM:%.*]] = srem <8 x i32> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[REM]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> poison, <8 x i32> [[REM]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i32> [[CAST_SCALABLE]]
 //
 fixed_int32m1_t rem_inplace_i32(fixed_int32m1_t a, fixed_int32m1_t b) {
@@ -1659,7 +1659,7 @@ fixed_int32m1_t rem_inplace_i32(fixed_int32m1_t a, fixed_int32m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[REM:%.*]] = srem <4 x i64> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[REM]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> poison, <4 x i64> [[REM]], i64 0)
 // CHECK-NEXT:    ret <vscale x 1 x i64> [[CAST_SCALABLE]]
 //
 fixed_int64m1_t rem_inplace_i64(fixed_int64m1_t a, fixed_int64m1_t b) {
@@ -1671,7 +1671,7 @@ fixed_int64m1_t rem_inplace_i64(fixed_int64m1_t a, fixed_int64m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[REM:%.*]] = urem <32 x i8> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[REM]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> poison, <32 x i8> [[REM]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i8> [[CAST_SCALABLE]]
 //
 fixed_uint8m1_t rem_inplace_u8(fixed_uint8m1_t a, fixed_uint8m1_t b) {
@@ -1683,7 +1683,7 @@ fixed_uint8m1_t rem_inplace_u8(fixed_uint8m1_t a, fixed_uint8m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[REM:%.*]] = urem <16 x i16> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[REM]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> poison, <16 x i16> [[REM]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i16> [[CAST_SCALABLE]]
 //
 fixed_uint16m1_t rem_inplace_u16(fixed_uint16m1_t a, fixed_uint16m1_t b) {
@@ -1695,7 +1695,7 @@ fixed_uint16m1_t rem_inplace_u16(fixed_uint16m1_t a, fixed_uint16m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[REM:%.*]] = urem <8 x i32> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[REM]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> poison, <8 x i32> [[REM]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i32> [[CAST_SCALABLE]]
 //
 fixed_uint32m1_t rem_inplace_u32(fixed_uint32m1_t a, fixed_uint32m1_t b) {
@@ -1707,7 +1707,7 @@ fixed_uint32m1_t rem_inplace_u32(fixed_uint32m1_t a, fixed_uint32m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[REM:%.*]] = urem <4 x i64> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[REM]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> poison, <4 x i64> [[REM]], i64 0)
 // CHECK-NEXT:    ret <vscale x 1 x i64> [[CAST_SCALABLE]]
 //
 fixed_uint64m1_t rem_inplace_u64(fixed_uint64m1_t a, fixed_uint64m1_t b) {
@@ -1720,7 +1720,7 @@ fixed_uint64m1_t rem_inplace_u64(fixed_uint64m1_t a, fixed_uint64m1_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i8> poison, i8 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <32 x i8> [[SPLAT_SPLATINSERT]], <32 x i8> poison, <32 x i32> zeroinitializer
 // CHECK-NEXT:    [[REM:%.*]] = srem <32 x i8> [[A]], [[SPLAT_SPLAT]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[REM]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> poison, <32 x i8> [[REM]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i8> [[CAST_SCALABLE]]
 //
 fixed_int8m1_t rem_scalar_i8(fixed_int8m1_t a, int8_t b) {
@@ -1733,7 +1733,7 @@ fixed_int8m1_t rem_scalar_i8(fixed_int8m1_t a, int8_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i16> poison, i16 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i16> [[SPLAT_SPLATINSERT]], <16 x i16> poison, <16 x i32> zeroinitializer
 // CHECK-NEXT:    [[REM:%.*]] = srem <16 x i16> [[A]], [[SPLAT_SPLAT]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[REM]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> poison, <16 x i16> [[REM]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i16> [[CAST_SCALABLE]]
 //
 fixed_int16m1_t rem_scalar_i16(fixed_int16m1_t a, int16_t b) {
@@ -1746,7 +1746,7 @@ fixed_int16m1_t rem_scalar_i16(fixed_int16m1_t a, int16_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <8 x i32> [[SPLAT_SPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[REM:%.*]] = srem <8 x i32> [[A]], [[SPLAT_SPLAT]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[REM]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> poison, <8 x i32> [[REM]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i32> [[CAST_SCALABLE]]
 //
 fixed_int32m1_t rem_scalar_i32(fixed_int32m1_t a, int32_t b) {
@@ -1759,7 +1759,7 @@ fixed_int32m1_t rem_scalar_i32(fixed_int32m1_t a, int32_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <4 x i64> [[SPLAT_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
 // CHECK-NEXT:    [[REM:%.*]] = srem <4 x i64> [[A]], [[SPLAT_SPLAT]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[REM]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> poison, <4 x i64> [[REM]], i64 0)
 // CHECK-NEXT:    ret <vscale x 1 x i64> [[CAST_SCALABLE]]
 //
 fixed_int64m1_t rem_scalar_i64(fixed_int64m1_t a, int64_t b) {
@@ -1772,7 +1772,7 @@ fixed_int64m1_t rem_scalar_i64(fixed_int64m1_t a, int64_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i8> poison, i8 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <32 x i8> [[SPLAT_SPLATINSERT]], <32 x i8> poison, <32 x i32> zeroinitializer
 // CHECK-NEXT:    [[REM:%.*]] = urem <32 x i8> [[A]], [[SPLAT_SPLAT]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[REM]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> poison, <32 x i8> [[REM]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i8> [[CAST_SCALABLE]]
 //
 fixed_uint8m1_t rem_scalar_u8(fixed_uint8m1_t a, uint8_t b) {
@@ -1785,7 +1785,7 @@ fixed_uint8m1_t rem_scalar_u8(fixed_uint8m1_t a, uint8_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i16> poison, i16 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i16> [[SPLAT_SPLATINSERT]], <16 x i16> poison, <16 x i32> zeroinitializer
 // CHECK-NEXT:    [[REM:%.*]] = urem <16 x i16> [[A]], [[SPLAT_SPLAT]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[REM]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> poison, <16 x i16> [[REM]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i16> [[CAST_SCALABLE]]
 //
 fixed_uint16m1_t rem_scalar_u16(fixed_uint16m1_t a, uint16_t b) {
@@ -1798,7 +1798,7 @@ fixed_uint16m1_t rem_scalar_u16(fixed_uint16m1_t a, uint16_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <8 x i32> [[SPLAT_SPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[REM:%.*]] = urem <8 x i32> [[A]], [[SPLAT_SPLAT]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[REM]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> poison, <8 x i32> [[REM]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i32> [[CAST_SCALABLE]]
 //
 fixed_uint32m1_t rem_scalar_u32(fixed_uint32m1_t a, uint32_t b) {
@@ -1811,7 +1811,7 @@ fixed_uint32m1_t rem_scalar_u32(fixed_uint32m1_t a, uint32_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <4 x i64> [[SPLAT_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
 // CHECK-NEXT:    [[REM:%.*]] = urem <4 x i64> [[A]], [[SPLAT_SPLAT]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[REM]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> poison, <4 x i64> [[REM]], i64 0)
 // CHECK-NEXT:    ret <vscale x 1 x i64> [[CAST_SCALABLE]]
 //
 fixed_uint64m1_t rem_scalar_u64(fixed_uint64m1_t a, uint64_t b) {
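
For reference, here is a minimal sketch of the kind of fixed-length RVV test function these CHECK lines exercise; it is illustrative only, not part of the patch. It assumes the test is compiled with -mrvv-vector-bits such that __riscv_v_fixed_vlen is 256 (so fixed_int8m1_t is a 32 x i8 vector), and the function body is assumed to be a plain element-wise divide, since the diff only touches CHECK lines.

    #include <riscv_vector.h>

    // Fixed-length alias of the scalable LMUL=1 i8 type, following the
    // typedef pattern used in the test file.
    typedef vint8m1_t fixed_int8m1_t
        __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen)));

    // Clang lowers this by extracting a <32 x i8> from each coerced
    // <vscale x 8 x i8> argument, performing sdiv <32 x i8>, and then
    // re-inserting the result via llvm.vector.insert into a poison
    // <vscale x 8 x i8> placeholder (previously undef).
    fixed_int8m1_t div_i8(fixed_int8m1_t a, fixed_int8m1_t b) {
      return a / b;
    }
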
diff --git a/clang/test/CodeGen/RISCV/rvv-vls-bitwise-ops.c b/clang/test/CodeGen/RISCV/rvv-vls-bitwise-ops.c
index 0c81adcfe0b0b0..8366347248a8f1 100644
--- a/clang/test/CodeGen/RISCV/rvv-vls-bitwise-ops.c
+++ b/clang/test/CodeGen/RISCV/rvv-vls-bitwise-ops.c
@@ -39,7 +39,7 @@ typedef vfloat64m1_t fixed_float64m1_t __attribute__((riscv_rvv_vector_bits(__ri
 // CHECK-NEXT:    [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[AND:%.*]] = and <32 x i8> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[AND]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> poison, <32 x i8> [[AND]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i8> [[CAST_SCALABLE]]
 //
 fixed_int8m1_t and_i8(fixed_int8m1_t a, fixed_int8m1_t b) {
@@ -51,7 +51,7 @@ fixed_int8m1_t and_i8(fixed_int8m1_t a, fixed_int8m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[AND:%.*]] = and <16 x i16> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[AND]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> poison, <16 x i16> [[AND]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i16> [[CAST_SCALABLE]]
 //
 fixed_int16m1_t and_i16(fixed_int16m1_t a, fixed_int16m1_t b) {
@@ -63,7 +63,7 @@ fixed_int16m1_t and_i16(fixed_int16m1_t a, fixed_int16m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[AND:%.*]] = and <8 x i32> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[AND]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> poison, <8 x i32> [[AND]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i32> [[CAST_SCALABLE]]
 //
 fixed_int32m1_t and_i32(fixed_int32m1_t a, fixed_int32m1_t b) {
@@ -75,7 +75,7 @@ fixed_int32m1_t and_i32(fixed_int32m1_t a, fixed_int32m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[AND:%.*]] = and <4 x i64> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[AND]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> poison, <4 x i64> [[AND]], i64 0)
 // CHECK-NEXT:    ret <vscale x 1 x i64> [[CAST_SCALABLE]]
 //
 fixed_int64m1_t and_i64(fixed_int64m1_t a, fixed_int64m1_t b) {
@@ -87,7 +87,7 @@ fixed_int64m1_t and_i64(fixed_int64m1_t a, fixed_int64m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[AND:%.*]] = and <32 x i8> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[AND]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> poison, <32 x i8> [[AND]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i8> [[CAST_SCALABLE]]
 //
 fixed_uint8m1_t and_u8(fixed_uint8m1_t a, fixed_uint8m1_t b) {
@@ -99,7 +99,7 @@ fixed_uint8m1_t and_u8(fixed_uint8m1_t a, fixed_uint8m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[AND:%.*]] = and <16 x i16> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[AND]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> poison, <16 x i16> [[AND]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i16> [[CAST_SCALABLE]]
 //
 fixed_uint16m1_t and_u16(fixed_uint16m1_t a, fixed_uint16m1_t b) {
@@ -111,7 +111,7 @@ fixed_uint16m1_t and_u16(fixed_uint16m1_t a, fixed_uint16m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[AND:%.*]] = and <8 x i32> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[AND]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> poison, <8 x i32> [[AND]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i32> [[CAST_SCALABLE]]
 //
 fixed_uint32m1_t and_u32(fixed_uint32m1_t a, fixed_uint32m1_t b) {
@@ -123,7 +123,7 @@ fixed_uint32m1_t and_u32(fixed_uint32m1_t a, fixed_uint32m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[AND:%.*]] = and <4 x i64> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[AND]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> poison, <4 x i64> [[AND]], i64 0)
 // CHECK-NEXT:    ret <vscale x 1 x i64> [[CAST_SCALABLE]]
 //
 fixed_uint64m1_t and_u64(fixed_uint64m1_t a, fixed_uint64m1_t b) {
@@ -137,7 +137,7 @@ fixed_uint64m1_t and_u64(fixed_uint64m1_t a, fixed_uint64m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[OR:%.*]] = or <32 x i8> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[OR]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> poison, <32 x i8> [[OR]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i8> [[CAST_SCALABLE]]
 //
 fixed_int8m1_t or_i8(fixed_int8m1_t a, fixed_int8m1_t b) {
@@ -149,7 +149,7 @@ fixed_int8m1_t or_i8(fixed_int8m1_t a, fixed_int8m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[OR:%.*]] = or <16 x i16> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[OR]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> poison, <16 x i16> [[OR]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i16> [[CAST_SCALABLE]]
 //
 fixed_int16m1_t or_i16(fixed_int16m1_t a, fixed_int16m1_t b) {
@@ -161,7 +161,7 @@ fixed_int16m1_t or_i16(fixed_int16m1_t a, fixed_int16m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[OR:%.*]] = or <8 x i32> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[OR]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> poison, <8 x i32> [[OR]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i32> [[CAST_SCALABLE]]
 //
 fixed_int32m1_t or_i32(fixed_int32m1_t a, fixed_int32m1_t b) {
@@ -173,7 +173,7 @@ fixed_int32m1_t or_i32(fixed_int32m1_t a, fixed_int32m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[OR:%.*]] = or <4 x i64> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[OR]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> poison, <4 x i64> [[OR]], i64 0)
 // CHECK-NEXT:    ret <vscale x 1 x i64> [[CAST_SCALABLE]]
 //
 fixed_int64m1_t or_i64(fixed_int64m1_t a, fixed_int64m1_t b) {
@@ -185,7 +185,7 @@ fixed_int64m1_t or_i64(fixed_int64m1_t a, fixed_int64m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[OR:%.*]] = or <32 x i8> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[OR]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> poison, <32 x i8> [[OR]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i8> [[CAST_SCALABLE]]
 //
 fixed_uint8m1_t or_u8(fixed_uint8m1_t a, fixed_uint8m1_t b) {
@@ -197,7 +197,7 @@ fixed_uint8m1_t or_u8(fixed_uint8m1_t a, fixed_uint8m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[OR:%.*]] = or <16 x i16> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[OR]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> poison, <16 x i16> [[OR]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i16> [[CAST_SCALABLE]]
 //
 fixed_uint16m1_t or_u16(fixed_uint16m1_t a, fixed_uint16m1_t b) {
@@ -209,7 +209,7 @@ fixed_uint16m1_t or_u16(fixed_uint16m1_t a, fixed_uint16m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[OR:%.*]] = or <8 x i32> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[OR]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> poison, <8 x i32> [[OR]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i32> [[CAST_SCALABLE]]
 //
 fixed_uint32m1_t or_u32(fixed_uint32m1_t a, fixed_uint32m1_t b) {
@@ -221,7 +221,7 @@ fixed_uint32m1_t or_u32(fixed_uint32m1_t a, fixed_uint32m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[OR:%.*]] = or <4 x i64> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[OR]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> poison, <4 x i64> [[OR]], i64 0)
 // CHECK-NEXT:    ret <vscale x 1 x i64> [[CAST_SCALABLE]]
 //
 fixed_uint64m1_t or_u64(fixed_uint64m1_t a, fixed_uint64m1_t b) {
@@ -235,7 +235,7 @@ fixed_uint64m1_t or_u64(fixed_uint64m1_t a, fixed_uint64m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[XOR:%.*]] = xor <32 x i8> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[XOR]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> poison, <32 x i8> [[XOR]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i8> [[CAST_SCALABLE]]
 //
 fixed_int8m1_t xor_i8(fixed_int8m1_t a, fixed_int8m1_t b) {
@@ -247,7 +247,7 @@ fixed_int8m1_t xor_i8(fixed_int8m1_t a, fixed_int8m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[XOR:%.*]] = xor <16 x i16> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[XOR]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> poison, <16 x i16> [[XOR]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i16> [[CAST_SCALABLE]]
 //
 fixed_int16m1_t xor_i16(fixed_int16m1_t a, fixed_int16m1_t b) {
@@ -259,7 +259,7 @@ fixed_int16m1_t xor_i16(fixed_int16m1_t a, fixed_int16m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[XOR:%.*]] = xor <8 x i32> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[XOR]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> poison, <8 x i32> [[XOR]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i32> [[CAST_SCALABLE]]
 //
 fixed_int32m1_t xor_i32(fixed_int32m1_t a, fixed_int32m1_t b) {
@@ -271,7 +271,7 @@ fixed_int32m1_t xor_i32(fixed_int32m1_t a, fixed_int32m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[XOR:%.*]] = xor <4 x i64> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[XOR]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> poison, <4 x i64> [[XOR]], i64 0)
 // CHECK-NEXT:    ret <vscale x 1 x i64> [[CAST_SCALABLE]]
 //
 fixed_int64m1_t xor_i64(fixed_int64m1_t a, fixed_int64m1_t b) {
@@ -283,7 +283,7 @@ fixed_int64m1_t xor_i64(fixed_int64m1_t a, fixed_int64m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[XOR:%.*]] = xor <32 x i8> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[XOR]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> poison, <32 x i8> [[XOR]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i8> [[CAST_SCALABLE]]
 //
 fixed_uint8m1_t xor_u8(fixed_uint8m1_t a, fixed_uint8m1_t b) {
@@ -295,7 +295,7 @@ fixed_uint8m1_t xor_u8(fixed_uint8m1_t a, fixed_uint8m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[XOR:%.*]] = xor <16 x i16> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[XOR]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> poison, <16 x i16> [[XOR]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i16> [[CAST_SCALABLE]]
 //
 fixed_uint16m1_t xor_u16(fixed_uint16m1_t a, fixed_uint16m1_t b) {
@@ -307,7 +307,7 @@ fixed_uint16m1_t xor_u16(fixed_uint16m1_t a, fixed_uint16m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[XOR:%.*]] = xor <8 x i32> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[XOR]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> poison, <8 x i32> [[XOR]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i32> [[CAST_SCALABLE]]
 //
 fixed_uint32m1_t xor_u32(fixed_uint32m1_t a, fixed_uint32m1_t b) {
@@ -319,7 +319,7 @@ fixed_uint32m1_t xor_u32(fixed_uint32m1_t a, fixed_uint32m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[XOR:%.*]] = xor <4 x i64> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[XOR]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> poison, <4 x i64> [[XOR]], i64 0)
 // CHECK-NEXT:    ret <vscale x 1 x i64> [[CAST_SCALABLE]]
 //
 fixed_uint64m1_t xor_u64(fixed_uint64m1_t a, fixed_uint64m1_t b) {
@@ -332,7 +332,7 @@ fixed_uint64m1_t xor_u64(fixed_uint64m1_t a, fixed_uint64m1_t b) {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[NOT:%.*]] = xor <32 x i8> [[A]], splat (i8 -1)
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[NOT]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> poison, <32 x i8> [[NOT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i8> [[CAST_SCALABLE]]
 //
 fixed_int8m1_t not_i8(fixed_int8m1_t a) {
@@ -343,7 +343,7 @@ fixed_int8m1_t not_i8(fixed_int8m1_t a) {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[NOT:%.*]] = xor <16 x i16> [[A]], splat (i16 -1)
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[NOT]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> poison, <16 x i16> [[NOT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i16> [[CAST_SCALABLE]]
 //
 fixed_int16m1_t not_i16(fixed_int16m1_t a) {
@@ -354,7 +354,7 @@ fixed_int16m1_t not_i16(fixed_int16m1_t a) {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[NOT:%.*]] = xor <8 x i32> [[A]], splat (i32 -1)
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[NOT]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> poison, <8 x i32> [[NOT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i32> [[CAST_SCALABLE]]
 //
 fixed_int32m1_t not_i32(fixed_int32m1_t a) {
@@ -365,7 +365,7 @@ fixed_int32m1_t not_i32(fixed_int32m1_t a) {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[NOT:%.*]] = xor <4 x i64> [[A]], splat (i64 -1)
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[NOT]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> poison, <4 x i64> [[NOT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 1 x i64> [[CAST_SCALABLE]]
 //
 fixed_int64m1_t not_i64(fixed_int64m1_t a) {
@@ -376,7 +376,7 @@ fixed_int64m1_t not_i64(fixed_int64m1_t a) {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[NOT:%.*]] = xor <32 x i8> [[A]], splat (i8 -1)
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[NOT]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> poison, <32 x i8> [[NOT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i8> [[CAST_SCALABLE]]
 //
 fixed_uint8m1_t not_u8(fixed_uint8m1_t a) {
@@ -387,7 +387,7 @@ fixed_uint8m1_t not_u8(fixed_uint8m1_t a) {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[NOT:%.*]] = xor <16 x i16> [[A]], splat (i16 -1)
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[NOT]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> poison, <16 x i16> [[NOT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i16> [[CAST_SCALABLE]]
 //
 fixed_uint16m1_t not_u16(fixed_uint16m1_t a) {
@@ -398,7 +398,7 @@ fixed_uint16m1_t not_u16(fixed_uint16m1_t a) {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[NOT:%.*]] = xor <8 x i32> [[A]], splat (i32 -1)
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[NOT]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> poison, <8 x i32> [[NOT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i32> [[CAST_SCALABLE]]
 //
 fixed_uint32m1_t not_u32(fixed_uint32m1_t a) {
@@ -409,7 +409,7 @@ fixed_uint32m1_t not_u32(fixed_uint32m1_t a) {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[NOT:%.*]] = xor <4 x i64> [[A]], splat (i64 -1)
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[NOT]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> poison, <4 x i64> [[NOT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 1 x i64> [[CAST_SCALABLE]]
 //
 fixed_uint64m1_t not_u64(fixed_uint64m1_t a) {
diff --git a/clang/test/CodeGen/RISCV/rvv-vls-compare-ops.c b/clang/test/CodeGen/RISCV/rvv-vls-compare-ops.c
index f22b4fbb5251fd..dc0ba616bd1b34 100644
--- a/clang/test/CodeGen/RISCV/rvv-vls-compare-ops.c
+++ b/clang/test/CodeGen/RISCV/rvv-vls-compare-ops.c
@@ -40,7 +40,7 @@ typedef vfloat64m1_t fixed_float64m1_t __attribute__((riscv_rvv_vector_bits(__ri
 // CHECK-NEXT:    [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CMP:%.*]] = icmp eq <32 x i8> [[A]], [[B]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <32 x i1> [[CMP]] to <32 x i8>
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> poison, <32 x i8> [[SEXT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i8> [[CAST_SCALABLE]]
 //
 fixed_int8m1_t eq_i8(fixed_int8m1_t a, fixed_int8m1_t b) {
@@ -53,7 +53,7 @@ fixed_int8m1_t eq_i8(fixed_int8m1_t a, fixed_int8m1_t b) {
 // CHECK-NEXT:    [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CMP:%.*]] = icmp eq <16 x i16> [[A]], [[B]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <16 x i1> [[CMP]] to <16 x i16>
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> poison, <16 x i16> [[SEXT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i16> [[CAST_SCALABLE]]
 //
 fixed_int16m1_t eq_i16(fixed_int16m1_t a, fixed_int16m1_t b) {
@@ -66,7 +66,7 @@ fixed_int16m1_t eq_i16(fixed_int16m1_t a, fixed_int16m1_t b) {
 // CHECK-NEXT:    [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CMP:%.*]] = icmp eq <8 x i32> [[A]], [[B]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i32>
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> poison, <8 x i32> [[SEXT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i32> [[CAST_SCALABLE]]
 //
 fixed_int32m1_t eq_i32(fixed_int32m1_t a, fixed_int32m1_t b) {
@@ -79,7 +79,7 @@ fixed_int32m1_t eq_i32(fixed_int32m1_t a, fixed_int32m1_t b) {
 // CHECK-NEXT:    [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CMP:%.*]] = icmp eq <4 x i64> [[A]], [[B]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i64>
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> poison, <4 x i64> [[SEXT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 1 x i64> [[CAST_SCALABLE]]
 //
 fixed_int64m1_t eq_i64(fixed_int64m1_t a, fixed_int64m1_t b) {
@@ -92,7 +92,7 @@ fixed_int64m1_t eq_i64(fixed_int64m1_t a, fixed_int64m1_t b) {
 // CHECK-NEXT:    [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CMP:%.*]] = icmp eq <32 x i8> [[A]], [[B]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <32 x i1> [[CMP]] to <32 x i8>
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> poison, <32 x i8> [[SEXT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i8> [[CAST_SCALABLE]]
 //
 fixed_int8m1_t eq_u8(fixed_uint8m1_t a, fixed_uint8m1_t b) {
@@ -105,7 +105,7 @@ fixed_int8m1_t eq_u8(fixed_uint8m1_t a, fixed_uint8m1_t b) {
 // CHECK-NEXT:    [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CMP:%.*]] = icmp eq <16 x i16> [[A]], [[B]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <16 x i1> [[CMP]] to <16 x i16>
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> poison, <16 x i16> [[SEXT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i16> [[CAST_SCALABLE]]
 //
 fixed_int16m1_t eq_u16(fixed_uint16m1_t a, fixed_uint16m1_t b) {
@@ -118,7 +118,7 @@ fixed_int16m1_t eq_u16(fixed_uint16m1_t a, fixed_uint16m1_t b) {
 // CHECK-NEXT:    [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CMP:%.*]] = icmp eq <8 x i32> [[A]], [[B]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i32>
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> poison, <8 x i32> [[SEXT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i32> [[CAST_SCALABLE]]
 //
 fixed_int32m1_t eq_u32(fixed_uint32m1_t a, fixed_uint32m1_t b) {
@@ -131,7 +131,7 @@ fixed_int32m1_t eq_u32(fixed_uint32m1_t a, fixed_uint32m1_t b) {
 // CHECK-NEXT:    [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CMP:%.*]] = icmp eq <4 x i64> [[A]], [[B]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i64>
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> poison, <4 x i64> [[SEXT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 1 x i64> [[CAST_SCALABLE]]
 //
 fixed_int64m1_t eq_u64(fixed_uint64m1_t a, fixed_uint64m1_t b) {
@@ -144,7 +144,7 @@ fixed_int64m1_t eq_u64(fixed_uint64m1_t a, fixed_uint64m1_t b) {
 // CHECK-NEXT:    [[B:%.*]] = call <8 x float> @llvm.vector.extract.v8f32.nxv2f32(<vscale x 2 x float> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CMP:%.*]] = fcmp oeq <8 x float> [[A]], [[B]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i32>
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> poison, <8 x i32> [[SEXT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i32> [[CAST_SCALABLE]]
 //
 fixed_int32m1_t eq_f32(fixed_float32m1_t a, fixed_float32m1_t b) {
@@ -157,7 +157,7 @@ fixed_int32m1_t eq_f32(fixed_float32m1_t a, fixed_float32m1_t b) {
 // CHECK-NEXT:    [[B:%.*]] = call <4 x double> @llvm.vector.extract.v4f64.nxv1f64(<vscale x 1 x double> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CMP:%.*]] = fcmp oeq <4 x double> [[A]], [[B]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i64>
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> poison, <4 x i64> [[SEXT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 1 x i64> [[CAST_SCALABLE]]
 //
 fixed_int64m1_t eq_f64(fixed_float64m1_t a, fixed_float64m1_t b) {
@@ -172,7 +172,7 @@ fixed_int64m1_t eq_f64(fixed_float64m1_t a, fixed_float64m1_t b) {
 // CHECK-NEXT:    [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CMP:%.*]] = icmp ne <32 x i8> [[A]], [[B]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <32 x i1> [[CMP]] to <32 x i8>
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> poison, <32 x i8> [[SEXT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i8> [[CAST_SCALABLE]]
 //
 fixed_int8m1_t neq_i8(fixed_int8m1_t a, fixed_int8m1_t b) {
@@ -185,7 +185,7 @@ fixed_int8m1_t neq_i8(fixed_int8m1_t a, fixed_int8m1_t b) {
 // CHECK-NEXT:    [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CMP:%.*]] = icmp ne <16 x i16> [[A]], [[B]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <16 x i1> [[CMP]] to <16 x i16>
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> poison, <16 x i16> [[SEXT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i16> [[CAST_SCALABLE]]
 //
 fixed_int16m1_t neq_i16(fixed_int16m1_t a, fixed_int16m1_t b) {
@@ -198,7 +198,7 @@ fixed_int16m1_t neq_i16(fixed_int16m1_t a, fixed_int16m1_t b) {
 // CHECK-NEXT:    [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CMP:%.*]] = icmp ne <8 x i32> [[A]], [[B]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i32>
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> poison, <8 x i32> [[SEXT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i32> [[CAST_SCALABLE]]
 //
 fixed_int32m1_t neq_i32(fixed_int32m1_t a, fixed_int32m1_t b) {
@@ -211,7 +211,7 @@ fixed_int32m1_t neq_i32(fixed_int32m1_t a, fixed_int32m1_t b) {
 // CHECK-NEXT:    [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CMP:%.*]] = icmp ne <4 x i64> [[A]], [[B]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i64>
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> poison, <4 x i64> [[SEXT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 1 x i64> [[CAST_SCALABLE]]
 //
 fixed_int64m1_t neq_i64(fixed_int64m1_t a, fixed_int64m1_t b) {
@@ -224,7 +224,7 @@ fixed_int64m1_t neq_i64(fixed_int64m1_t a, fixed_int64m1_t b) {
 // CHECK-NEXT:    [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CMP:%.*]] = icmp ne <32 x i8> [[A]], [[B]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <32 x i1> [[CMP]] to <32 x i8>
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> poison, <32 x i8> [[SEXT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i8> [[CAST_SCALABLE]]
 //
 fixed_int8m1_t neq_u8(fixed_uint8m1_t a, fixed_uint8m1_t b) {
@@ -237,7 +237,7 @@ fixed_int8m1_t neq_u8(fixed_uint8m1_t a, fixed_uint8m1_t b) {
 // CHECK-NEXT:    [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CMP:%.*]] = icmp ne <16 x i16> [[A]], [[B]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <16 x i1> [[CMP]] to <16 x i16>
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> poison, <16 x i16> [[SEXT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i16> [[CAST_SCALABLE]]
 //
 fixed_int16m1_t neq_u16(fixed_uint16m1_t a, fixed_uint16m1_t b) {
@@ -250,7 +250,7 @@ fixed_int16m1_t neq_u16(fixed_uint16m1_t a, fixed_uint16m1_t b) {
 // CHECK-NEXT:    [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CMP:%.*]] = icmp ne <8 x i32> [[A]], [[B]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i32>
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> poison, <8 x i32> [[SEXT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i32> [[CAST_SCALABLE]]
 //
 fixed_int32m1_t neq_u32(fixed_uint32m1_t a, fixed_uint32m1_t b) {
@@ -263,7 +263,7 @@ fixed_int32m1_t neq_u32(fixed_uint32m1_t a, fixed_uint32m1_t b) {
 // CHECK-NEXT:    [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CMP:%.*]] = icmp ne <4 x i64> [[A]], [[B]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i64>
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> poison, <4 x i64> [[SEXT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 1 x i64> [[CAST_SCALABLE]]
 //
 fixed_int64m1_t neq_u64(fixed_uint64m1_t a, fixed_uint64m1_t b) {
@@ -276,7 +276,7 @@ fixed_int64m1_t neq_u64(fixed_uint64m1_t a, fixed_uint64m1_t b) {
 // CHECK-NEXT:    [[B:%.*]] = call <8 x float> @llvm.vector.extract.v8f32.nxv2f32(<vscale x 2 x float> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CMP:%.*]] = fcmp une <8 x float> [[A]], [[B]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i32>
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> poison, <8 x i32> [[SEXT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i32> [[CAST_SCALABLE]]
 //
 fixed_int32m1_t neq_f32(fixed_float32m1_t a, fixed_float32m1_t b) {
@@ -289,7 +289,7 @@ fixed_int32m1_t neq_f32(fixed_float32m1_t a, fixed_float32m1_t b) {
 // CHECK-NEXT:    [[B:%.*]] = call <4 x double> @llvm.vector.extract.v4f64.nxv1f64(<vscale x 1 x double> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CMP:%.*]] = fcmp une <4 x double> [[A]], [[B]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i64>
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> poison, <4 x i64> [[SEXT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 1 x i64> [[CAST_SCALABLE]]
 //
 fixed_int64m1_t neq_f64(fixed_float64m1_t a, fixed_float64m1_t b) {
@@ -304,7 +304,7 @@ fixed_int64m1_t neq_f64(fixed_float64m1_t a, fixed_float64m1_t b) {
 // CHECK-NEXT:    [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CMP:%.*]] = icmp slt <32 x i8> [[A]], [[B]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <32 x i1> [[CMP]] to <32 x i8>
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> poison, <32 x i8> [[SEXT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i8> [[CAST_SCALABLE]]
 //
 fixed_int8m1_t lt_i8(fixed_int8m1_t a, fixed_int8m1_t b) {
@@ -317,7 +317,7 @@ fixed_int8m1_t lt_i8(fixed_int8m1_t a, fixed_int8m1_t b) {
 // CHECK-NEXT:    [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CMP:%.*]] = icmp slt <16 x i16> [[A]], [[B]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <16 x i1> [[CMP]] to <16 x i16>
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> poison, <16 x i16> [[SEXT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i16> [[CAST_SCALABLE]]
 //
 fixed_int16m1_t lt_i16(fixed_int16m1_t a, fixed_int16m1_t b) {
@@ -330,7 +330,7 @@ fixed_int16m1_t lt_i16(fixed_int16m1_t a, fixed_int16m1_t b) {
 // CHECK-NEXT:    [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CMP:%.*]] = icmp slt <8 x i32> [[A]], [[B]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i32>
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> poison, <8 x i32> [[SEXT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i32> [[CAST_SCALABLE]]
 //
 fixed_int32m1_t lt_i32(fixed_int32m1_t a, fixed_int32m1_t b) {
@@ -343,7 +343,7 @@ fixed_int32m1_t lt_i32(fixed_int32m1_t a, fixed_int32m1_t b) {
 // CHECK-NEXT:    [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CMP:%.*]] = icmp slt <4 x i64> [[A]], [[B]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i64>
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> poison, <4 x i64> [[SEXT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 1 x i64> [[CAST_SCALABLE]]
 //
 fixed_int64m1_t lt_i64(fixed_int64m1_t a, fixed_int64m1_t b) {
@@ -356,7 +356,7 @@ fixed_int64m1_t lt_i64(fixed_int64m1_t a, fixed_int64m1_t b) {
 // CHECK-NEXT:    [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CMP:%.*]] = icmp ult <32 x i8> [[A]], [[B]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <32 x i1> [[CMP]] to <32 x i8>
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> poison, <32 x i8> [[SEXT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i8> [[CAST_SCALABLE]]
 //
 fixed_int8m1_t lt_u8(fixed_uint8m1_t a, fixed_uint8m1_t b) {
@@ -369,7 +369,7 @@ fixed_int8m1_t lt_u8(fixed_uint8m1_t a, fixed_uint8m1_t b) {
 // CHECK-NEXT:    [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CMP:%.*]] = icmp ult <16 x i16> [[A]], [[B]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <16 x i1> [[CMP]] to <16 x i16>
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> poison, <16 x i16> [[SEXT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i16> [[CAST_SCALABLE]]
 //
 fixed_int16m1_t lt_u16(fixed_uint16m1_t a, fixed_uint16m1_t b) {
@@ -382,7 +382,7 @@ fixed_int16m1_t lt_u16(fixed_uint16m1_t a, fixed_uint16m1_t b) {
 // CHECK-NEXT:    [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CMP:%.*]] = icmp ult <8 x i32> [[A]], [[B]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i32>
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> poison, <8 x i32> [[SEXT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i32> [[CAST_SCALABLE]]
 //
 fixed_int32m1_t lt_u32(fixed_uint32m1_t a, fixed_uint32m1_t b) {
@@ -395,7 +395,7 @@ fixed_int32m1_t lt_u32(fixed_uint32m1_t a, fixed_uint32m1_t b) {
 // CHECK-NEXT:    [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CMP:%.*]] = icmp ult <4 x i64> [[A]], [[B]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i64>
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> poison, <4 x i64> [[SEXT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 1 x i64> [[CAST_SCALABLE]]
 //
 fixed_int64m1_t lt_u64(fixed_uint64m1_t a, fixed_uint64m1_t b) {
@@ -408,7 +408,7 @@ fixed_int64m1_t lt_u64(fixed_uint64m1_t a, fixed_uint64m1_t b) {
 // CHECK-NEXT:    [[B:%.*]] = call <8 x float> @llvm.vector.extract.v8f32.nxv2f32(<vscale x 2 x float> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CMP:%.*]] = fcmp olt <8 x float> [[A]], [[B]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i32>
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> poison, <8 x i32> [[SEXT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i32> [[CAST_SCALABLE]]
 //
 fixed_int32m1_t lt_f32(fixed_float32m1_t a, fixed_float32m1_t b) {
@@ -421,7 +421,7 @@ fixed_int32m1_t lt_f32(fixed_float32m1_t a, fixed_float32m1_t b) {
 // CHECK-NEXT:    [[B:%.*]] = call <4 x double> @llvm.vector.extract.v4f64.nxv1f64(<vscale x 1 x double> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CMP:%.*]] = fcmp olt <4 x double> [[A]], [[B]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i64>
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> poison, <4 x i64> [[SEXT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 1 x i64> [[CAST_SCALABLE]]
 //
 fixed_int64m1_t lt_f64(fixed_float64m1_t a, fixed_float64m1_t b) {
@@ -436,7 +436,7 @@ fixed_int64m1_t lt_f64(fixed_float64m1_t a, fixed_float64m1_t b) {
 // CHECK-NEXT:    [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CMP:%.*]] = icmp sle <32 x i8> [[A]], [[B]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <32 x i1> [[CMP]] to <32 x i8>
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> poison, <32 x i8> [[SEXT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i8> [[CAST_SCALABLE]]
 //
 fixed_int8m1_t leq_i8(fixed_int8m1_t a, fixed_int8m1_t b) {
@@ -449,7 +449,7 @@ fixed_int8m1_t leq_i8(fixed_int8m1_t a, fixed_int8m1_t b) {
 // CHECK-NEXT:    [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CMP:%.*]] = icmp sle <16 x i16> [[A]], [[B]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <16 x i1> [[CMP]] to <16 x i16>
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> poison, <16 x i16> [[SEXT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i16> [[CAST_SCALABLE]]
 //
 fixed_int16m1_t leq_i16(fixed_int16m1_t a, fixed_int16m1_t b) {
@@ -462,7 +462,7 @@ fixed_int16m1_t leq_i16(fixed_int16m1_t a, fixed_int16m1_t b) {
 // CHECK-NEXT:    [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CMP:%.*]] = icmp sle <8 x i32> [[A]], [[B]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i32>
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> poison, <8 x i32> [[SEXT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i32> [[CAST_SCALABLE]]
 //
 fixed_int32m1_t leq_i32(fixed_int32m1_t a, fixed_int32m1_t b) {
@@ -475,7 +475,7 @@ fixed_int32m1_t leq_i32(fixed_int32m1_t a, fixed_int32m1_t b) {
 // CHECK-NEXT:    [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CMP:%.*]] = icmp sle <4 x i64> [[A]], [[B]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i64>
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> poison, <4 x i64> [[SEXT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 1 x i64> [[CAST_SCALABLE]]
 //
 fixed_int64m1_t leq_i64(fixed_int64m1_t a, fixed_int64m1_t b) {
@@ -488,7 +488,7 @@ fixed_int64m1_t leq_i64(fixed_int64m1_t a, fixed_int64m1_t b) {
 // CHECK-NEXT:    [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CMP:%.*]] = icmp ule <32 x i8> [[A]], [[B]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <32 x i1> [[CMP]] to <32 x i8>
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> poison, <32 x i8> [[SEXT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i8> [[CAST_SCALABLE]]
 //
 fixed_int8m1_t leq_u8(fixed_uint8m1_t a, fixed_uint8m1_t b) {
@@ -501,7 +501,7 @@ fixed_int8m1_t leq_u8(fixed_uint8m1_t a, fixed_uint8m1_t b) {
 // CHECK-NEXT:    [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CMP:%.*]] = icmp ule <16 x i16> [[A]], [[B]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <16 x i1> [[CMP]] to <16 x i16>
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> poison, <16 x i16> [[SEXT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i16> [[CAST_SCALABLE]]
 //
 fixed_int16m1_t leq_u16(fixed_uint16m1_t a, fixed_uint16m1_t b) {
@@ -514,7 +514,7 @@ fixed_int16m1_t leq_u16(fixed_uint16m1_t a, fixed_uint16m1_t b) {
 // CHECK-NEXT:    [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CMP:%.*]] = icmp ule <8 x i32> [[A]], [[B]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i32>
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> poison, <8 x i32> [[SEXT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i32> [[CAST_SCALABLE]]
 //
 fixed_int32m1_t leq_u32(fixed_uint32m1_t a, fixed_uint32m1_t b) {
@@ -527,7 +527,7 @@ fixed_int32m1_t leq_u32(fixed_uint32m1_t a, fixed_uint32m1_t b) {
 // CHECK-NEXT:    [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CMP:%.*]] = icmp ule <4 x i64> [[A]], [[B]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i64>
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> poison, <4 x i64> [[SEXT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 1 x i64> [[CAST_SCALABLE]]
 //
 fixed_int64m1_t leq_u64(fixed_uint64m1_t a, fixed_uint64m1_t b) {
@@ -540,7 +540,7 @@ fixed_int64m1_t leq_u64(fixed_uint64m1_t a, fixed_uint64m1_t b) {
 // CHECK-NEXT:    [[B:%.*]] = call <8 x float> @llvm.vector.extract.v8f32.nxv2f32(<vscale x 2 x float> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CMP:%.*]] = fcmp ole <8 x float> [[A]], [[B]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i32>
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> poison, <8 x i32> [[SEXT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i32> [[CAST_SCALABLE]]
 //
 fixed_int32m1_t leq_f32(fixed_float32m1_t a, fixed_float32m1_t b) {
@@ -553,7 +553,7 @@ fixed_int32m1_t leq_f32(fixed_float32m1_t a, fixed_float32m1_t b) {
 // CHECK-NEXT:    [[B:%.*]] = call <4 x double> @llvm.vector.extract.v4f64.nxv1f64(<vscale x 1 x double> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CMP:%.*]] = fcmp ole <4 x double> [[A]], [[B]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i64>
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> poison, <4 x i64> [[SEXT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 1 x i64> [[CAST_SCALABLE]]
 //
 fixed_int64m1_t leq_f64(fixed_float64m1_t a, fixed_float64m1_t b) {
@@ -568,7 +568,7 @@ fixed_int64m1_t leq_f64(fixed_float64m1_t a, fixed_float64m1_t b) {
 // CHECK-NEXT:    [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CMP:%.*]] = icmp sgt <32 x i8> [[A]], [[B]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <32 x i1> [[CMP]] to <32 x i8>
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> poison, <32 x i8> [[SEXT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i8> [[CAST_SCALABLE]]
 //
 fixed_int8m1_t gt_i8(fixed_int8m1_t a, fixed_int8m1_t b) {
@@ -581,7 +581,7 @@ fixed_int8m1_t gt_i8(fixed_int8m1_t a, fixed_int8m1_t b) {
 // CHECK-NEXT:    [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CMP:%.*]] = icmp sgt <16 x i16> [[A]], [[B]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <16 x i1> [[CMP]] to <16 x i16>
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> poison, <16 x i16> [[SEXT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i16> [[CAST_SCALABLE]]
 //
 fixed_int16m1_t gt_i16(fixed_int16m1_t a, fixed_int16m1_t b) {
@@ -594,7 +594,7 @@ fixed_int16m1_t gt_i16(fixed_int16m1_t a, fixed_int16m1_t b) {
 // CHECK-NEXT:    [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CMP:%.*]] = icmp sgt <8 x i32> [[A]], [[B]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i32>
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> poison, <8 x i32> [[SEXT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i32> [[CAST_SCALABLE]]
 //
 fixed_int32m1_t gt_i32(fixed_int32m1_t a, fixed_int32m1_t b) {
@@ -607,7 +607,7 @@ fixed_int32m1_t gt_i32(fixed_int32m1_t a, fixed_int32m1_t b) {
 // CHECK-NEXT:    [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CMP:%.*]] = icmp sgt <4 x i64> [[A]], [[B]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i64>
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> poison, <4 x i64> [[SEXT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 1 x i64> [[CAST_SCALABLE]]
 //
 fixed_int64m1_t gt_i64(fixed_int64m1_t a, fixed_int64m1_t b) {
@@ -620,7 +620,7 @@ fixed_int64m1_t gt_i64(fixed_int64m1_t a, fixed_int64m1_t b) {
 // CHECK-NEXT:    [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CMP:%.*]] = icmp ugt <32 x i8> [[A]], [[B]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <32 x i1> [[CMP]] to <32 x i8>
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> poison, <32 x i8> [[SEXT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i8> [[CAST_SCALABLE]]
 //
 fixed_int8m1_t gt_u8(fixed_uint8m1_t a, fixed_uint8m1_t b) {
@@ -633,7 +633,7 @@ fixed_int8m1_t gt_u8(fixed_uint8m1_t a, fixed_uint8m1_t b) {
 // CHECK-NEXT:    [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CMP:%.*]] = icmp ugt <16 x i16> [[A]], [[B]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <16 x i1> [[CMP]] to <16 x i16>
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> poison, <16 x i16> [[SEXT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i16> [[CAST_SCALABLE]]
 //
 fixed_int16m1_t gt_u16(fixed_uint16m1_t a, fixed_uint16m1_t b) {
@@ -646,7 +646,7 @@ fixed_int16m1_t gt_u16(fixed_uint16m1_t a, fixed_uint16m1_t b) {
 // CHECK-NEXT:    [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CMP:%.*]] = icmp ugt <8 x i32> [[A]], [[B]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i32>
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> poison, <8 x i32> [[SEXT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i32> [[CAST_SCALABLE]]
 //
 fixed_int32m1_t gt_u32(fixed_uint32m1_t a, fixed_uint32m1_t b) {
@@ -659,7 +659,7 @@ fixed_int32m1_t gt_u32(fixed_uint32m1_t a, fixed_uint32m1_t b) {
 // CHECK-NEXT:    [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CMP:%.*]] = icmp ugt <4 x i64> [[A]], [[B]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i64>
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> poison, <4 x i64> [[SEXT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 1 x i64> [[CAST_SCALABLE]]
 //
 fixed_int64m1_t gt_u64(fixed_uint64m1_t a, fixed_uint64m1_t b) {
@@ -672,7 +672,7 @@ fixed_int64m1_t gt_u64(fixed_uint64m1_t a, fixed_uint64m1_t b) {
 // CHECK-NEXT:    [[B:%.*]] = call <8 x float> @llvm.vector.extract.v8f32.nxv2f32(<vscale x 2 x float> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CMP:%.*]] = fcmp ogt <8 x float> [[A]], [[B]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i32>
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> poison, <8 x i32> [[SEXT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i32> [[CAST_SCALABLE]]
 //
 fixed_int32m1_t gt_f32(fixed_float32m1_t a, fixed_float32m1_t b) {
@@ -685,7 +685,7 @@ fixed_int32m1_t gt_f32(fixed_float32m1_t a, fixed_float32m1_t b) {
 // CHECK-NEXT:    [[B:%.*]] = call <4 x double> @llvm.vector.extract.v4f64.nxv1f64(<vscale x 1 x double> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CMP:%.*]] = fcmp ogt <4 x double> [[A]], [[B]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i64>
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> poison, <4 x i64> [[SEXT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 1 x i64> [[CAST_SCALABLE]]
 //
 fixed_int64m1_t gt_f64(fixed_float64m1_t a, fixed_float64m1_t b) {
@@ -700,7 +700,7 @@ fixed_int64m1_t gt_f64(fixed_float64m1_t a, fixed_float64m1_t b) {
 // CHECK-NEXT:    [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CMP:%.*]] = icmp sge <32 x i8> [[A]], [[B]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <32 x i1> [[CMP]] to <32 x i8>
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> poison, <32 x i8> [[SEXT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i8> [[CAST_SCALABLE]]
 //
 fixed_int8m1_t geq_i8(fixed_int8m1_t a, fixed_int8m1_t b) {
@@ -713,7 +713,7 @@ fixed_int8m1_t geq_i8(fixed_int8m1_t a, fixed_int8m1_t b) {
 // CHECK-NEXT:    [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CMP:%.*]] = icmp sge <16 x i16> [[A]], [[B]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <16 x i1> [[CMP]] to <16 x i16>
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> poison, <16 x i16> [[SEXT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i16> [[CAST_SCALABLE]]
 //
 fixed_int16m1_t geq_i16(fixed_int16m1_t a, fixed_int16m1_t b) {
@@ -726,7 +726,7 @@ fixed_int16m1_t geq_i16(fixed_int16m1_t a, fixed_int16m1_t b) {
 // CHECK-NEXT:    [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CMP:%.*]] = icmp sge <8 x i32> [[A]], [[B]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i32>
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> poison, <8 x i32> [[SEXT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i32> [[CAST_SCALABLE]]
 //
 fixed_int32m1_t geq_i32(fixed_int32m1_t a, fixed_int32m1_t b) {
@@ -739,7 +739,7 @@ fixed_int32m1_t geq_i32(fixed_int32m1_t a, fixed_int32m1_t b) {
 // CHECK-NEXT:    [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CMP:%.*]] = icmp sge <4 x i64> [[A]], [[B]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i64>
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> poison, <4 x i64> [[SEXT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 1 x i64> [[CAST_SCALABLE]]
 //
 fixed_int64m1_t geq_i64(fixed_int64m1_t a, fixed_int64m1_t b) {
@@ -752,7 +752,7 @@ fixed_int64m1_t geq_i64(fixed_int64m1_t a, fixed_int64m1_t b) {
 // CHECK-NEXT:    [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CMP:%.*]] = icmp uge <32 x i8> [[A]], [[B]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <32 x i1> [[CMP]] to <32 x i8>
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> poison, <32 x i8> [[SEXT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i8> [[CAST_SCALABLE]]
 //
 fixed_int8m1_t geq_u8(fixed_uint8m1_t a, fixed_uint8m1_t b) {
@@ -765,7 +765,7 @@ fixed_int8m1_t geq_u8(fixed_uint8m1_t a, fixed_uint8m1_t b) {
 // CHECK-NEXT:    [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CMP:%.*]] = icmp uge <16 x i16> [[A]], [[B]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <16 x i1> [[CMP]] to <16 x i16>
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> poison, <16 x i16> [[SEXT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i16> [[CAST_SCALABLE]]
 //
 fixed_int16m1_t geq_u16(fixed_uint16m1_t a, fixed_uint16m1_t b) {
@@ -778,7 +778,7 @@ fixed_int16m1_t geq_u16(fixed_uint16m1_t a, fixed_uint16m1_t b) {
 // CHECK-NEXT:    [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CMP:%.*]] = icmp uge <8 x i32> [[A]], [[B]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i32>
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> poison, <8 x i32> [[SEXT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i32> [[CAST_SCALABLE]]
 //
 fixed_int32m1_t geq_u32(fixed_uint32m1_t a, fixed_uint32m1_t b) {
@@ -791,7 +791,7 @@ fixed_int32m1_t geq_u32(fixed_uint32m1_t a, fixed_uint32m1_t b) {
 // CHECK-NEXT:    [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CMP:%.*]] = icmp uge <4 x i64> [[A]], [[B]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i64>
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> poison, <4 x i64> [[SEXT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 1 x i64> [[CAST_SCALABLE]]
 //
 fixed_int64m1_t geq_u64(fixed_uint64m1_t a, fixed_uint64m1_t b) {
@@ -804,7 +804,7 @@ fixed_int64m1_t geq_u64(fixed_uint64m1_t a, fixed_uint64m1_t b) {
 // CHECK-NEXT:    [[B:%.*]] = call <8 x float> @llvm.vector.extract.v8f32.nxv2f32(<vscale x 2 x float> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CMP:%.*]] = fcmp oge <8 x float> [[A]], [[B]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i32>
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> poison, <8 x i32> [[SEXT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i32> [[CAST_SCALABLE]]
 //
 fixed_int32m1_t geq_f32(fixed_float32m1_t a, fixed_float32m1_t b) {
@@ -817,7 +817,7 @@ fixed_int32m1_t geq_f32(fixed_float32m1_t a, fixed_float32m1_t b) {
 // CHECK-NEXT:    [[B:%.*]] = call <4 x double> @llvm.vector.extract.v4f64.nxv1f64(<vscale x 1 x double> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CMP:%.*]] = fcmp oge <4 x double> [[A]], [[B]]
 // CHECK-NEXT:    [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i64>
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[SEXT]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> poison, <4 x i64> [[SEXT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 1 x i64> [[CAST_SCALABLE]]
 //
 fixed_int64m1_t geq_f64(fixed_float64m1_t a, fixed_float64m1_t b) {
diff --git a/clang/test/CodeGen/RISCV/rvv-vls-shift-ops.c b/clang/test/CodeGen/RISCV/rvv-vls-shift-ops.c
index d88a5ba239e821..7c4196a879f618 100644
--- a/clang/test/CodeGen/RISCV/rvv-vls-shift-ops.c
+++ b/clang/test/CodeGen/RISCV/rvv-vls-shift-ops.c
@@ -37,7 +37,7 @@ typedef vfloat64m1_t fixed_float64m1_t __attribute__((riscv_rvv_vector_bits(__ri
 // CHECK-NEXT:    [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[SHL:%.*]] = shl <32 x i8> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[SHL]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> poison, <32 x i8> [[SHL]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i8> [[CAST_SCALABLE]]
 //
 fixed_int8m1_t lshift_i8(fixed_int8m1_t a, fixed_int8m1_t b) {
@@ -49,7 +49,7 @@ fixed_int8m1_t lshift_i8(fixed_int8m1_t a, fixed_int8m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[SHR:%.*]] = ashr <32 x i8> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[SHR]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> poison, <32 x i8> [[SHR]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i8> [[CAST_SCALABLE]]
 //
 fixed_int8m1_t rshift_i8(fixed_int8m1_t a, fixed_int8m1_t b) {
@@ -61,7 +61,7 @@ fixed_int8m1_t rshift_i8(fixed_int8m1_t a, fixed_int8m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[SHL:%.*]] = shl <32 x i8> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[SHL]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> poison, <32 x i8> [[SHL]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i8> [[CAST_SCALABLE]]
 //
 fixed_uint8m1_t lshift_u8(fixed_uint8m1_t a, fixed_uint8m1_t b) {
@@ -73,7 +73,7 @@ fixed_uint8m1_t lshift_u8(fixed_uint8m1_t a, fixed_uint8m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[SHR:%.*]] = lshr <32 x i8> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[SHR]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> poison, <32 x i8> [[SHR]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i8> [[CAST_SCALABLE]]
 //
 fixed_uint8m1_t rshift_u8(fixed_uint8m1_t a, fixed_uint8m1_t b) {
@@ -85,7 +85,7 @@ fixed_uint8m1_t rshift_u8(fixed_uint8m1_t a, fixed_uint8m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[SHL:%.*]] = shl <16 x i16> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[SHL]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> poison, <16 x i16> [[SHL]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i16> [[CAST_SCALABLE]]
 //
 fixed_int16m1_t lshift_i16(fixed_int16m1_t a, fixed_int16m1_t b) {
@@ -97,7 +97,7 @@ fixed_int16m1_t lshift_i16(fixed_int16m1_t a, fixed_int16m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[SHR:%.*]] = ashr <16 x i16> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[SHR]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> poison, <16 x i16> [[SHR]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i16> [[CAST_SCALABLE]]
 //
 fixed_int16m1_t rshift_i16(fixed_int16m1_t a, fixed_int16m1_t b) {
@@ -109,7 +109,7 @@ fixed_int16m1_t rshift_i16(fixed_int16m1_t a, fixed_int16m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[SHL:%.*]] = shl <16 x i16> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[SHL]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> poison, <16 x i16> [[SHL]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i16> [[CAST_SCALABLE]]
 //
 fixed_uint16m1_t lshift_u16(fixed_uint16m1_t a, fixed_uint16m1_t b) {
@@ -121,7 +121,7 @@ fixed_uint16m1_t lshift_u16(fixed_uint16m1_t a, fixed_uint16m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[SHR:%.*]] = lshr <16 x i16> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[SHR]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> poison, <16 x i16> [[SHR]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i16> [[CAST_SCALABLE]]
 //
 fixed_uint16m1_t rshift_u16(fixed_uint16m1_t a, fixed_uint16m1_t b) {
@@ -133,7 +133,7 @@ fixed_uint16m1_t rshift_u16(fixed_uint16m1_t a, fixed_uint16m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[SHL:%.*]] = shl <8 x i32> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[SHL]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> poison, <8 x i32> [[SHL]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i32> [[CAST_SCALABLE]]
 //
 fixed_int32m1_t lshift_i32(fixed_int32m1_t a, fixed_int32m1_t b) {
@@ -145,7 +145,7 @@ fixed_int32m1_t lshift_i32(fixed_int32m1_t a, fixed_int32m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[SHR:%.*]] = ashr <8 x i32> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[SHR]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> poison, <8 x i32> [[SHR]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i32> [[CAST_SCALABLE]]
 //
 fixed_int32m1_t rshift_i32(fixed_int32m1_t a, fixed_int32m1_t b) {
@@ -157,7 +157,7 @@ fixed_int32m1_t rshift_i32(fixed_int32m1_t a, fixed_int32m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[SHL:%.*]] = shl <8 x i32> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[SHL]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> poison, <8 x i32> [[SHL]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i32> [[CAST_SCALABLE]]
 //
 fixed_uint32m1_t lshift_u32(fixed_uint32m1_t a, fixed_uint32m1_t b) {
@@ -169,7 +169,7 @@ fixed_uint32m1_t lshift_u32(fixed_uint32m1_t a, fixed_uint32m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[SHR:%.*]] = lshr <8 x i32> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[SHR]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> poison, <8 x i32> [[SHR]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i32> [[CAST_SCALABLE]]
 //
 fixed_uint32m1_t rshift_u32(fixed_uint32m1_t a, fixed_uint32m1_t b) {
@@ -181,7 +181,7 @@ fixed_uint32m1_t rshift_u32(fixed_uint32m1_t a, fixed_uint32m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[SHL:%.*]] = shl <4 x i64> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[SHL]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> poison, <4 x i64> [[SHL]], i64 0)
 // CHECK-NEXT:    ret <vscale x 1 x i64> [[CAST_SCALABLE]]
 //
 fixed_int64m1_t lshift_i64(fixed_int64m1_t a, fixed_int64m1_t b) {
@@ -193,7 +193,7 @@ fixed_int64m1_t lshift_i64(fixed_int64m1_t a, fixed_int64m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[SHR:%.*]] = ashr <4 x i64> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[SHR]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> poison, <4 x i64> [[SHR]], i64 0)
 // CHECK-NEXT:    ret <vscale x 1 x i64> [[CAST_SCALABLE]]
 //
 fixed_int64m1_t rshift_i64(fixed_int64m1_t a, fixed_int64m1_t b) {
@@ -205,7 +205,7 @@ fixed_int64m1_t rshift_i64(fixed_int64m1_t a, fixed_int64m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[SHL:%.*]] = shl <4 x i64> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[SHL]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> poison, <4 x i64> [[SHL]], i64 0)
 // CHECK-NEXT:    ret <vscale x 1 x i64> [[CAST_SCALABLE]]
 //
 fixed_uint64m1_t lshift_u64(fixed_uint64m1_t a, fixed_uint64m1_t b) {
@@ -217,7 +217,7 @@ fixed_uint64m1_t lshift_u64(fixed_uint64m1_t a, fixed_uint64m1_t b) {
 // CHECK-NEXT:    [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[B_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[SHR:%.*]] = lshr <4 x i64> [[A]], [[B]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[SHR]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> poison, <4 x i64> [[SHR]], i64 0)
 // CHECK-NEXT:    ret <vscale x 1 x i64> [[CAST_SCALABLE]]
 //
 fixed_uint64m1_t rshift_u64(fixed_uint64m1_t a, fixed_uint64m1_t b) {
@@ -232,7 +232,7 @@ fixed_uint64m1_t rshift_u64(fixed_uint64m1_t a, fixed_uint64m1_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <32 x i32> [[SPLAT_SPLATINSERT]], <32 x i32> poison, <32 x i32> zeroinitializer
 // CHECK-NEXT:    [[SH_PROM:%.*]] = trunc <32 x i32> [[SPLAT_SPLAT]] to <32 x i8>
 // CHECK-NEXT:    [[SHL:%.*]] = shl <32 x i8> [[A]], [[SH_PROM]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[SHL]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> poison, <32 x i8> [[SHL]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i8> [[CAST_SCALABLE]]
 //
 fixed_int8m1_t lshift_i8_rsplat(fixed_int8m1_t a, int8_t b) {
@@ -245,7 +245,7 @@ fixed_int8m1_t lshift_i8_rsplat(fixed_int8m1_t a, int8_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i8> poison, i8 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <32 x i8> [[SPLAT_SPLATINSERT]], <32 x i8> poison, <32 x i32> zeroinitializer
 // CHECK-NEXT:    [[SHL:%.*]] = shl <32 x i8> [[SPLAT_SPLAT]], [[A]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[SHL]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> poison, <32 x i8> [[SHL]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i8> [[CAST_SCALABLE]]
 //
 fixed_int8m1_t lshift_i8_lsplat(fixed_int8m1_t a, int8_t b) {
@@ -260,7 +260,7 @@ fixed_int8m1_t lshift_i8_lsplat(fixed_int8m1_t a, int8_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <32 x i32> [[SPLAT_SPLATINSERT]], <32 x i32> poison, <32 x i32> zeroinitializer
 // CHECK-NEXT:    [[SH_PROM:%.*]] = trunc <32 x i32> [[SPLAT_SPLAT]] to <32 x i8>
 // CHECK-NEXT:    [[SHR:%.*]] = ashr <32 x i8> [[A]], [[SH_PROM]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[SHR]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> poison, <32 x i8> [[SHR]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i8> [[CAST_SCALABLE]]
 //
 fixed_int8m1_t rshift_i8_rsplat(fixed_int8m1_t a, int8_t b) {
@@ -273,7 +273,7 @@ fixed_int8m1_t rshift_i8_rsplat(fixed_int8m1_t a, int8_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i8> poison, i8 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <32 x i8> [[SPLAT_SPLATINSERT]], <32 x i8> poison, <32 x i32> zeroinitializer
 // CHECK-NEXT:    [[SHR:%.*]] = ashr <32 x i8> [[SPLAT_SPLAT]], [[A]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[SHR]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> poison, <32 x i8> [[SHR]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i8> [[CAST_SCALABLE]]
 //
 fixed_int8m1_t rshift_i8_lsplat(fixed_int8m1_t a, int8_t b) {
@@ -288,7 +288,7 @@ fixed_int8m1_t rshift_i8_lsplat(fixed_int8m1_t a, int8_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <32 x i32> [[SPLAT_SPLATINSERT]], <32 x i32> poison, <32 x i32> zeroinitializer
 // CHECK-NEXT:    [[SH_PROM:%.*]] = trunc <32 x i32> [[SPLAT_SPLAT]] to <32 x i8>
 // CHECK-NEXT:    [[SHL:%.*]] = shl <32 x i8> [[A]], [[SH_PROM]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[SHL]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> poison, <32 x i8> [[SHL]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i8> [[CAST_SCALABLE]]
 //
 fixed_uint8m1_t lshift_u8_rsplat(fixed_uint8m1_t a, uint8_t b) {
@@ -301,7 +301,7 @@ fixed_uint8m1_t lshift_u8_rsplat(fixed_uint8m1_t a, uint8_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i8> poison, i8 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <32 x i8> [[SPLAT_SPLATINSERT]], <32 x i8> poison, <32 x i32> zeroinitializer
 // CHECK-NEXT:    [[SHL:%.*]] = shl <32 x i8> [[SPLAT_SPLAT]], [[A]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[SHL]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> poison, <32 x i8> [[SHL]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i8> [[CAST_SCALABLE]]
 //
 fixed_uint8m1_t lshift_u8_lsplat(fixed_uint8m1_t a, uint8_t b) {
@@ -316,7 +316,7 @@ fixed_uint8m1_t lshift_u8_lsplat(fixed_uint8m1_t a, uint8_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <32 x i32> [[SPLAT_SPLATINSERT]], <32 x i32> poison, <32 x i32> zeroinitializer
 // CHECK-NEXT:    [[SH_PROM:%.*]] = trunc <32 x i32> [[SPLAT_SPLAT]] to <32 x i8>
 // CHECK-NEXT:    [[SHR:%.*]] = lshr <32 x i8> [[A]], [[SH_PROM]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[SHR]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> poison, <32 x i8> [[SHR]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i8> [[CAST_SCALABLE]]
 //
 fixed_uint8m1_t rshift_u8_rsplat(fixed_uint8m1_t a, uint8_t b) {
@@ -329,7 +329,7 @@ fixed_uint8m1_t rshift_u8_rsplat(fixed_uint8m1_t a, uint8_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i8> poison, i8 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <32 x i8> [[SPLAT_SPLATINSERT]], <32 x i8> poison, <32 x i32> zeroinitializer
 // CHECK-NEXT:    [[SHR:%.*]] = lshr <32 x i8> [[SPLAT_SPLAT]], [[A]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[SHR]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> poison, <32 x i8> [[SHR]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i8> [[CAST_SCALABLE]]
 //
 fixed_uint8m1_t rshift_u8_lsplat(fixed_uint8m1_t a, uint8_t b) {
@@ -344,7 +344,7 @@ fixed_uint8m1_t rshift_u8_lsplat(fixed_uint8m1_t a, uint8_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i32> [[SPLAT_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer
 // CHECK-NEXT:    [[SH_PROM:%.*]] = trunc <16 x i32> [[SPLAT_SPLAT]] to <16 x i16>
 // CHECK-NEXT:    [[SHL:%.*]] = shl <16 x i16> [[A]], [[SH_PROM]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[SHL]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> poison, <16 x i16> [[SHL]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i16> [[CAST_SCALABLE]]
 //
 fixed_int16m1_t lshift_i16_rsplat(fixed_int16m1_t a, int16_t b) {
@@ -357,7 +357,7 @@ fixed_int16m1_t lshift_i16_rsplat(fixed_int16m1_t a, int16_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i16> poison, i16 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i16> [[SPLAT_SPLATINSERT]], <16 x i16> poison, <16 x i32> zeroinitializer
 // CHECK-NEXT:    [[SHL:%.*]] = shl <16 x i16> [[SPLAT_SPLAT]], [[A]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[SHL]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> poison, <16 x i16> [[SHL]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i16> [[CAST_SCALABLE]]
 //
 fixed_int16m1_t lshift_i16_lsplat(fixed_int16m1_t a, int16_t b) {
@@ -372,7 +372,7 @@ fixed_int16m1_t lshift_i16_lsplat(fixed_int16m1_t a, int16_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i32> [[SPLAT_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer
 // CHECK-NEXT:    [[SH_PROM:%.*]] = trunc <16 x i32> [[SPLAT_SPLAT]] to <16 x i16>
 // CHECK-NEXT:    [[SHR:%.*]] = ashr <16 x i16> [[A]], [[SH_PROM]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[SHR]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> poison, <16 x i16> [[SHR]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i16> [[CAST_SCALABLE]]
 //
 fixed_int16m1_t rshift_i16_rsplat(fixed_int16m1_t a, int16_t b) {
@@ -385,7 +385,7 @@ fixed_int16m1_t rshift_i16_rsplat(fixed_int16m1_t a, int16_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i16> poison, i16 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i16> [[SPLAT_SPLATINSERT]], <16 x i16> poison, <16 x i32> zeroinitializer
 // CHECK-NEXT:    [[SHR:%.*]] = ashr <16 x i16> [[SPLAT_SPLAT]], [[A]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[SHR]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> poison, <16 x i16> [[SHR]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i16> [[CAST_SCALABLE]]
 //
 fixed_int16m1_t rshift_i16_lsplat(fixed_int16m1_t a, int16_t b) {
@@ -400,7 +400,7 @@ fixed_int16m1_t rshift_i16_lsplat(fixed_int16m1_t a, int16_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i32> [[SPLAT_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer
 // CHECK-NEXT:    [[SH_PROM:%.*]] = trunc <16 x i32> [[SPLAT_SPLAT]] to <16 x i16>
 // CHECK-NEXT:    [[SHL:%.*]] = shl <16 x i16> [[A]], [[SH_PROM]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[SHL]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> poison, <16 x i16> [[SHL]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i16> [[CAST_SCALABLE]]
 //
 fixed_uint16m1_t lshift_u16_rsplat(fixed_uint16m1_t a, uint16_t b) {
@@ -413,7 +413,7 @@ fixed_uint16m1_t lshift_u16_rsplat(fixed_uint16m1_t a, uint16_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i16> poison, i16 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i16> [[SPLAT_SPLATINSERT]], <16 x i16> poison, <16 x i32> zeroinitializer
 // CHECK-NEXT:    [[SHL:%.*]] = shl <16 x i16> [[SPLAT_SPLAT]], [[A]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[SHL]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> poison, <16 x i16> [[SHL]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i16> [[CAST_SCALABLE]]
 //
 fixed_uint16m1_t lshift_u16_lsplat(fixed_uint16m1_t a, uint16_t b) {
@@ -428,7 +428,7 @@ fixed_uint16m1_t lshift_u16_lsplat(fixed_uint16m1_t a, uint16_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i32> [[SPLAT_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer
 // CHECK-NEXT:    [[SH_PROM:%.*]] = trunc <16 x i32> [[SPLAT_SPLAT]] to <16 x i16>
 // CHECK-NEXT:    [[SHR:%.*]] = lshr <16 x i16> [[A]], [[SH_PROM]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[SHR]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> poison, <16 x i16> [[SHR]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i16> [[CAST_SCALABLE]]
 //
 fixed_uint16m1_t rshift_u16_rsplat(fixed_uint16m1_t a, uint16_t b) {
@@ -441,7 +441,7 @@ fixed_uint16m1_t rshift_u16_rsplat(fixed_uint16m1_t a, uint16_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i16> poison, i16 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i16> [[SPLAT_SPLATINSERT]], <16 x i16> poison, <16 x i32> zeroinitializer
 // CHECK-NEXT:    [[SHR:%.*]] = lshr <16 x i16> [[SPLAT_SPLAT]], [[A]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[SHR]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> poison, <16 x i16> [[SHR]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i16> [[CAST_SCALABLE]]
 //
 fixed_uint16m1_t rshift_u16_lsplat(fixed_uint16m1_t a, uint16_t b) {
@@ -454,7 +454,7 @@ fixed_uint16m1_t rshift_u16_lsplat(fixed_uint16m1_t a, uint16_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <8 x i32> [[SPLAT_SPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[SHL:%.*]] = shl <8 x i32> [[A]], [[SPLAT_SPLAT]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[SHL]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> poison, <8 x i32> [[SHL]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i32> [[CAST_SCALABLE]]
 //
 fixed_int32m1_t lshift_i32_rsplat(fixed_int32m1_t a, int32_t b) {
@@ -467,7 +467,7 @@ fixed_int32m1_t lshift_i32_rsplat(fixed_int32m1_t a, int32_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <8 x i32> [[SPLAT_SPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[SHL:%.*]] = shl <8 x i32> [[SPLAT_SPLAT]], [[A]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[SHL]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> poison, <8 x i32> [[SHL]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i32> [[CAST_SCALABLE]]
 //
 fixed_int32m1_t lshift_i32_lsplat(fixed_int32m1_t a, int32_t b) {
@@ -480,7 +480,7 @@ fixed_int32m1_t lshift_i32_lsplat(fixed_int32m1_t a, int32_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <8 x i32> [[SPLAT_SPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[SHR:%.*]] = ashr <8 x i32> [[A]], [[SPLAT_SPLAT]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[SHR]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> poison, <8 x i32> [[SHR]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i32> [[CAST_SCALABLE]]
 //
 fixed_int32m1_t rshift_i32_rsplat(fixed_int32m1_t a, int32_t b) {
@@ -493,7 +493,7 @@ fixed_int32m1_t rshift_i32_rsplat(fixed_int32m1_t a, int32_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <8 x i32> [[SPLAT_SPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[SHR:%.*]] = ashr <8 x i32> [[SPLAT_SPLAT]], [[A]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[SHR]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> poison, <8 x i32> [[SHR]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i32> [[CAST_SCALABLE]]
 //
 fixed_int32m1_t rshift_i32_lsplat(fixed_int32m1_t a, int32_t b) {
@@ -506,7 +506,7 @@ fixed_int32m1_t rshift_i32_lsplat(fixed_int32m1_t a, int32_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <8 x i32> [[SPLAT_SPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[SHL:%.*]] = shl <8 x i32> [[A]], [[SPLAT_SPLAT]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[SHL]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> poison, <8 x i32> [[SHL]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i32> [[CAST_SCALABLE]]
 //
 fixed_uint32m1_t lshift_u32_rsplat(fixed_uint32m1_t a, uint32_t b) {
@@ -519,7 +519,7 @@ fixed_uint32m1_t lshift_u32_rsplat(fixed_uint32m1_t a, uint32_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <8 x i32> [[SPLAT_SPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[SHL:%.*]] = shl <8 x i32> [[SPLAT_SPLAT]], [[A]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[SHL]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> poison, <8 x i32> [[SHL]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i32> [[CAST_SCALABLE]]
 //
 fixed_uint32m1_t lshift_u32_lsplat(fixed_uint32m1_t a, uint32_t b) {
@@ -532,7 +532,7 @@ fixed_uint32m1_t lshift_u32_lsplat(fixed_uint32m1_t a, uint32_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <8 x i32> [[SPLAT_SPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[SHR:%.*]] = lshr <8 x i32> [[A]], [[SPLAT_SPLAT]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[SHR]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> poison, <8 x i32> [[SHR]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i32> [[CAST_SCALABLE]]
 //
 fixed_uint32m1_t rshift_u32_rsplat(fixed_uint32m1_t a, uint32_t b) {
@@ -545,7 +545,7 @@ fixed_uint32m1_t rshift_u32_rsplat(fixed_uint32m1_t a, uint32_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <8 x i32> [[SPLAT_SPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[SHR:%.*]] = lshr <8 x i32> [[SPLAT_SPLAT]], [[A]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[SHR]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> poison, <8 x i32> [[SHR]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i32> [[CAST_SCALABLE]]
 //
 fixed_uint32m1_t rshift_u32_lsplat(fixed_uint32m1_t a, uint32_t b) {
@@ -558,7 +558,7 @@ fixed_uint32m1_t rshift_u32_lsplat(fixed_uint32m1_t a, uint32_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <4 x i64> [[SPLAT_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
 // CHECK-NEXT:    [[SHL:%.*]] = shl <4 x i64> [[A]], [[SPLAT_SPLAT]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[SHL]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> poison, <4 x i64> [[SHL]], i64 0)
 // CHECK-NEXT:    ret <vscale x 1 x i64> [[CAST_SCALABLE]]
 //
 fixed_int64m1_t lshift_i64_rsplat(fixed_int64m1_t a, int64_t b) {
@@ -571,7 +571,7 @@ fixed_int64m1_t lshift_i64_rsplat(fixed_int64m1_t a, int64_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <4 x i64> [[SPLAT_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
 // CHECK-NEXT:    [[SHL:%.*]] = shl <4 x i64> [[SPLAT_SPLAT]], [[A]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[SHL]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> poison, <4 x i64> [[SHL]], i64 0)
 // CHECK-NEXT:    ret <vscale x 1 x i64> [[CAST_SCALABLE]]
 //
 fixed_int64m1_t lshift_i64_lsplat(fixed_int64m1_t a, int64_t b) {
@@ -584,7 +584,7 @@ fixed_int64m1_t lshift_i64_lsplat(fixed_int64m1_t a, int64_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <4 x i64> [[SPLAT_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
 // CHECK-NEXT:    [[SHR:%.*]] = ashr <4 x i64> [[A]], [[SPLAT_SPLAT]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[SHR]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> poison, <4 x i64> [[SHR]], i64 0)
 // CHECK-NEXT:    ret <vscale x 1 x i64> [[CAST_SCALABLE]]
 //
 fixed_int64m1_t rshift_i64_rsplat(fixed_int64m1_t a, int64_t b) {
@@ -597,7 +597,7 @@ fixed_int64m1_t rshift_i64_rsplat(fixed_int64m1_t a, int64_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <4 x i64> [[SPLAT_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
 // CHECK-NEXT:    [[SHR:%.*]] = ashr <4 x i64> [[SPLAT_SPLAT]], [[A]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[SHR]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> poison, <4 x i64> [[SHR]], i64 0)
 // CHECK-NEXT:    ret <vscale x 1 x i64> [[CAST_SCALABLE]]
 //
 fixed_int64m1_t rshift_i64_lsplat(fixed_int64m1_t a, int64_t b) {
@@ -610,7 +610,7 @@ fixed_int64m1_t rshift_i64_lsplat(fixed_int64m1_t a, int64_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <4 x i64> [[SPLAT_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
 // CHECK-NEXT:    [[SHL:%.*]] = shl <4 x i64> [[A]], [[SPLAT_SPLAT]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[SHL]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> poison, <4 x i64> [[SHL]], i64 0)
 // CHECK-NEXT:    ret <vscale x 1 x i64> [[CAST_SCALABLE]]
 //
 fixed_uint64m1_t lshift_u64_rsplat(fixed_uint64m1_t a, uint64_t b) {
@@ -623,7 +623,7 @@ fixed_uint64m1_t lshift_u64_rsplat(fixed_uint64m1_t a, uint64_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <4 x i64> [[SPLAT_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
 // CHECK-NEXT:    [[SHL:%.*]] = shl <4 x i64> [[SPLAT_SPLAT]], [[A]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[SHL]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> poison, <4 x i64> [[SHL]], i64 0)
 // CHECK-NEXT:    ret <vscale x 1 x i64> [[CAST_SCALABLE]]
 //
 fixed_uint64m1_t lshift_u64_lsplat(fixed_uint64m1_t a, uint64_t b) {
@@ -636,7 +636,7 @@ fixed_uint64m1_t lshift_u64_lsplat(fixed_uint64m1_t a, uint64_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <4 x i64> [[SPLAT_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
 // CHECK-NEXT:    [[SHR:%.*]] = lshr <4 x i64> [[A]], [[SPLAT_SPLAT]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[SHR]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> poison, <4 x i64> [[SHR]], i64 0)
 // CHECK-NEXT:    ret <vscale x 1 x i64> [[CAST_SCALABLE]]
 //
 fixed_uint64m1_t rshift_u64_rsplat(fixed_uint64m1_t a, uint64_t b) {
@@ -649,7 +649,7 @@ fixed_uint64m1_t rshift_u64_rsplat(fixed_uint64m1_t a, uint64_t b) {
 // CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <4 x i64> [[SPLAT_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
 // CHECK-NEXT:    [[SHR:%.*]] = lshr <4 x i64> [[SPLAT_SPLAT]], [[A]]
-// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[SHR]], i64 0)
+// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> poison, <4 x i64> [[SHR]], i64 0)
 // CHECK-NEXT:    ret <vscale x 1 x i64> [[CAST_SCALABLE]]
 //
 fixed_uint64m1_t rshift_u64_lsplat(fixed_uint64m1_t a, uint64_t b) {
diff --git a/clang/test/CodeGen/attr-arm-sve-vector-bits-cast.c b/clang/test/CodeGen/attr-arm-sve-vector-bits-cast.c
index 8be1123a2305a3..cdfe6d5df848bb 100644
--- a/clang/test/CodeGen/attr-arm-sve-vector-bits-cast.c
+++ b/clang/test/CodeGen/attr-arm-sve-vector-bits-cast.c
@@ -95,7 +95,7 @@ gnu_int32_t from_svint32_t__to_gnu_int32_t(svint32_t type) {
 // CHECK-LABEL: @to_fixed_int32_t__from_gnu_int32_t(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TYPE:%.*]] = load <16 x i32>, ptr [[TMP0:%.*]], align 16, !tbaa [[TBAA6]]
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[TYPE]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> poison, <16 x i32> [[TYPE]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
 //
 fixed_int32_t to_fixed_int32_t__from_gnu_int32_t(gnu_int32_t type) {
diff --git a/clang/test/CodeGen/attr-arm-sve-vector-bits-codegen.c b/clang/test/CodeGen/attr-arm-sve-vector-bits-codegen.c
index 5db75547abda12..c643bcf61455b8 100644
--- a/clang/test/CodeGen/attr-arm-sve-vector-bits-codegen.c
+++ b/clang/test/CodeGen/attr-arm-sve-vector-bits-codegen.c
@@ -40,7 +40,7 @@ fixed_int32_t global_vec;
 // CHECK-NEXT:    [[CASTFIXEDSVE3:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[TMP10]], i64 0)
 // CHECK-NEXT:    store <16 x i32> [[CASTFIXEDSVE3]], ptr [[RETVAL]], align 16
 // CHECK-NEXT:    [[TMP11:%.*]] = load <16 x i32>, ptr [[RETVAL]], align 16
-// CHECK-NEXT:    [[CASTSCALABLESVE4:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[TMP11]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE4:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> poison, <16 x i32> [[TMP11]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE4]]
 //
 fixed_int32_t foo(svbool_t pred, svint32_t vec) {
@@ -57,7 +57,7 @@ fixed_int32_t foo(svbool_t pred, svint32_t vec) {
 // CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr [[TMP0]], align 16
 // CHECK-NEXT:    store <16 x i32> [[TMP1]], ptr [[RETVAL]], align 16
 // CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr [[RETVAL]], align 16
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[TMP2]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> poison, <16 x i32> [[TMP2]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
 //
 fixed_int32_t test_ptr_to_global() {
@@ -78,7 +78,7 @@ fixed_int32_t test_ptr_to_global() {
 // CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr [[ARRAYIDX]], align 16
 // CHECK-NEXT:    store <16 x i32> [[TMP1]], ptr [[RETVAL]], align 16
 // CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr [[RETVAL]], align 16
-// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[TMP2]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> poison, <16 x i32> [[TMP2]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
 //
 fixed_int32_t array_arg(fixed_int32_t arr[]) {
@@ -96,7 +96,7 @@ fixed_int32_t array_arg(fixed_int32_t arr[]) {
 // CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr [[TMP0]], align 2
 // CHECK-NEXT:    store <8 x i8> [[TMP1]], ptr [[RETVAL]], align 2
 // CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i8>, ptr [[RETVAL]], align 2
-// CHECK-NEXT:    [[CASTFIXEDSVE:%.*]] = call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v8i8(<vscale x 2 x i8> undef, <8 x i8> [[TMP2]], i64 0)
+// CHECK-NEXT:    [[CASTFIXEDSVE:%.*]] = call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v8i8(<vscale x 2 x i8> poison, <8 x i8> [[TMP2]], i64 0)
 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <vscale x 2 x i8> [[CASTFIXEDSVE]] to <vscale x 16 x i1>
 // CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP3]]
 //
@@ -139,7 +139,7 @@ fixed_bool_t address_of_array_idx() {
 // CHECK-NEXT:    [[CASTFIXEDSVE3:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[TMP11]], i64 0)
 // CHECK-NEXT:    store <16 x i32> [[CASTFIXEDSVE3]], ptr [[RETVAL]], align 16
 // CHECK-NEXT:    [[TMP12:%.*]] = load <16 x i32>, ptr [[RETVAL]], align 16
-// CHECK-NEXT:    [[CASTSCALABLESVE4:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[TMP12]], i64 0)
+// CHECK-NEXT:    [[CASTSCALABLESVE4:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> poison, <16 x i32> [[TMP12]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[CASTSCALABLESVE4]]
 //
 fixed_int32_t test_cast(svbool_t pred, svint32_t vec) {


