[llvm] eae26b6 - [IRBuilder] Use canonical i64 type for insertelement index used by vector splats.

Paul Walker via llvm-commits llvm-commits at lists.llvm.org
Wed Jan 11 06:10:13 PST 2023


Author: Paul Walker
Date: 2023-01-11T14:08:06Z
New Revision: eae26b6640afff715172d75fdee02e7df7530a9b

URL: https://github.com/llvm/llvm-project/commit/eae26b6640afff715172d75fdee02e7df7530a9b
DIFF: https://github.com/llvm/llvm-project/commit/eae26b6640afff715172d75fdee02e7df7530a9b.diff

LOG: [IRBuilder] Use canonical i64 type for insertelement index used by vector splats.

Instcombine prefers this canonical form (see getPreferredVectorIndex),
as does IRBuilder when passing the index as an integer so we may as
well use the prefered form from creation.

NOTE: All test changes are mechanical with nothing else expected
beyond a change of index type from i32 to i64.

Differential Revision: https://reviews.llvm.org/D140983

Added: 
    

Modified: 
    clang/test/CodeGen/PowerPC/builtins-ppc-p10vector.c
    clang/test/CodeGen/SystemZ/zvector.c
    clang/test/CodeGen/aarch64-sve-vector-arith-ops.c
    clang/test/CodeGen/aarch64-sve-vector-bitwise-ops.c
    clang/test/CodeGen/aarch64-sve-vector-shift-ops.c
    clang/test/CodeGen/aarch64-sve-vls-arith-ops.c
    clang/test/CodeGen/aarch64-sve-vls-shift-ops.c
    clang/test/CodeGen/arm-mve-intrinsics/compare.c
    clang/test/CodeGen/arm-mve-intrinsics/cplusplus.cpp
    clang/test/CodeGen/arm-mve-intrinsics/dup.c
    clang/test/CodeGen/arm-mve-intrinsics/ternary.c
    clang/test/CodeGen/arm-mve-intrinsics/vaddq.c
    clang/test/CodeGen/arm-mve-intrinsics/vhaddq.c
    clang/test/CodeGen/arm-mve-intrinsics/vhsubq.c
    clang/test/CodeGen/arm-mve-intrinsics/vmulq.c
    clang/test/CodeGen/arm-mve-intrinsics/vqaddq.c
    clang/test/CodeGen/arm-mve-intrinsics/vqdmulhq.c
    clang/test/CodeGen/arm-mve-intrinsics/vqdmullbq.c
    clang/test/CodeGen/arm-mve-intrinsics/vqdmulltq.c
    clang/test/CodeGen/arm-mve-intrinsics/vqrdmulhq.c
    clang/test/CodeGen/arm-mve-intrinsics/vqsubq.c
    clang/test/CodeGen/arm-mve-intrinsics/vsubq.c
    clang/test/CodeGen/matrix-type-operators-fast-math.c
    clang/test/CodeGen/matrix-type-operators.c
    clang/test/CodeGen/vecshift.c
    clang/test/CodeGenCXX/aarch64-sve-vector-conditional-op.cpp
    clang/test/CodeGenCXX/ext-int.cpp
    clang/test/CodeGenCXX/ext-vector-type-conditional.cpp
    clang/test/CodeGenCXX/matrix-type-operators.cpp
    clang/test/CodeGenCXX/vector-size-conditional.cpp
    clang/test/CodeGenCXX/vector-splat-conversion.cpp
    clang/test/CodeGenOpenCL/bool_cast.cl
    llvm/lib/IR/Constants.cpp
    llvm/lib/IR/IRBuilder.cpp
    llvm/test/CodeGen/Generic/expand-vp-gather-scatter.ll
    llvm/test/CodeGen/Generic/expand-vp-load-store.ll
    llvm/test/CodeGen/Generic/expand-vp.ll
    llvm/test/CodeGen/Thumb2/mve-gather-optimisation-deep.ll
    llvm/test/Transforms/CodeGenPrepare/AArch64/gather-scatter-opt-inseltpoison.ll
    llvm/test/Transforms/CodeGenPrepare/AArch64/gather-scatter-opt.ll
    llvm/test/Transforms/ConstraintElimination/geps-ptrvector.ll
    llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-abs-srshl.ll
    llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-fmul-idempotency.ll
    llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-mul-idempotency.ll
    llvm/test/Transforms/InstCombine/div.ll
    llvm/test/Transforms/InstCombine/fdiv.ll
    llvm/test/Transforms/InstCombine/fmul.ll
    llvm/test/Transforms/InstCombine/select.ll
    llvm/test/Transforms/InstCombine/sub.ll
    llvm/test/Transforms/InstCombine/zext-ctlz-trunc-to-ctlz-add.ll
    llvm/test/Transforms/InstSimplify/ConstProp/vscale-inseltpoison.ll
    llvm/test/Transforms/InstSimplify/ConstProp/vscale.ll
    llvm/test/Transforms/InstSimplify/cmp-vec-fast-path.ll
    llvm/test/Transforms/InstSimplify/gep.ll
    llvm/test/Transforms/InstSimplify/vscale-inseltpoison.ll
    llvm/test/Transforms/InstSimplify/vscale.ll
    llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll
    llvm/test/Transforms/LoopVectorize/AArch64/extractvalue-no-scalarization-required.ll
    llvm/test/Transforms/LoopVectorize/AArch64/first-order-recurrence.ll
    llvm/test/Transforms/LoopVectorize/AArch64/gather-do-not-vectorize-addressing.ll
    llvm/test/Transforms/LoopVectorize/AArch64/interleaved-store-of-first-order-recurrence.ll
    llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_test1_no_explicit_vect_width.ll
    llvm/test/Transforms/LoopVectorize/AArch64/scalable-avoid-scalarization.ll
    llvm/test/Transforms/LoopVectorize/AArch64/scalable-call.ll
    llvm/test/Transforms/LoopVectorize/AArch64/scalable-reduction-inloop-cond.ll
    llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll
    llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd.ll
    llvm/test/Transforms/LoopVectorize/AArch64/sve-basic-vec.ll
    llvm/test/Transforms/LoopVectorize/AArch64/sve-cond-inv-loads.ll
    llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll
    llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter.ll
    llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions-unusual-types.ll
    llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-loads.ll
    llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-store.ll
    llvm/test/Transforms/LoopVectorize/AArch64/sve-large-strides.ll
    llvm/test/Transforms/LoopVectorize/AArch64/sve-live-out-pointer-induction.ll
    llvm/test/Transforms/LoopVectorize/AArch64/sve-low-trip-count.ll
    llvm/test/Transforms/LoopVectorize/AArch64/sve-select-cmp.ll
    llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-forced.ll
    llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-option.ll
    llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-optsize.ll
    llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-reductions.ll
    llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-unroll.ll
    llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll
    llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll
    llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-extractvalue.ll
    llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll
    llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll
    llvm/test/Transforms/LoopVectorize/AArch64/tail-fold-uniform-memops.ll
    llvm/test/Transforms/LoopVectorize/ARM/mve-gather-scatter-tailpred.ll
    llvm/test/Transforms/LoopVectorize/ARM/tail-folding-not-allowed.ll
    llvm/test/Transforms/LoopVectorize/RISCV/defaults.ll
    llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll
    llvm/test/Transforms/LoopVectorize/RISCV/illegal-type.ll
    llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll
    llvm/test/Transforms/LoopVectorize/RISCV/mask-index-type.ll
    llvm/test/Transforms/LoopVectorize/RISCV/masked_gather_scatter.ll
    llvm/test/Transforms/LoopVectorize/RISCV/scalable-basics.ll
    llvm/test/Transforms/LoopVectorize/RISCV/scalable-tailfold.ll
    llvm/test/Transforms/LoopVectorize/RISCV/select-cmp-reduction.ll
    llvm/test/Transforms/LoopVectorize/RISCV/short-trip-count.ll
    llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll
    llvm/test/Transforms/LoopVectorize/RISCV/zvl32b.ll
    llvm/test/Transforms/LoopVectorize/X86/conversion-cost.ll
    llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll
    llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll
    llvm/test/Transforms/LoopVectorize/X86/metadata-enable.ll
    llvm/test/Transforms/LoopVectorize/X86/optsize.ll
    llvm/test/Transforms/LoopVectorize/X86/outer_loop_test1_no_explicit_vect_width.ll
    llvm/test/Transforms/LoopVectorize/X86/pr54634.ll
    llvm/test/Transforms/LoopVectorize/X86/tail_loop_folding.ll
    llvm/test/Transforms/LoopVectorize/X86/uniform_mem_op.ll
    llvm/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll
    llvm/test/Transforms/LoopVectorize/X86/vectorize-interleaved-accesses-gap.ll
    llvm/test/Transforms/LoopVectorize/X86/x86-predication.ll
    llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-divisible-TC.ll
    llvm/test/Transforms/LoopVectorize/epilog-vectorization-reductions.ll
    llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll
    llvm/test/Transforms/LoopVectorize/first-order-recurrence-multiply-recurrences.ll
    llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll
    llvm/test/Transforms/LoopVectorize/float-minmax-instruction-flag.ll
    llvm/test/Transforms/LoopVectorize/if-pred-non-void.ll
    llvm/test/Transforms/LoopVectorize/if-pred-stores.ll
    llvm/test/Transforms/LoopVectorize/induction-step.ll
    llvm/test/Transforms/LoopVectorize/induction.ll
    llvm/test/Transforms/LoopVectorize/instruction-only-used-outside-of-loop.ll
    llvm/test/Transforms/LoopVectorize/loop-form.ll
    llvm/test/Transforms/LoopVectorize/minmax_reduction.ll
    llvm/test/Transforms/LoopVectorize/multiple-strides-vectorization.ll
    llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll
    llvm/test/Transforms/LoopVectorize/optsize.ll
    llvm/test/Transforms/LoopVectorize/outer-loop-vec-phi-predecessor-order.ll
    llvm/test/Transforms/LoopVectorize/outer_loop_test1.ll
    llvm/test/Transforms/LoopVectorize/outer_loop_test2.ll
    llvm/test/Transforms/LoopVectorize/pr30654-phiscev-sext-trunc.ll
    llvm/test/Transforms/LoopVectorize/pr39417-optsize-scevchecks.ll
    llvm/test/Transforms/LoopVectorize/pr44488-predication.ll
    llvm/test/Transforms/LoopVectorize/pr45259.ll
    llvm/test/Transforms/LoopVectorize/pr50686.ll
    llvm/test/Transforms/LoopVectorize/pr51614-fold-tail-by-masking.ll
    llvm/test/Transforms/LoopVectorize/pr55167-fold-tail-live-out.ll
    llvm/test/Transforms/LoopVectorize/reduction-small-size.ll
    llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll
    llvm/test/Transforms/LoopVectorize/scalable-inductions.ll
    llvm/test/Transforms/LoopVectorize/scalable-lifetime.ll
    llvm/test/Transforms/LoopVectorize/scalable-loop-unpredicated-body-scalar-tail.ll
    llvm/test/Transforms/LoopVectorize/scalable-reduction-inloop.ll
    llvm/test/Transforms/LoopVectorize/scalable-trunc-min-bitwidth.ll
    llvm/test/Transforms/LoopVectorize/select-cmp.ll
    llvm/test/Transforms/LoopVectorize/select-reduction.ll
    llvm/test/Transforms/LoopVectorize/single-value-blend-phis.ll
    llvm/test/Transforms/LoopVectorize/uniform-blend.ll
    llvm/test/Transforms/LoopVectorize/vplan-widen-select-instruction.ll
    llvm/test/Transforms/LowerMatrixIntrinsics/bigger-expressions-double.ll
    llvm/test/Transforms/LowerMatrixIntrinsics/const-gep.ll
    llvm/test/Transforms/LowerMatrixIntrinsics/dot-product.ll
    llvm/test/Transforms/LowerMatrixIntrinsics/multiply-add-sub-double-row-major.ll
    llvm/test/Transforms/LowerMatrixIntrinsics/multiply-double-contraction-fmf.ll
    llvm/test/Transforms/LowerMatrixIntrinsics/multiply-double-contraction.ll
    llvm/test/Transforms/LowerMatrixIntrinsics/multiply-double-row-major.ll
    llvm/test/Transforms/LowerMatrixIntrinsics/multiply-double.ll
    llvm/test/Transforms/LowerMatrixIntrinsics/multiply-float-contraction-fmf.ll
    llvm/test/Transforms/LowerMatrixIntrinsics/multiply-float-contraction.ll
    llvm/test/Transforms/LowerMatrixIntrinsics/multiply-float.ll
    llvm/test/Transforms/LowerMatrixIntrinsics/multiply-fused-volatile.ll
    llvm/test/Transforms/LowerMatrixIntrinsics/multiply-i32-row-major.ll
    llvm/test/Transforms/LowerMatrixIntrinsics/multiply-i32.ll
    llvm/test/Transforms/LowerMatrixIntrinsics/multiply-left-transpose-row-major.ll
    llvm/test/Transforms/LowerMatrixIntrinsics/multiply-right-transpose.ll
    llvm/test/Transforms/LowerMatrixIntrinsics/preserve-existing-fast-math-flags.ll
    llvm/test/Transforms/LowerMatrixIntrinsics/propagate-backwards-unsupported.ll
    llvm/test/Transforms/LowerMatrixIntrinsics/transpose-opts-iterator-invalidation.ll
    llvm/test/Transforms/LowerMatrixIntrinsics/transpose-opts.ll
    llvm/test/Transforms/RewriteStatepointsForGC/scalar-base-vector.ll
    llvm/test/Transforms/RewriteStatepointsForGC/vector-nonlive-clobber.ll
    llvm/test/Transforms/Scalarizer/vector-gep.ll
    mlir/test/Target/LLVMIR/llvmir.mlir
    polly/test/CodeGen/invariant_load_hoist_alignment.ll
    polly/test/CodeGen/simple_vec_cast.ll
    polly/test/CodeGen/simple_vec_const.ll
    polly/test/CodeGen/simple_vec_ptr_ptr_ty.ll

Removed: 
    


################################################################################
diff  --git a/clang/test/CodeGen/PowerPC/builtins-ppc-p10vector.c b/clang/test/CodeGen/PowerPC/builtins-ppc-p10vector.c
index 8a2ad8fd8a674..484f83aceb89c 100644
--- a/clang/test/CodeGen/PowerPC/builtins-ppc-p10vector.c
+++ b/clang/test/CodeGen/PowerPC/builtins-ppc-p10vector.c
@@ -1551,11 +1551,11 @@ vector float test_vec_vec_splati_f(void) {
 
 vector double test_vec_vec_splatid(void) {
   // CHECK-BE: [[T1:%.+]] = fpext float %{{.+}} to double
-  // CHECK-BE-NEXT: [[T2:%.+]] = insertelement <2 x double> poison, double [[T1:%.+]], i32 0
+  // CHECK-BE-NEXT: [[T2:%.+]] = insertelement <2 x double> poison, double [[T1:%.+]], i64 0
   // CHECK-BE-NEXT: [[T3:%.+]] = shufflevector <2 x double> [[T2:%.+]], <2 x double> poison, <2 x i32> zeroinitialize
   // CHECK-BE-NEXT: ret <2 x double> [[T3:%.+]]
   // CHECK-LE: [[T1:%.+]] = fpext float %{{.+}} to double
-  // CHECK-LE-NEXT: [[T2:%.+]] = insertelement <2 x double> poison, double [[T1:%.+]], i32 0
+  // CHECK-LE-NEXT: [[T2:%.+]] = insertelement <2 x double> poison, double [[T1:%.+]], i64 0
   // CHECK-LE-NEXT: [[T3:%.+]] = shufflevector <2 x double> [[T2:%.+]], <2 x double> poison, <2 x i32> zeroinitialize
   // CHECK-LE-NEXT: ret <2 x double> [[T3:%.+]]
   return vec_splatid(1.0);

diff  --git a/clang/test/CodeGen/SystemZ/zvector.c b/clang/test/CodeGen/SystemZ/zvector.c
index e07b2d9108d28..50fae1184d238 100644
--- a/clang/test/CodeGen/SystemZ/zvector.c
+++ b/clang/test/CodeGen/SystemZ/zvector.c
@@ -1924,7 +1924,7 @@ void test_xor_assign(void) {
 // CHECK:   store volatile <16 x i8> [[SHL1]], ptr @sc, align 8
 // CHECK:   [[TMP4:%.*]] = load volatile <16 x i8>, ptr @sc, align 8
 // CHECK:   [[TMP5:%.*]] = load volatile i32, ptr @cnt, align 4
-// CHECK:   [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[TMP5]], i32 0
+// CHECK:   [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[TMP5]], i64 0
 // CHECK:   [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i32> [[SPLAT_SPLAT:%.*]]insert, <16 x i32> poison, <16 x i32> zeroinitializer
 // CHECK:   [[SH_PROM:%.*]] = trunc <16 x i32> [[SPLAT_SPLAT]] to <16 x i8>
 // CHECK:   [[SHL2:%.*]] = shl <16 x i8> [[TMP4]], [[SH_PROM]]
@@ -1942,7 +1942,7 @@ void test_xor_assign(void) {
 // CHECK:   store volatile <16 x i8> [[SHL5]], ptr @uc, align 8
 // CHECK:   [[TMP11:%.*]] = load volatile <16 x i8>, ptr @uc, align 8
 // CHECK:   [[TMP12:%.*]] = load volatile i32, ptr @cnt, align 4
-// CHECK:   [[SPLAT_SPLATINSERT6:%.*]] = insertelement <16 x i32> poison, i32 [[TMP12]], i32 0
+// CHECK:   [[SPLAT_SPLATINSERT6:%.*]] = insertelement <16 x i32> poison, i32 [[TMP12]], i64 0
 // CHECK:   [[SPLAT_SPLAT7:%.*]] = shufflevector <16 x i32> [[SPLAT_SPLATINSERT6]], <16 x i32> poison, <16 x i32> zeroinitializer
 // CHECK:   [[SH_PROM8:%.*]] = trunc <16 x i32> [[SPLAT_SPLAT7]] to <16 x i8>
 // CHECK:   [[SHL9:%.*]] = shl <16 x i8> [[TMP11]], [[SH_PROM8]]
@@ -1960,7 +1960,7 @@ void test_xor_assign(void) {
 // CHECK:   store volatile <8 x i16> [[SHL12]], ptr @ss, align 8
 // CHECK:   [[TMP18:%.*]] = load volatile <8 x i16>, ptr @ss, align 8
 // CHECK:   [[TMP19:%.*]] = load volatile i32, ptr @cnt, align 4
-// CHECK:   [[SPLAT_SPLATINSERT13:%.*]] = insertelement <8 x i32> poison, i32 [[TMP19]], i32 0
+// CHECK:   [[SPLAT_SPLATINSERT13:%.*]] = insertelement <8 x i32> poison, i32 [[TMP19]], i64 0
 // CHECK:   [[SPLAT_SPLAT14:%.*]] = shufflevector <8 x i32> [[SPLAT_SPLATINSERT13]], <8 x i32> poison, <8 x i32> zeroinitializer
 // CHECK:   [[SH_PROM15:%.*]] = trunc <8 x i32> [[SPLAT_SPLAT14]] to <8 x i16>
 // CHECK:   [[SHL16:%.*]] = shl <8 x i16> [[TMP18]], [[SH_PROM15]]
@@ -1978,7 +1978,7 @@ void test_xor_assign(void) {
 // CHECK:   store volatile <8 x i16> [[SHL19]], ptr @us, align 8
 // CHECK:   [[TMP25:%.*]] = load volatile <8 x i16>, ptr @us, align 8
 // CHECK:   [[TMP26:%.*]] = load volatile i32, ptr @cnt, align 4
-// CHECK:   [[SPLAT_SPLATINSERT20:%.*]] = insertelement <8 x i32> poison, i32 [[TMP26]], i32 0
+// CHECK:   [[SPLAT_SPLATINSERT20:%.*]] = insertelement <8 x i32> poison, i32 [[TMP26]], i64 0
 // CHECK:   [[SPLAT_SPLAT21:%.*]] = shufflevector <8 x i32> [[SPLAT_SPLATINSERT20]], <8 x i32> poison, <8 x i32> zeroinitializer
 // CHECK:   [[SH_PROM22:%.*]] = trunc <8 x i32> [[SPLAT_SPLAT21]] to <8 x i16>
 // CHECK:   [[SHL23:%.*]] = shl <8 x i16> [[TMP25]], [[SH_PROM22]]
@@ -1996,7 +1996,7 @@ void test_xor_assign(void) {
 // CHECK:   store volatile <4 x i32> [[SHL26]], ptr @si, align 8
 // CHECK:   [[TMP32:%.*]] = load volatile <4 x i32>, ptr @si, align 8
 // CHECK:   [[TMP33:%.*]] = load volatile i32, ptr @cnt, align 4
-// CHECK:   [[SPLAT_SPLATINSERT27:%.*]] = insertelement <4 x i32> poison, i32 [[TMP33]], i32 0
+// CHECK:   [[SPLAT_SPLATINSERT27:%.*]] = insertelement <4 x i32> poison, i32 [[TMP33]], i64 0
 // CHECK:   [[SPLAT_SPLAT28:%.*]] = shufflevector <4 x i32> [[SPLAT_SPLATINSERT27]], <4 x i32> poison, <4 x i32> zeroinitializer
 // CHECK:   [[SHL29:%.*]] = shl <4 x i32> [[TMP32]], [[SPLAT_SPLAT28]]
 // CHECK:   store volatile <4 x i32> [[SHL29]], ptr @si, align 8
@@ -2013,7 +2013,7 @@ void test_xor_assign(void) {
 // CHECK:   store volatile <4 x i32> [[SHL32]], ptr @ui, align 8
 // CHECK:   [[TMP39:%.*]] = load volatile <4 x i32>, ptr @ui, align 8
 // CHECK:   [[TMP40:%.*]] = load volatile i32, ptr @cnt, align 4
-// CHECK:   [[SPLAT_SPLATINSERT33:%.*]] = insertelement <4 x i32> poison, i32 [[TMP40]], i32 0
+// CHECK:   [[SPLAT_SPLATINSERT33:%.*]] = insertelement <4 x i32> poison, i32 [[TMP40]], i64 0
 // CHECK:   [[SPLAT_SPLAT34:%.*]] = shufflevector <4 x i32> [[SPLAT_SPLATINSERT33]], <4 x i32> poison, <4 x i32> zeroinitializer
 // CHECK:   [[SHL35:%.*]] = shl <4 x i32> [[TMP39]], [[SPLAT_SPLAT34]]
 // CHECK:   store volatile <4 x i32> [[SHL35]], ptr @ui, align 8
@@ -2030,7 +2030,7 @@ void test_xor_assign(void) {
 // CHECK:   store volatile <2 x i64> [[SHL38]], ptr @sl, align 8
 // CHECK:   [[TMP46:%.*]] = load volatile <2 x i64>, ptr @sl, align 8
 // CHECK:   [[TMP47:%.*]] = load volatile i32, ptr @cnt, align 4
-// CHECK:   [[SPLAT_SPLATINSERT39:%.*]] = insertelement <2 x i32> poison, i32 [[TMP47]], i32 0
+// CHECK:   [[SPLAT_SPLATINSERT39:%.*]] = insertelement <2 x i32> poison, i32 [[TMP47]], i64 0
 // CHECK:   [[SPLAT_SPLAT40:%.*]] = shufflevector <2 x i32> [[SPLAT_SPLATINSERT39]], <2 x i32> poison, <2 x i32> zeroinitializer
 // CHECK:   [[SH_PROM41:%.*]] = zext <2 x i32> [[SPLAT_SPLAT40]] to <2 x i64>
 // CHECK:   [[SHL42:%.*]] = shl <2 x i64> [[TMP46]], [[SH_PROM41]]
@@ -2048,7 +2048,7 @@ void test_xor_assign(void) {
 // CHECK:   store volatile <2 x i64> [[SHL45]], ptr @ul, align 8
 // CHECK:   [[TMP53:%.*]] = load volatile <2 x i64>, ptr @ul, align 8
 // CHECK:   [[TMP54:%.*]] = load volatile i32, ptr @cnt, align 4
-// CHECK:   [[SPLAT_SPLATINSERT46:%.*]] = insertelement <2 x i32> poison, i32 [[TMP54]], i32 0
+// CHECK:   [[SPLAT_SPLATINSERT46:%.*]] = insertelement <2 x i32> poison, i32 [[TMP54]], i64 0
 // CHECK:   [[SPLAT_SPLAT47:%.*]] = shufflevector <2 x i32> [[SPLAT_SPLATINSERT46]], <2 x i32> poison, <2 x i32> zeroinitializer
 // CHECK:   [[SH_PROM48:%.*]] = zext <2 x i32> [[SPLAT_SPLAT47]] to <2 x i64>
 // CHECK:   [[SHL49:%.*]] = shl <2 x i64> [[TMP53]], [[SH_PROM48]]
@@ -2106,7 +2106,7 @@ void test_sl(void) {
 // CHECK:   [[SHL1:%.*]] = shl <16 x i8> [[TMP3]], [[TMP2]]
 // CHECK:   store volatile <16 x i8> [[SHL1]], ptr @sc, align 8
 // CHECK:   [[TMP4:%.*]] = load volatile i32, ptr @cnt, align 4
-// CHECK:   [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[TMP4]], i32 0
+// CHECK:   [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[TMP4]], i64 0
 // CHECK:   [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i32> [[SPLAT_SPLAT:%.*]]insert, <16 x i32> poison, <16 x i32> zeroinitializer
 // CHECK:   [[TMP5:%.*]] = load volatile <16 x i8>, ptr @sc, align 8
 // CHECK:   [[SH_PROM:%.*]] = trunc <16 x i32> [[SPLAT_SPLAT]] to <16 x i8>
@@ -2124,7 +2124,7 @@ void test_sl(void) {
 // CHECK:   [[SHL5:%.*]] = shl <16 x i8> [[TMP10]], [[TMP9]]
 // CHECK:   store volatile <16 x i8> [[SHL5]], ptr @uc, align 8
 // CHECK:   [[TMP11:%.*]] = load volatile i32, ptr @cnt, align 4
-// CHECK:   [[SPLAT_SPLATINSERT6:%.*]] = insertelement <16 x i32> poison, i32 [[TMP11]], i32 0
+// CHECK:   [[SPLAT_SPLATINSERT6:%.*]] = insertelement <16 x i32> poison, i32 [[TMP11]], i64 0
 // CHECK:   [[SPLAT_SPLAT7:%.*]] = shufflevector <16 x i32> [[SPLAT_SPLATINSERT6]], <16 x i32> poison, <16 x i32> zeroinitializer
 // CHECK:   [[TMP12:%.*]] = load volatile <16 x i8>, ptr @uc, align 8
 // CHECK:   [[SH_PROM8:%.*]] = trunc <16 x i32> [[SPLAT_SPLAT7]] to <16 x i8>
@@ -2142,7 +2142,7 @@ void test_sl(void) {
 // CHECK:   [[SHL12:%.*]] = shl <8 x i16> [[TMP17]], [[TMP16]]
 // CHECK:   store volatile <8 x i16> [[SHL12]], ptr @ss, align 8
 // CHECK:   [[TMP18:%.*]] = load volatile i32, ptr @cnt, align 4
-// CHECK:   [[SPLAT_SPLATINSERT13:%.*]] = insertelement <8 x i32> poison, i32 [[TMP18]], i32 0
+// CHECK:   [[SPLAT_SPLATINSERT13:%.*]] = insertelement <8 x i32> poison, i32 [[TMP18]], i64 0
 // CHECK:   [[SPLAT_SPLAT14:%.*]] = shufflevector <8 x i32> [[SPLAT_SPLATINSERT13]], <8 x i32> poison, <8 x i32> zeroinitializer
 // CHECK:   [[TMP19:%.*]] = load volatile <8 x i16>, ptr @ss, align 8
 // CHECK:   [[SH_PROM15:%.*]] = trunc <8 x i32> [[SPLAT_SPLAT14]] to <8 x i16>
@@ -2160,7 +2160,7 @@ void test_sl(void) {
 // CHECK:   [[SHL19:%.*]] = shl <8 x i16> [[TMP24]], [[TMP23]]
 // CHECK:   store volatile <8 x i16> [[SHL19]], ptr @us, align 8
 // CHECK:   [[TMP25:%.*]] = load volatile i32, ptr @cnt, align 4
-// CHECK:   [[SPLAT_SPLATINSERT20:%.*]] = insertelement <8 x i32> poison, i32 [[TMP25]], i32 0
+// CHECK:   [[SPLAT_SPLATINSERT20:%.*]] = insertelement <8 x i32> poison, i32 [[TMP25]], i64 0
 // CHECK:   [[SPLAT_SPLAT21:%.*]] = shufflevector <8 x i32> [[SPLAT_SPLATINSERT20]], <8 x i32> poison, <8 x i32> zeroinitializer
 // CHECK:   [[TMP26:%.*]] = load volatile <8 x i16>, ptr @us, align 8
 // CHECK:   [[SH_PROM22:%.*]] = trunc <8 x i32> [[SPLAT_SPLAT21]] to <8 x i16>
@@ -2178,7 +2178,7 @@ void test_sl(void) {
 // CHECK:   [[SHL26:%.*]] = shl <4 x i32> [[TMP31]], [[TMP30]]
 // CHECK:   store volatile <4 x i32> [[SHL26]], ptr @si, align 8
 // CHECK:   [[TMP32:%.*]] = load volatile i32, ptr @cnt, align 4
-// CHECK:   [[SPLAT_SPLATINSERT27:%.*]] = insertelement <4 x i32> poison, i32 [[TMP32]], i32 0
+// CHECK:   [[SPLAT_SPLATINSERT27:%.*]] = insertelement <4 x i32> poison, i32 [[TMP32]], i64 0
 // CHECK:   [[SPLAT_SPLAT28:%.*]] = shufflevector <4 x i32> [[SPLAT_SPLATINSERT27]], <4 x i32> poison, <4 x i32> zeroinitializer
 // CHECK:   [[TMP33:%.*]] = load volatile <4 x i32>, ptr @si, align 8
 // CHECK:   [[SHL29:%.*]] = shl <4 x i32> [[TMP33]], [[SPLAT_SPLAT28]]
@@ -2195,7 +2195,7 @@ void test_sl(void) {
 // CHECK:   [[SHL32:%.*]] = shl <4 x i32> [[TMP38]], [[TMP37]]
 // CHECK:   store volatile <4 x i32> [[SHL32]], ptr @ui, align 8
 // CHECK:   [[TMP39:%.*]] = load volatile i32, ptr @cnt, align 4
-// CHECK:   [[SPLAT_SPLATINSERT33:%.*]] = insertelement <4 x i32> poison, i32 [[TMP39]], i32 0
+// CHECK:   [[SPLAT_SPLATINSERT33:%.*]] = insertelement <4 x i32> poison, i32 [[TMP39]], i64 0
 // CHECK:   [[SPLAT_SPLAT34:%.*]] = shufflevector <4 x i32> [[SPLAT_SPLATINSERT33]], <4 x i32> poison, <4 x i32> zeroinitializer
 // CHECK:   [[TMP40:%.*]] = load volatile <4 x i32>, ptr @ui, align 8
 // CHECK:   [[SHL35:%.*]] = shl <4 x i32> [[TMP40]], [[SPLAT_SPLAT34]]
@@ -2212,7 +2212,7 @@ void test_sl(void) {
 // CHECK:   [[SHL38:%.*]] = shl <2 x i64> [[TMP45]], [[TMP44]]
 // CHECK:   store volatile <2 x i64> [[SHL38]], ptr @sl, align 8
 // CHECK:   [[TMP46:%.*]] = load volatile i32, ptr @cnt, align 4
-// CHECK:   [[SPLAT_SPLATINSERT39:%.*]] = insertelement <2 x i32> poison, i32 [[TMP46]], i32 0
+// CHECK:   [[SPLAT_SPLATINSERT39:%.*]] = insertelement <2 x i32> poison, i32 [[TMP46]], i64 0
 // CHECK:   [[SPLAT_SPLAT40:%.*]] = shufflevector <2 x i32> [[SPLAT_SPLATINSERT39]], <2 x i32> poison, <2 x i32> zeroinitializer
 // CHECK:   [[TMP47:%.*]] = load volatile <2 x i64>, ptr @sl, align 8
 // CHECK:   [[SH_PROM41:%.*]] = zext <2 x i32> [[SPLAT_SPLAT40]] to <2 x i64>
@@ -2230,7 +2230,7 @@ void test_sl(void) {
 // CHECK:   [[SHL45:%.*]] = shl <2 x i64> [[TMP52]], [[TMP51]]
 // CHECK:   store volatile <2 x i64> [[SHL45]], ptr @ul, align 8
 // CHECK:   [[TMP53:%.*]] = load volatile i32, ptr @cnt, align 4
-// CHECK:   [[SPLAT_SPLATINSERT46:%.*]] = insertelement <2 x i32> poison, i32 [[TMP53]], i32 0
+// CHECK:   [[SPLAT_SPLATINSERT46:%.*]] = insertelement <2 x i32> poison, i32 [[TMP53]], i64 0
 // CHECK:   [[SPLAT_SPLAT47:%.*]] = shufflevector <2 x i32> [[SPLAT_SPLATINSERT46]], <2 x i32> poison, <2 x i32> zeroinitializer
 // CHECK:   [[TMP54:%.*]] = load volatile <2 x i64>, ptr @ul, align 8
 // CHECK:   [[SH_PROM48:%.*]] = zext <2 x i32> [[SPLAT_SPLAT47]] to <2 x i64>
@@ -2290,7 +2290,7 @@ void test_sl_assign(void) {
 // CHECK:   store volatile <16 x i8> [[SHR1]], ptr @sc, align 8
 // CHECK:   [[TMP4:%.*]] = load volatile <16 x i8>, ptr @sc, align 8
 // CHECK:   [[TMP5:%.*]] = load volatile i32, ptr @cnt, align 4
-// CHECK:   [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[TMP5]], i32 0
+// CHECK:   [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[TMP5]], i64 0
 // CHECK:   [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i32> [[SPLAT_SPLAT:%.*]]insert, <16 x i32> poison, <16 x i32> zeroinitializer
 // CHECK:   [[SH_PROM:%.*]] = trunc <16 x i32> [[SPLAT_SPLAT]] to <16 x i8>
 // CHECK:   [[SHR2:%.*]] = ashr <16 x i8> [[TMP4]], [[SH_PROM]]
@@ -2308,7 +2308,7 @@ void test_sl_assign(void) {
 // CHECK:   store volatile <16 x i8> [[SHR5]], ptr @uc, align 8
 // CHECK:   [[TMP11:%.*]] = load volatile <16 x i8>, ptr @uc, align 8
 // CHECK:   [[TMP12:%.*]] = load volatile i32, ptr @cnt, align 4
-// CHECK:   [[SPLAT_SPLATINSERT6:%.*]] = insertelement <16 x i32> poison, i32 [[TMP12]], i32 0
+// CHECK:   [[SPLAT_SPLATINSERT6:%.*]] = insertelement <16 x i32> poison, i32 [[TMP12]], i64 0
 // CHECK:   [[SPLAT_SPLAT7:%.*]] = shufflevector <16 x i32> [[SPLAT_SPLATINSERT6]], <16 x i32> poison, <16 x i32> zeroinitializer
 // CHECK:   [[SH_PROM8:%.*]] = trunc <16 x i32> [[SPLAT_SPLAT7]] to <16 x i8>
 // CHECK:   [[SHR9:%.*]] = lshr <16 x i8> [[TMP11]], [[SH_PROM8]]
@@ -2326,7 +2326,7 @@ void test_sl_assign(void) {
 // CHECK:   store volatile <8 x i16> [[SHR12]], ptr @ss, align 8
 // CHECK:   [[TMP18:%.*]] = load volatile <8 x i16>, ptr @ss, align 8
 // CHECK:   [[TMP19:%.*]] = load volatile i32, ptr @cnt, align 4
-// CHECK:   [[SPLAT_SPLATINSERT13:%.*]] = insertelement <8 x i32> poison, i32 [[TMP19]], i32 0
+// CHECK:   [[SPLAT_SPLATINSERT13:%.*]] = insertelement <8 x i32> poison, i32 [[TMP19]], i64 0
 // CHECK:   [[SPLAT_SPLAT14:%.*]] = shufflevector <8 x i32> [[SPLAT_SPLATINSERT13]], <8 x i32> poison, <8 x i32> zeroinitializer
 // CHECK:   [[SH_PROM15:%.*]] = trunc <8 x i32> [[SPLAT_SPLAT14]] to <8 x i16>
 // CHECK:   [[SHR16:%.*]] = ashr <8 x i16> [[TMP18]], [[SH_PROM15]]
@@ -2344,7 +2344,7 @@ void test_sl_assign(void) {
 // CHECK:   store volatile <8 x i16> [[SHR19]], ptr @us, align 8
 // CHECK:   [[TMP25:%.*]] = load volatile <8 x i16>, ptr @us, align 8
 // CHECK:   [[TMP26:%.*]] = load volatile i32, ptr @cnt, align 4
-// CHECK:   [[SPLAT_SPLATINSERT20:%.*]] = insertelement <8 x i32> poison, i32 [[TMP26]], i32 0
+// CHECK:   [[SPLAT_SPLATINSERT20:%.*]] = insertelement <8 x i32> poison, i32 [[TMP26]], i64 0
 // CHECK:   [[SPLAT_SPLAT21:%.*]] = shufflevector <8 x i32> [[SPLAT_SPLATINSERT20]], <8 x i32> poison, <8 x i32> zeroinitializer
 // CHECK:   [[SH_PROM22:%.*]] = trunc <8 x i32> [[SPLAT_SPLAT21]] to <8 x i16>
 // CHECK:   [[SHR23:%.*]] = lshr <8 x i16> [[TMP25]], [[SH_PROM22]]
@@ -2362,7 +2362,7 @@ void test_sl_assign(void) {
 // CHECK:   store volatile <4 x i32> [[SHR26]], ptr @si, align 8
 // CHECK:   [[TMP32:%.*]] = load volatile <4 x i32>, ptr @si, align 8
 // CHECK:   [[TMP33:%.*]] = load volatile i32, ptr @cnt, align 4
-// CHECK:   [[SPLAT_SPLATINSERT27:%.*]] = insertelement <4 x i32> poison, i32 [[TMP33]], i32 0
+// CHECK:   [[SPLAT_SPLATINSERT27:%.*]] = insertelement <4 x i32> poison, i32 [[TMP33]], i64 0
 // CHECK:   [[SPLAT_SPLAT28:%.*]] = shufflevector <4 x i32> [[SPLAT_SPLATINSERT27]], <4 x i32> poison, <4 x i32> zeroinitializer
 // CHECK:   [[SHR29:%.*]] = ashr <4 x i32> [[TMP32]], [[SPLAT_SPLAT28]]
 // CHECK:   store volatile <4 x i32> [[SHR29]], ptr @si, align 8
@@ -2379,7 +2379,7 @@ void test_sl_assign(void) {
 // CHECK:   store volatile <4 x i32> [[SHR32]], ptr @ui, align 8
 // CHECK:   [[TMP39:%.*]] = load volatile <4 x i32>, ptr @ui, align 8
 // CHECK:   [[TMP40:%.*]] = load volatile i32, ptr @cnt, align 4
-// CHECK:   [[SPLAT_SPLATINSERT33:%.*]] = insertelement <4 x i32> poison, i32 [[TMP40]], i32 0
+// CHECK:   [[SPLAT_SPLATINSERT33:%.*]] = insertelement <4 x i32> poison, i32 [[TMP40]], i64 0
 // CHECK:   [[SPLAT_SPLAT34:%.*]] = shufflevector <4 x i32> [[SPLAT_SPLATINSERT33]], <4 x i32> poison, <4 x i32> zeroinitializer
 // CHECK:   [[SHR35:%.*]] = lshr <4 x i32> [[TMP39]], [[SPLAT_SPLAT34]]
 // CHECK:   store volatile <4 x i32> [[SHR35]], ptr @ui, align 8
@@ -2396,7 +2396,7 @@ void test_sl_assign(void) {
 // CHECK:   store volatile <2 x i64> [[SHR38]], ptr @sl, align 8
 // CHECK:   [[TMP46:%.*]] = load volatile <2 x i64>, ptr @sl, align 8
 // CHECK:   [[TMP47:%.*]] = load volatile i32, ptr @cnt, align 4
-// CHECK:   [[SPLAT_SPLATINSERT39:%.*]] = insertelement <2 x i32> poison, i32 [[TMP47]], i32 0
+// CHECK:   [[SPLAT_SPLATINSERT39:%.*]] = insertelement <2 x i32> poison, i32 [[TMP47]], i64 0
 // CHECK:   [[SPLAT_SPLAT40:%.*]] = shufflevector <2 x i32> [[SPLAT_SPLATINSERT39]], <2 x i32> poison, <2 x i32> zeroinitializer
 // CHECK:   [[SH_PROM41:%.*]] = zext <2 x i32> [[SPLAT_SPLAT40]] to <2 x i64>
 // CHECK:   [[SHR42:%.*]] = ashr <2 x i64> [[TMP46]], [[SH_PROM41]]
@@ -2414,7 +2414,7 @@ void test_sl_assign(void) {
 // CHECK:   store volatile <2 x i64> [[SHR45]], ptr @ul, align 8
 // CHECK:   [[TMP53:%.*]] = load volatile <2 x i64>, ptr @ul, align 8
 // CHECK:   [[TMP54:%.*]] = load volatile i32, ptr @cnt, align 4
-// CHECK:   [[SPLAT_SPLATINSERT46:%.*]] = insertelement <2 x i32> poison, i32 [[TMP54]], i32 0
+// CHECK:   [[SPLAT_SPLATINSERT46:%.*]] = insertelement <2 x i32> poison, i32 [[TMP54]], i64 0
 // CHECK:   [[SPLAT_SPLAT47:%.*]] = shufflevector <2 x i32> [[SPLAT_SPLATINSERT46]], <2 x i32> poison, <2 x i32> zeroinitializer
 // CHECK:   [[SH_PROM48:%.*]] = zext <2 x i32> [[SPLAT_SPLAT47]] to <2 x i64>
 // CHECK:   [[SHR49:%.*]] = lshr <2 x i64> [[TMP53]], [[SH_PROM48]]
@@ -2472,7 +2472,7 @@ void test_sr(void) {
 // CHECK:   [[SHR1:%.*]] = ashr <16 x i8> [[TMP3]], [[TMP2]]
 // CHECK:   store volatile <16 x i8> [[SHR1]], ptr @sc, align 8
 // CHECK:   [[TMP4:%.*]] = load volatile i32, ptr @cnt, align 4
-// CHECK:   [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[TMP4]], i32 0
+// CHECK:   [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[TMP4]], i64 0
 // CHECK:   [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i32> [[SPLAT_SPLAT:%.*]]insert, <16 x i32> poison, <16 x i32> zeroinitializer
 // CHECK:   [[TMP5:%.*]] = load volatile <16 x i8>, ptr @sc, align 8
 // CHECK:   [[SH_PROM:%.*]] = trunc <16 x i32> [[SPLAT_SPLAT]] to <16 x i8>
@@ -2490,7 +2490,7 @@ void test_sr(void) {
 // CHECK:   [[SHR5:%.*]] = lshr <16 x i8> [[TMP10]], [[TMP9]]
 // CHECK:   store volatile <16 x i8> [[SHR5]], ptr @uc, align 8
 // CHECK:   [[TMP11:%.*]] = load volatile i32, ptr @cnt, align 4
-// CHECK:   [[SPLAT_SPLATINSERT6:%.*]] = insertelement <16 x i32> poison, i32 [[TMP11]], i32 0
+// CHECK:   [[SPLAT_SPLATINSERT6:%.*]] = insertelement <16 x i32> poison, i32 [[TMP11]], i64 0
 // CHECK:   [[SPLAT_SPLAT7:%.*]] = shufflevector <16 x i32> [[SPLAT_SPLATINSERT6]], <16 x i32> poison, <16 x i32> zeroinitializer
 // CHECK:   [[TMP12:%.*]] = load volatile <16 x i8>, ptr @uc, align 8
 // CHECK:   [[SH_PROM8:%.*]] = trunc <16 x i32> [[SPLAT_SPLAT7]] to <16 x i8>
@@ -2508,7 +2508,7 @@ void test_sr(void) {
 // CHECK:   [[SHR12:%.*]] = ashr <8 x i16> [[TMP17]], [[TMP16]]
 // CHECK:   store volatile <8 x i16> [[SHR12]], ptr @ss, align 8
 // CHECK:   [[TMP18:%.*]] = load volatile i32, ptr @cnt, align 4
-// CHECK:   [[SPLAT_SPLATINSERT13:%.*]] = insertelement <8 x i32> poison, i32 [[TMP18]], i32 0
+// CHECK:   [[SPLAT_SPLATINSERT13:%.*]] = insertelement <8 x i32> poison, i32 [[TMP18]], i64 0
 // CHECK:   [[SPLAT_SPLAT14:%.*]] = shufflevector <8 x i32> [[SPLAT_SPLATINSERT13]], <8 x i32> poison, <8 x i32> zeroinitializer
 // CHECK:   [[TMP19:%.*]] = load volatile <8 x i16>, ptr @ss, align 8
 // CHECK:   [[SH_PROM15:%.*]] = trunc <8 x i32> [[SPLAT_SPLAT14]] to <8 x i16>
@@ -2526,7 +2526,7 @@ void test_sr(void) {
 // CHECK:   [[SHR19:%.*]] = lshr <8 x i16> [[TMP24]], [[TMP23]]
 // CHECK:   store volatile <8 x i16> [[SHR19]], ptr @us, align 8
 // CHECK:   [[TMP25:%.*]] = load volatile i32, ptr @cnt, align 4
-// CHECK:   [[SPLAT_SPLATINSERT20:%.*]] = insertelement <8 x i32> poison, i32 [[TMP25]], i32 0
+// CHECK:   [[SPLAT_SPLATINSERT20:%.*]] = insertelement <8 x i32> poison, i32 [[TMP25]], i64 0
 // CHECK:   [[SPLAT_SPLAT21:%.*]] = shufflevector <8 x i32> [[SPLAT_SPLATINSERT20]], <8 x i32> poison, <8 x i32> zeroinitializer
 // CHECK:   [[TMP26:%.*]] = load volatile <8 x i16>, ptr @us, align 8
 // CHECK:   [[SH_PROM22:%.*]] = trunc <8 x i32> [[SPLAT_SPLAT21]] to <8 x i16>
@@ -2544,7 +2544,7 @@ void test_sr(void) {
 // CHECK:   [[SHR26:%.*]] = ashr <4 x i32> [[TMP31]], [[TMP30]]
 // CHECK:   store volatile <4 x i32> [[SHR26]], ptr @si, align 8
 // CHECK:   [[TMP32:%.*]] = load volatile i32, ptr @cnt, align 4
-// CHECK:   [[SPLAT_SPLATINSERT27:%.*]] = insertelement <4 x i32> poison, i32 [[TMP32]], i32 0
+// CHECK:   [[SPLAT_SPLATINSERT27:%.*]] = insertelement <4 x i32> poison, i32 [[TMP32]], i64 0
 // CHECK:   [[SPLAT_SPLAT28:%.*]] = shufflevector <4 x i32> [[SPLAT_SPLATINSERT27]], <4 x i32> poison, <4 x i32> zeroinitializer
 // CHECK:   [[TMP33:%.*]] = load volatile <4 x i32>, ptr @si, align 8
 // CHECK:   [[SHR29:%.*]] = ashr <4 x i32> [[TMP33]], [[SPLAT_SPLAT28]]
@@ -2561,7 +2561,7 @@ void test_sr(void) {
 // CHECK:   [[SHR32:%.*]] = lshr <4 x i32> [[TMP38]], [[TMP37]]
 // CHECK:   store volatile <4 x i32> [[SHR32]], ptr @ui, align 8
 // CHECK:   [[TMP39:%.*]] = load volatile i32, ptr @cnt, align 4
-// CHECK:   [[SPLAT_SPLATINSERT33:%.*]] = insertelement <4 x i32> poison, i32 [[TMP39]], i32 0
+// CHECK:   [[SPLAT_SPLATINSERT33:%.*]] = insertelement <4 x i32> poison, i32 [[TMP39]], i64 0
 // CHECK:   [[SPLAT_SPLAT34:%.*]] = shufflevector <4 x i32> [[SPLAT_SPLATINSERT33]], <4 x i32> poison, <4 x i32> zeroinitializer
 // CHECK:   [[TMP40:%.*]] = load volatile <4 x i32>, ptr @ui, align 8
 // CHECK:   [[SHR35:%.*]] = lshr <4 x i32> [[TMP40]], [[SPLAT_SPLAT34]]
@@ -2578,7 +2578,7 @@ void test_sr(void) {
 // CHECK:   [[SHR38:%.*]] = ashr <2 x i64> [[TMP45]], [[TMP44]]
 // CHECK:   store volatile <2 x i64> [[SHR38]], ptr @sl, align 8
 // CHECK:   [[TMP46:%.*]] = load volatile i32, ptr @cnt, align 4
-// CHECK:   [[SPLAT_SPLATINSERT39:%.*]] = insertelement <2 x i32> poison, i32 [[TMP46]], i32 0
+// CHECK:   [[SPLAT_SPLATINSERT39:%.*]] = insertelement <2 x i32> poison, i32 [[TMP46]], i64 0
 // CHECK:   [[SPLAT_SPLAT40:%.*]] = shufflevector <2 x i32> [[SPLAT_SPLATINSERT39]], <2 x i32> poison, <2 x i32> zeroinitializer
 // CHECK:   [[TMP47:%.*]] = load volatile <2 x i64>, ptr @sl, align 8
 // CHECK:   [[SH_PROM41:%.*]] = zext <2 x i32> [[SPLAT_SPLAT40]] to <2 x i64>
@@ -2596,7 +2596,7 @@ void test_sr(void) {
 // CHECK:   [[SHR45:%.*]] = lshr <2 x i64> [[TMP52]], [[TMP51]]
 // CHECK:   store volatile <2 x i64> [[SHR45]], ptr @ul, align 8
 // CHECK:   [[TMP53:%.*]] = load volatile i32, ptr @cnt, align 4
-// CHECK:   [[SPLAT_SPLATINSERT46:%.*]] = insertelement <2 x i32> poison, i32 [[TMP53]], i32 0
+// CHECK:   [[SPLAT_SPLATINSERT46:%.*]] = insertelement <2 x i32> poison, i32 [[TMP53]], i64 0
 // CHECK:   [[SPLAT_SPLAT47:%.*]] = shufflevector <2 x i32> [[SPLAT_SPLATINSERT46]], <2 x i32> poison, <2 x i32> zeroinitializer
 // CHECK:   [[TMP54:%.*]] = load volatile <2 x i64>, ptr @ul, align 8
 // CHECK:   [[SH_PROM48:%.*]] = zext <2 x i32> [[SPLAT_SPLAT47]] to <2 x i64>

diff  --git a/clang/test/CodeGen/aarch64-sve-vector-arith-ops.c b/clang/test/CodeGen/aarch64-sve-vector-arith-ops.c
index 4367e5c1e2b1c..ed4d7c5dbef45 100644
--- a/clang/test/CodeGen/aarch64-sve-vector-arith-ops.c
+++ b/clang/test/CodeGen/aarch64-sve-vector-arith-ops.c
@@ -209,7 +209,7 @@ svfloat64_t add_inplace_f64(svfloat64_t a, svfloat64_t b) {
 
 // CHECK-LABEL: @add_scalar_i8(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[SPLAT_SPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
 // CHECK-NEXT:    [[ADD:%.*]] = add <vscale x 16 x i8> [[A:%.*]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    ret <vscale x 16 x i8> [[ADD]]
@@ -220,7 +220,7 @@ svint8_t add_scalar_i8(svint8_t a, int8_t b) {
 
 // CHECK-LABEL: @add_scalar_i16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i16> poison, i16 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i16> poison, i16 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 8 x i16> [[SPLAT_SPLATINSERT]], <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
 // CHECK-NEXT:    [[ADD:%.*]] = add <vscale x 8 x i16> [[A:%.*]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    ret <vscale x 8 x i16> [[ADD]]
@@ -231,7 +231,7 @@ svint16_t add_scalar_i16(svint16_t a, int16_t b) {
 
 // CHECK-LABEL: @add_scalar_i32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[SPLAT_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
 // CHECK-NEXT:    [[ADD:%.*]] = add <vscale x 4 x i32> [[A:%.*]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[ADD]]
@@ -242,7 +242,7 @@ svint32_t add_scalar_i32(svint32_t a, int32_t b) {
 
 // CHECK-LABEL: @add_scalar_i64(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[SPLAT_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
 // CHECK-NEXT:    [[ADD:%.*]] = add <vscale x 2 x i64> [[A:%.*]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[ADD]]
@@ -253,7 +253,7 @@ svint64_t add_scalar_i64(svint64_t a, int64_t b) {
 
 // CHECK-LABEL: @add_scalar_u8(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[SPLAT_SPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
 // CHECK-NEXT:    [[ADD:%.*]] = add <vscale x 16 x i8> [[A:%.*]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    ret <vscale x 16 x i8> [[ADD]]
@@ -264,7 +264,7 @@ svuint8_t add_scalar_u8(svuint8_t a, uint8_t b) {
 
 // CHECK-LABEL: @add_scalar_u16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i16> poison, i16 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i16> poison, i16 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 8 x i16> [[SPLAT_SPLATINSERT]], <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
 // CHECK-NEXT:    [[ADD:%.*]] = add <vscale x 8 x i16> [[A:%.*]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    ret <vscale x 8 x i16> [[ADD]]
@@ -275,7 +275,7 @@ svuint16_t add_scalar_u16(svuint16_t a, uint16_t b) {
 
 // CHECK-LABEL: @add_scalar_u32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[SPLAT_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
 // CHECK-NEXT:    [[ADD:%.*]] = add <vscale x 4 x i32> [[A:%.*]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[ADD]]
@@ -286,7 +286,7 @@ svuint32_t add_scalar_u32(svuint32_t a, uint32_t b) {
 
 // CHECK-LABEL: @add_scalar_u64(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[SPLAT_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
 // CHECK-NEXT:    [[ADD:%.*]] = add <vscale x 2 x i64> [[A:%.*]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[ADD]]
@@ -297,7 +297,7 @@ svuint64_t add_scalar_u64(svuint64_t a, uint64_t b) {
 
 // CHECK-LABEL: @add_scalar_f16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 8 x half> poison, half [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 8 x half> poison, half [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 8 x half> [[SPLAT_SPLATINSERT]], <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
 // CHECK-NEXT:    [[ADD:%.*]] = fadd <vscale x 8 x half> [[A:%.*]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    ret <vscale x 8 x half> [[ADD]]
@@ -308,7 +308,7 @@ svfloat16_t add_scalar_f16(svfloat16_t a, __fp16 b) {
 
 // CHECK-LABEL: @add_scalar_f32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 4 x float> poison, float [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 4 x float> poison, float [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 4 x float> [[SPLAT_SPLATINSERT]], <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
 // CHECK-NEXT:    [[ADD:%.*]] = fadd <vscale x 4 x float> [[A:%.*]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    ret <vscale x 4 x float> [[ADD]]
@@ -319,7 +319,7 @@ svfloat32_t add_scalar_f32(svfloat32_t a, float b) {
 
 // CHECK-LABEL: @add_scalar_f64(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 2 x double> poison, double [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 2 x double> poison, double [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 2 x double> [[SPLAT_SPLATINSERT]], <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
 // CHECK-NEXT:    [[ADD:%.*]] = fadd <vscale x 2 x double> [[A:%.*]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    ret <vscale x 2 x double> [[ADD]]
@@ -656,7 +656,7 @@ svfloat64_t sub_inplace_f64(svfloat64_t a, svfloat64_t b) {
 
 // CHECK-LABEL: @sub_scalar_i8(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[SPLAT_SPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
 // CHECK-NEXT:    [[SUB:%.*]] = sub <vscale x 16 x i8> [[A:%.*]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    ret <vscale x 16 x i8> [[SUB]]
@@ -667,7 +667,7 @@ svint8_t sub_scalar_i8(svint8_t a, int8_t b) {
 
 // CHECK-LABEL: @sub_scalar_i16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i16> poison, i16 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i16> poison, i16 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 8 x i16> [[SPLAT_SPLATINSERT]], <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
 // CHECK-NEXT:    [[SUB:%.*]] = sub <vscale x 8 x i16> [[A:%.*]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    ret <vscale x 8 x i16> [[SUB]]
@@ -678,7 +678,7 @@ svint16_t sub_scalar_i16(svint16_t a, int16_t b) {
 
 // CHECK-LABEL: @sub_scalar_i32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[SPLAT_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
 // CHECK-NEXT:    [[SUB:%.*]] = sub <vscale x 4 x i32> [[A:%.*]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[SUB]]
@@ -689,7 +689,7 @@ svint32_t sub_scalar_i32(svint32_t a, int32_t b) {
 
 // CHECK-LABEL: @sub_scalar_i64(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[SPLAT_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
 // CHECK-NEXT:    [[SUB:%.*]] = sub <vscale x 2 x i64> [[A:%.*]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[SUB]]
@@ -700,7 +700,7 @@ svint64_t sub_scalar_i64(svint64_t a, int64_t b) {
 
 // CHECK-LABEL: @sub_scalar_u8(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[SPLAT_SPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
 // CHECK-NEXT:    [[SUB:%.*]] = sub <vscale x 16 x i8> [[A:%.*]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    ret <vscale x 16 x i8> [[SUB]]
@@ -711,7 +711,7 @@ svuint8_t sub_scalar_u8(svuint8_t a, uint8_t b) {
 
 // CHECK-LABEL: @sub_scalar_u16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i16> poison, i16 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i16> poison, i16 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 8 x i16> [[SPLAT_SPLATINSERT]], <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
 // CHECK-NEXT:    [[SUB:%.*]] = sub <vscale x 8 x i16> [[A:%.*]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    ret <vscale x 8 x i16> [[SUB]]
@@ -722,7 +722,7 @@ svuint16_t sub_scalar_u16(svuint16_t a, uint16_t b) {
 
 // CHECK-LABEL: @sub_scalar_u32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[SPLAT_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
 // CHECK-NEXT:    [[SUB:%.*]] = sub <vscale x 4 x i32> [[A:%.*]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[SUB]]
@@ -733,7 +733,7 @@ svuint32_t sub_scalar_u32(svuint32_t a, uint32_t b) {
 
 // CHECK-LABEL: @sub_scalar_u64(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[SPLAT_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
 // CHECK-NEXT:    [[SUB:%.*]] = sub <vscale x 2 x i64> [[A:%.*]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[SUB]]
@@ -744,7 +744,7 @@ svuint64_t sub_scalar_u64(svuint64_t a, uint64_t b) {
 
 // CHECK-LABEL: @sub_scalar_f16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 8 x half> poison, half [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 8 x half> poison, half [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 8 x half> [[SPLAT_SPLATINSERT]], <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
 // CHECK-NEXT:    [[SUB:%.*]] = fsub <vscale x 8 x half> [[A:%.*]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    ret <vscale x 8 x half> [[SUB]]
@@ -755,7 +755,7 @@ svfloat16_t sub_scalar_f16(svfloat16_t a, __fp16 b) {
 
 // CHECK-LABEL: @sub_scalar_f32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 4 x float> poison, float [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 4 x float> poison, float [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 4 x float> [[SPLAT_SPLATINSERT]], <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
 // CHECK-NEXT:    [[SUB:%.*]] = fsub <vscale x 4 x float> [[A:%.*]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    ret <vscale x 4 x float> [[SUB]]
@@ -766,7 +766,7 @@ svfloat32_t sub_scalar_f32(svfloat32_t a, float b) {
 
 // CHECK-LABEL: @sub_scalar_f64(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 2 x double> poison, double [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 2 x double> poison, double [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 2 x double> [[SPLAT_SPLATINSERT]], <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
 // CHECK-NEXT:    [[SUB:%.*]] = fsub <vscale x 2 x double> [[A:%.*]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    ret <vscale x 2 x double> [[SUB]]
@@ -977,7 +977,7 @@ svfloat64_t mul_inplace_f64(svfloat64_t a, svfloat64_t b) {
 
 // CHECK-LABEL: @mul_scalar_i8(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[SPLAT_SPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
 // CHECK-NEXT:    [[MUL:%.*]] = mul <vscale x 16 x i8> [[A:%.*]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    ret <vscale x 16 x i8> [[MUL]]
@@ -988,7 +988,7 @@ svint8_t mul_scalar_i8(svint8_t a, int8_t b) {
 
 // CHECK-LABEL: @mul_scalar_i16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i16> poison, i16 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i16> poison, i16 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 8 x i16> [[SPLAT_SPLATINSERT]], <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
 // CHECK-NEXT:    [[MUL:%.*]] = mul <vscale x 8 x i16> [[A:%.*]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    ret <vscale x 8 x i16> [[MUL]]
@@ -999,7 +999,7 @@ svint16_t mul_scalar_i16(svint16_t a, int16_t b) {
 
 // CHECK-LABEL: @mul_scalar_i32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[SPLAT_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
 // CHECK-NEXT:    [[MUL:%.*]] = mul <vscale x 4 x i32> [[A:%.*]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[MUL]]
@@ -1010,7 +1010,7 @@ svint32_t mul_scalar_i32(svint32_t a, int32_t b) {
 
 // CHECK-LABEL: @mul_scalar_i64(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[SPLAT_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
 // CHECK-NEXT:    [[MUL:%.*]] = mul <vscale x 2 x i64> [[A:%.*]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[MUL]]
@@ -1021,7 +1021,7 @@ svint64_t mul_scalar_i64(svint64_t a, int64_t b) {
 
 // CHECK-LABEL: @mul_scalar_u8(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[SPLAT_SPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
 // CHECK-NEXT:    [[MUL:%.*]] = mul <vscale x 16 x i8> [[A:%.*]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    ret <vscale x 16 x i8> [[MUL]]
@@ -1032,7 +1032,7 @@ svuint8_t mul_scalar_u8(svuint8_t a, uint8_t b) {
 
 // CHECK-LABEL: @mul_scalar_u16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i16> poison, i16 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i16> poison, i16 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 8 x i16> [[SPLAT_SPLATINSERT]], <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
 // CHECK-NEXT:    [[MUL:%.*]] = mul <vscale x 8 x i16> [[A:%.*]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    ret <vscale x 8 x i16> [[MUL]]
@@ -1043,7 +1043,7 @@ svuint16_t mul_scalar_u16(svuint16_t a, uint16_t b) {
 
 // CHECK-LABEL: @mul_scalar_u32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[SPLAT_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
 // CHECK-NEXT:    [[MUL:%.*]] = mul <vscale x 4 x i32> [[A:%.*]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[MUL]]
@@ -1054,7 +1054,7 @@ svuint32_t mul_scalar_u32(svuint32_t a, uint32_t b) {
 
 // CHECK-LABEL: @mul_scalar_u64(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[SPLAT_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
 // CHECK-NEXT:    [[MUL:%.*]] = mul <vscale x 2 x i64> [[A:%.*]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[MUL]]
@@ -1065,7 +1065,7 @@ svuint64_t mul_scalar_u64(svuint64_t a, uint64_t b) {
 
 // CHECK-LABEL: @mul_scalar_f16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 8 x half> poison, half [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 8 x half> poison, half [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 8 x half> [[SPLAT_SPLATINSERT]], <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
 // CHECK-NEXT:    [[MUL:%.*]] = fmul <vscale x 8 x half> [[A:%.*]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    ret <vscale x 8 x half> [[MUL]]
@@ -1076,7 +1076,7 @@ svfloat16_t mul_scalar_f16(svfloat16_t a, __fp16 b) {
 
 // CHECK-LABEL: @mul_scalar_f32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 4 x float> poison, float [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 4 x float> poison, float [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 4 x float> [[SPLAT_SPLATINSERT]], <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
 // CHECK-NEXT:    [[MUL:%.*]] = fmul <vscale x 4 x float> [[A:%.*]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    ret <vscale x 4 x float> [[MUL]]
@@ -1087,7 +1087,7 @@ svfloat32_t mul_scalar_f32(svfloat32_t a, float b) {
 
 // CHECK-LABEL: @mul_scalar_f64(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 2 x double> poison, double [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 2 x double> poison, double [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 2 x double> [[SPLAT_SPLATINSERT]], <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
 // CHECK-NEXT:    [[MUL:%.*]] = fmul <vscale x 2 x double> [[A:%.*]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    ret <vscale x 2 x double> [[MUL]]
@@ -1298,7 +1298,7 @@ svfloat64_t div_inplace_f64(svfloat64_t a, svfloat64_t b) {
 
 // CHECK-LABEL: @div_scalar_i8(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[SPLAT_SPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
 // CHECK-NEXT:    [[DIV:%.*]] = sdiv <vscale x 16 x i8> [[A:%.*]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    ret <vscale x 16 x i8> [[DIV]]
@@ -1309,7 +1309,7 @@ svint8_t div_scalar_i8(svint8_t a, int8_t b) {
 
 // CHECK-LABEL: @div_scalar_i16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i16> poison, i16 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i16> poison, i16 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 8 x i16> [[SPLAT_SPLATINSERT]], <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
 // CHECK-NEXT:    [[DIV:%.*]] = sdiv <vscale x 8 x i16> [[A:%.*]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    ret <vscale x 8 x i16> [[DIV]]
@@ -1320,7 +1320,7 @@ svint16_t div_scalar_i16(svint16_t a, int16_t b) {
 
 // CHECK-LABEL: @div_scalar_i32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[SPLAT_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
 // CHECK-NEXT:    [[DIV:%.*]] = sdiv <vscale x 4 x i32> [[A:%.*]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[DIV]]
@@ -1331,7 +1331,7 @@ svint32_t div_scalar_i32(svint32_t a, int32_t b) {
 
 // CHECK-LABEL: @div_scalar_i64(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[SPLAT_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
 // CHECK-NEXT:    [[DIV:%.*]] = sdiv <vscale x 2 x i64> [[A:%.*]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[DIV]]
@@ -1342,7 +1342,7 @@ svint64_t div_scalar_i64(svint64_t a, int64_t b) {
 
 // CHECK-LABEL: @div_scalar_u8(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[SPLAT_SPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
 // CHECK-NEXT:    [[DIV:%.*]] = udiv <vscale x 16 x i8> [[A:%.*]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    ret <vscale x 16 x i8> [[DIV]]
@@ -1353,7 +1353,7 @@ svuint8_t div_scalar_u8(svuint8_t a, uint8_t b) {
 
 // CHECK-LABEL: @div_scalar_u16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i16> poison, i16 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i16> poison, i16 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 8 x i16> [[SPLAT_SPLATINSERT]], <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
 // CHECK-NEXT:    [[DIV:%.*]] = udiv <vscale x 8 x i16> [[A:%.*]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    ret <vscale x 8 x i16> [[DIV]]
@@ -1364,7 +1364,7 @@ svuint16_t div_scalar_u16(svuint16_t a, uint16_t b) {
 
 // CHECK-LABEL: @div_scalar_u32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[SPLAT_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
 // CHECK-NEXT:    [[DIV:%.*]] = udiv <vscale x 4 x i32> [[A:%.*]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[DIV]]
@@ -1375,7 +1375,7 @@ svuint32_t div_scalar_u32(svuint32_t a, uint32_t b) {
 
 // CHECK-LABEL: @div_scalar_u64(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[SPLAT_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
 // CHECK-NEXT:    [[DIV:%.*]] = udiv <vscale x 2 x i64> [[A:%.*]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[DIV]]
@@ -1386,7 +1386,7 @@ svuint64_t div_scalar_u64(svuint64_t a, uint64_t b) {
 
 // CHECK-LABEL: @div_scalar_f16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 8 x half> poison, half [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 8 x half> poison, half [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 8 x half> [[SPLAT_SPLATINSERT]], <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
 // CHECK-NEXT:    [[DIV:%.*]] = fdiv <vscale x 8 x half> [[A:%.*]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    ret <vscale x 8 x half> [[DIV]]
@@ -1397,7 +1397,7 @@ svfloat16_t div_scalar_f16(svfloat16_t a, __fp16 b) {
 
 // CHECK-LABEL: @div_scalar_f32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 4 x float> poison, float [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 4 x float> poison, float [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 4 x float> [[SPLAT_SPLATINSERT]], <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
 // CHECK-NEXT:    [[DIV:%.*]] = fdiv <vscale x 4 x float> [[A:%.*]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    ret <vscale x 4 x float> [[DIV]]
@@ -1408,7 +1408,7 @@ svfloat32_t div_scalar_f32(svfloat32_t a, float b) {
 
 // CHECK-LABEL: @div_scalar_f64(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 2 x double> poison, double [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 2 x double> poison, double [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 2 x double> [[SPLAT_SPLATINSERT]], <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
 // CHECK-NEXT:    [[DIV:%.*]] = fdiv <vscale x 2 x double> [[A:%.*]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    ret <vscale x 2 x double> [[DIV]]
@@ -1565,7 +1565,7 @@ svuint64_t rem_inplace_u64(svuint64_t a, svuint64_t b) {
 
 // CHECK-LABEL: @rem_scalar_i8(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[SPLAT_SPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
 // CHECK-NEXT:    [[REM:%.*]] = srem <vscale x 16 x i8> [[A:%.*]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    ret <vscale x 16 x i8> [[REM]]
@@ -1576,7 +1576,7 @@ svint8_t rem_scalar_i8(svint8_t a, int8_t b) {
 
 // CHECK-LABEL: @rem_scalar_i16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i16> poison, i16 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i16> poison, i16 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 8 x i16> [[SPLAT_SPLATINSERT]], <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
 // CHECK-NEXT:    [[REM:%.*]] = srem <vscale x 8 x i16> [[A:%.*]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    ret <vscale x 8 x i16> [[REM]]
@@ -1587,7 +1587,7 @@ svint16_t rem_scalar_i16(svint16_t a, int16_t b) {
 
 // CHECK-LABEL: @rem_scalar_i32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[SPLAT_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
 // CHECK-NEXT:    [[REM:%.*]] = srem <vscale x 4 x i32> [[A:%.*]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[REM]]
@@ -1598,7 +1598,7 @@ svint32_t rem_scalar_i32(svint32_t a, int32_t b) {
 
 // CHECK-LABEL: @rem_scalar_i64(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[SPLAT_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
 // CHECK-NEXT:    [[REM:%.*]] = srem <vscale x 2 x i64> [[A:%.*]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[REM]]
@@ -1609,7 +1609,7 @@ svint64_t rem_scalar_i64(svint64_t a, int64_t b) {
 
 // CHECK-LABEL: @rem_scalar_u8(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[SPLAT_SPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
 // CHECK-NEXT:    [[REM:%.*]] = urem <vscale x 16 x i8> [[A:%.*]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    ret <vscale x 16 x i8> [[REM]]
@@ -1620,7 +1620,7 @@ svuint8_t rem_scalar_u8(svuint8_t a, uint8_t b) {
 
 // CHECK-LABEL: @rem_scalar_u16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i16> poison, i16 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i16> poison, i16 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 8 x i16> [[SPLAT_SPLATINSERT]], <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
 // CHECK-NEXT:    [[REM:%.*]] = urem <vscale x 8 x i16> [[A:%.*]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    ret <vscale x 8 x i16> [[REM]]
@@ -1631,7 +1631,7 @@ svuint16_t rem_scalar_u16(svuint16_t a, uint16_t b) {
 
 // CHECK-LABEL: @rem_scalar_u32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[SPLAT_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
 // CHECK-NEXT:    [[REM:%.*]] = urem <vscale x 4 x i32> [[A:%.*]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[REM]]
@@ -1642,7 +1642,7 @@ svuint32_t rem_scalar_u32(svuint32_t a, uint32_t b) {
 
 // CHECK-LABEL: @rem_scalar_u64(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[SPLAT_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
 // CHECK-NEXT:    [[REM:%.*]] = urem <vscale x 2 x i64> [[A:%.*]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[REM]]

diff  --git a/clang/test/CodeGen/aarch64-sve-vector-bitwise-ops.c b/clang/test/CodeGen/aarch64-sve-vector-bitwise-ops.c
index d39c0c27c74e0..5b97ce44e8736 100644
--- a/clang/test/CodeGen/aarch64-sve-vector-bitwise-ops.c
+++ b/clang/test/CodeGen/aarch64-sve-vector-bitwise-ops.c
@@ -260,7 +260,7 @@ svuint64_t xor_u64(svuint64_t a, svuint64_t b) {
 
 // CHECK-LABEL: @neg_bool(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[NEG:%.*]] = xor <vscale x 16 x i1> [[A:%.*]], shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i32 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer)
+// CHECK-NEXT:    [[NEG:%.*]] = xor <vscale x 16 x i1> [[A:%.*]], shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer)
 // CHECK-NEXT:    ret <vscale x 16 x i1> [[NEG]]
 //
 svbool_t neg_bool(svbool_t a) {
@@ -269,7 +269,7 @@ svbool_t neg_bool(svbool_t a) {
 
 // CHECK-LABEL: @neg_i8(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[NEG:%.*]] = xor <vscale x 16 x i8> [[A:%.*]], shufflevector (<vscale x 16 x i8> insertelement (<vscale x 16 x i8> poison, i8 -1, i32 0), <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer)
+// CHECK-NEXT:    [[NEG:%.*]] = xor <vscale x 16 x i8> [[A:%.*]], shufflevector (<vscale x 16 x i8> insertelement (<vscale x 16 x i8> poison, i8 -1, i64 0), <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer)
 // CHECK-NEXT:    ret <vscale x 16 x i8> [[NEG]]
 //
 svint8_t neg_i8(svint8_t a) {
@@ -278,7 +278,7 @@ svint8_t neg_i8(svint8_t a) {
 
 // CHECK-LABEL: @neg_i16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[NEG:%.*]] = xor <vscale x 8 x i16> [[A:%.*]], shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 -1, i32 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer)
+// CHECK-NEXT:    [[NEG:%.*]] = xor <vscale x 8 x i16> [[A:%.*]], shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 -1, i64 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer)
 // CHECK-NEXT:    ret <vscale x 8 x i16> [[NEG]]
 //
 svint16_t neg_i16(svint16_t a) {
@@ -287,7 +287,7 @@ svint16_t neg_i16(svint16_t a) {
 
 // CHECK-LABEL: @neg_i32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[NEG:%.*]] = xor <vscale x 4 x i32> [[A:%.*]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 -1, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+// CHECK-NEXT:    [[NEG:%.*]] = xor <vscale x 4 x i32> [[A:%.*]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 -1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[NEG]]
 //
 svint32_t neg_i32(svint32_t a) {
@@ -296,7 +296,7 @@ svint32_t neg_i32(svint32_t a) {
 
 // CHECK-LABEL: @neg_i64(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[NEG:%.*]] = xor <vscale x 2 x i64> [[A:%.*]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 -1, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
+// CHECK-NEXT:    [[NEG:%.*]] = xor <vscale x 2 x i64> [[A:%.*]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 -1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[NEG]]
 //
 svint64_t neg_i64(svint64_t a) {
@@ -305,7 +305,7 @@ svint64_t neg_i64(svint64_t a) {
 
 // CHECK-LABEL: @neg_u8(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[NEG:%.*]] = xor <vscale x 16 x i8> [[A:%.*]], shufflevector (<vscale x 16 x i8> insertelement (<vscale x 16 x i8> poison, i8 -1, i32 0), <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer)
+// CHECK-NEXT:    [[NEG:%.*]] = xor <vscale x 16 x i8> [[A:%.*]], shufflevector (<vscale x 16 x i8> insertelement (<vscale x 16 x i8> poison, i8 -1, i64 0), <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer)
 // CHECK-NEXT:    ret <vscale x 16 x i8> [[NEG]]
 //
 svuint8_t neg_u8(svuint8_t a) {
@@ -314,7 +314,7 @@ svuint8_t neg_u8(svuint8_t a) {
 
 // CHECK-LABEL: @neg_u16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[NEG:%.*]] = xor <vscale x 8 x i16> [[A:%.*]], shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 -1, i32 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer)
+// CHECK-NEXT:    [[NEG:%.*]] = xor <vscale x 8 x i16> [[A:%.*]], shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 -1, i64 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer)
 // CHECK-NEXT:    ret <vscale x 8 x i16> [[NEG]]
 //
 svuint16_t neg_u16(svuint16_t a) {
@@ -323,7 +323,7 @@ svuint16_t neg_u16(svuint16_t a) {
 
 // CHECK-LABEL: @neg_u32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[NEG:%.*]] = xor <vscale x 4 x i32> [[A:%.*]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 -1, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+// CHECK-NEXT:    [[NEG:%.*]] = xor <vscale x 4 x i32> [[A:%.*]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 -1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[NEG]]
 //
 svuint32_t neg_u32(svuint32_t a) {
@@ -332,7 +332,7 @@ svuint32_t neg_u32(svuint32_t a) {
 
 // CHECK-LABEL: @neg_u64(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[NEG:%.*]] = xor <vscale x 2 x i64> [[A:%.*]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 -1, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
+// CHECK-NEXT:    [[NEG:%.*]] = xor <vscale x 2 x i64> [[A:%.*]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 -1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[NEG]]
 //
 svuint64_t neg_u64(svuint64_t a) {

diff  --git a/clang/test/CodeGen/aarch64-sve-vector-shift-ops.c b/clang/test/CodeGen/aarch64-sve-vector-shift-ops.c
index cfb85d9c33786..d6b7f76508741 100644
--- a/clang/test/CodeGen/aarch64-sve-vector-shift-ops.c
+++ b/clang/test/CodeGen/aarch64-sve-vector-shift-ops.c
@@ -153,7 +153,7 @@ svuint64_t rshift_u64(svuint64_t a, svuint64_t b) {
 
 // CHECK-LABEL: @lshift_i8_rsplat(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[SPLAT_SPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
 // CHECK-NEXT:    [[SHL:%.*]] = shl <vscale x 16 x i8> [[A:%.*]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    ret <vscale x 16 x i8> [[SHL]]
@@ -164,7 +164,7 @@ svint8_t lshift_i8_rsplat(svint8_t a, int8_t b) {
 
 // CHECK-LABEL: @lshift_i8_lsplat(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[SPLAT_SPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
 // CHECK-NEXT:    [[SHL:%.*]] = shl <vscale x 16 x i8> [[SPLAT_SPLAT]], [[A:%.*]]
 // CHECK-NEXT:    ret <vscale x 16 x i8> [[SHL]]
@@ -175,7 +175,7 @@ svint8_t lshift_i8_lsplat(svint8_t a, int8_t b) {
 
 // CHECK-LABEL: @rshift_i8_rsplat(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[SPLAT_SPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
 // CHECK-NEXT:    [[SHR:%.*]] = ashr <vscale x 16 x i8> [[A:%.*]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    ret <vscale x 16 x i8> [[SHR]]
@@ -186,7 +186,7 @@ svint8_t rshift_i8_rsplat(svint8_t a, int8_t b) {
 
 // CHECK-LABEL: @rshift_i8_lsplat(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[SPLAT_SPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
 // CHECK-NEXT:    [[SHR:%.*]] = ashr <vscale x 16 x i8> [[SPLAT_SPLAT]], [[A:%.*]]
 // CHECK-NEXT:    ret <vscale x 16 x i8> [[SHR]]
@@ -197,7 +197,7 @@ svint8_t rshift_i8_lsplat(svint8_t a, int8_t b) {
 
 // CHECK-LABEL: @lshift_u8_rsplat(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[SPLAT_SPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
 // CHECK-NEXT:    [[SHL:%.*]] = shl <vscale x 16 x i8> [[A:%.*]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    ret <vscale x 16 x i8> [[SHL]]
@@ -208,7 +208,7 @@ svuint8_t lshift_u8_rsplat(svuint8_t a, uint8_t b) {
 
 // CHECK-LABEL: @lshift_u8_lsplat(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[SPLAT_SPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
 // CHECK-NEXT:    [[SHL:%.*]] = shl <vscale x 16 x i8> [[SPLAT_SPLAT]], [[A:%.*]]
 // CHECK-NEXT:    ret <vscale x 16 x i8> [[SHL]]
@@ -219,7 +219,7 @@ svuint8_t lshift_u8_lsplat(svuint8_t a, uint8_t b) {
 
 // CHECK-LABEL: @rshift_u8_rsplat(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[SPLAT_SPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
 // CHECK-NEXT:    [[SHR:%.*]] = lshr <vscale x 16 x i8> [[A:%.*]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    ret <vscale x 16 x i8> [[SHR]]
@@ -230,7 +230,7 @@ svuint8_t rshift_u8_rsplat(svuint8_t a, uint8_t b) {
 
 // CHECK-LABEL: @rshift_u8_lsplat(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[SPLAT_SPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
 // CHECK-NEXT:    [[SHR:%.*]] = lshr <vscale x 16 x i8> [[SPLAT_SPLAT]], [[A:%.*]]
 // CHECK-NEXT:    ret <vscale x 16 x i8> [[SHR]]
@@ -241,7 +241,7 @@ svuint8_t rshift_u8_lsplat(svuint8_t a, uint8_t b) {
 
 // CHECK-LABEL: @lshift_i16_rsplat(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i16> poison, i16 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i16> poison, i16 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 8 x i16> [[SPLAT_SPLATINSERT]], <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
 // CHECK-NEXT:    [[SHL:%.*]] = shl <vscale x 8 x i16> [[A:%.*]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    ret <vscale x 8 x i16> [[SHL]]
@@ -252,7 +252,7 @@ svint16_t lshift_i16_rsplat(svint16_t a, int16_t b) {
 
 // CHECK-LABEL: @lshift_i16_lsplat(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i16> poison, i16 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i16> poison, i16 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 8 x i16> [[SPLAT_SPLATINSERT]], <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
 // CHECK-NEXT:    [[SHL:%.*]] = shl <vscale x 8 x i16> [[SPLAT_SPLAT]], [[A:%.*]]
 // CHECK-NEXT:    ret <vscale x 8 x i16> [[SHL]]
@@ -263,7 +263,7 @@ svint16_t lshift_i16_lsplat(svint16_t a, int16_t b) {
 
 // CHECK-LABEL: @rshift_i16_rsplat(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i16> poison, i16 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i16> poison, i16 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 8 x i16> [[SPLAT_SPLATINSERT]], <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
 // CHECK-NEXT:    [[SHR:%.*]] = ashr <vscale x 8 x i16> [[A:%.*]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    ret <vscale x 8 x i16> [[SHR]]
@@ -274,7 +274,7 @@ svint16_t rshift_i16_rsplat(svint16_t a, int16_t b) {
 
 // CHECK-LABEL: @rshift_i16_lsplat(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i16> poison, i16 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i16> poison, i16 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 8 x i16> [[SPLAT_SPLATINSERT]], <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
 // CHECK-NEXT:    [[SHR:%.*]] = ashr <vscale x 8 x i16> [[SPLAT_SPLAT]], [[A:%.*]]
 // CHECK-NEXT:    ret <vscale x 8 x i16> [[SHR]]
@@ -285,7 +285,7 @@ svint16_t rshift_i16_lsplat(svint16_t a, int16_t b) {
 
 // CHECK-LABEL: @lshift_u16_rsplat(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i16> poison, i16 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i16> poison, i16 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 8 x i16> [[SPLAT_SPLATINSERT]], <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
 // CHECK-NEXT:    [[SHL:%.*]] = shl <vscale x 8 x i16> [[A:%.*]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    ret <vscale x 8 x i16> [[SHL]]
@@ -296,7 +296,7 @@ svuint16_t lshift_u16_rsplat(svuint16_t a, uint16_t b) {
 
 // CHECK-LABEL: @lshift_u16_lsplat(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i16> poison, i16 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i16> poison, i16 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 8 x i16> [[SPLAT_SPLATINSERT]], <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
 // CHECK-NEXT:    [[SHL:%.*]] = shl <vscale x 8 x i16> [[SPLAT_SPLAT]], [[A:%.*]]
 // CHECK-NEXT:    ret <vscale x 8 x i16> [[SHL]]
@@ -307,7 +307,7 @@ svuint16_t lshift_u16_lsplat(svuint16_t a, uint16_t b) {
 
 // CHECK-LABEL: @rshift_u16_rsplat(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i16> poison, i16 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i16> poison, i16 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 8 x i16> [[SPLAT_SPLATINSERT]], <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
 // CHECK-NEXT:    [[SHR:%.*]] = lshr <vscale x 8 x i16> [[A:%.*]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    ret <vscale x 8 x i16> [[SHR]]
@@ -318,7 +318,7 @@ svuint16_t rshift_u16_rsplat(svuint16_t a, uint16_t b) {
 
 // CHECK-LABEL: @rshift_u16_lsplat(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i16> poison, i16 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i16> poison, i16 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 8 x i16> [[SPLAT_SPLATINSERT]], <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
 // CHECK-NEXT:    [[SHR:%.*]] = lshr <vscale x 8 x i16> [[SPLAT_SPLAT]], [[A:%.*]]
 // CHECK-NEXT:    ret <vscale x 8 x i16> [[SHR]]
@@ -329,7 +329,7 @@ svuint16_t rshift_u16_lsplat(svuint16_t a, uint16_t b) {
 
 // CHECK-LABEL: @lshift_i32_rsplat(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[SPLAT_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
 // CHECK-NEXT:    [[SHL:%.*]] = shl <vscale x 4 x i32> [[A:%.*]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[SHL]]
@@ -340,7 +340,7 @@ svint32_t lshift_i32_rsplat(svint32_t a, int32_t b) {
 
 // CHECK-LABEL: @lshift_i32_lsplat(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[SPLAT_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
 // CHECK-NEXT:    [[SHL:%.*]] = shl <vscale x 4 x i32> [[SPLAT_SPLAT]], [[A:%.*]]
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[SHL]]
@@ -351,7 +351,7 @@ svint32_t lshift_i32_lsplat(svint32_t a, int32_t b) {
 
 // CHECK-LABEL: @rshift_i32_rsplat(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[SPLAT_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
 // CHECK-NEXT:    [[SHR:%.*]] = ashr <vscale x 4 x i32> [[A:%.*]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[SHR]]
@@ -362,7 +362,7 @@ svint32_t rshift_i32_rsplat(svint32_t a, int32_t b) {
 
 // CHECK-LABEL: @rshift_i32_lsplat(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[SPLAT_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
 // CHECK-NEXT:    [[SHR:%.*]] = ashr <vscale x 4 x i32> [[SPLAT_SPLAT]], [[A:%.*]]
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[SHR]]
@@ -373,7 +373,7 @@ svint32_t rshift_i32_lsplat(svint32_t a, int32_t b) {
 
 // CHECK-LABEL: @lshift_u32_rsplat(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[SPLAT_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
 // CHECK-NEXT:    [[SHL:%.*]] = shl <vscale x 4 x i32> [[A:%.*]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[SHL]]
@@ -384,7 +384,7 @@ svuint32_t lshift_u32_rsplat(svuint32_t a, uint32_t b) {
 
 // CHECK-LABEL: @lshift_u32_lsplat(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[SPLAT_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
 // CHECK-NEXT:    [[SHL:%.*]] = shl <vscale x 4 x i32> [[SPLAT_SPLAT]], [[A:%.*]]
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[SHL]]
@@ -395,7 +395,7 @@ svuint32_t lshift_u32_lsplat(svuint32_t a, uint32_t b) {
 
 // CHECK-LABEL: @rshift_u32_rsplat(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[SPLAT_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
 // CHECK-NEXT:    [[SHR:%.*]] = lshr <vscale x 4 x i32> [[A:%.*]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[SHR]]
@@ -406,7 +406,7 @@ svuint32_t rshift_u32_rsplat(svuint32_t a, uint32_t b) {
 
 // CHECK-LABEL: @rshift_u32_lsplat(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[SPLAT_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
 // CHECK-NEXT:    [[SHR:%.*]] = lshr <vscale x 4 x i32> [[SPLAT_SPLAT]], [[A:%.*]]
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[SHR]]
@@ -417,7 +417,7 @@ svuint32_t rshift_u32_lsplat(svuint32_t a, uint32_t b) {
 
 // CHECK-LABEL: @lshift_i64_rsplat(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[SPLAT_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
 // CHECK-NEXT:    [[SHL:%.*]] = shl <vscale x 2 x i64> [[A:%.*]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[SHL]]
@@ -428,7 +428,7 @@ svint64_t lshift_i64_rsplat(svint64_t a, int64_t b) {
 
 // CHECK-LABEL: @lshift_i64_lsplat(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[SPLAT_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
 // CHECK-NEXT:    [[SHL:%.*]] = shl <vscale x 2 x i64> [[SPLAT_SPLAT]], [[A:%.*]]
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[SHL]]
@@ -439,7 +439,7 @@ svint64_t lshift_i64_lsplat(svint64_t a, int64_t b) {
 
 // CHECK-LABEL: @rshift_i64_rsplat(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[SPLAT_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
 // CHECK-NEXT:    [[SHR:%.*]] = ashr <vscale x 2 x i64> [[A:%.*]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[SHR]]
@@ -450,7 +450,7 @@ svint64_t rshift_i64_rsplat(svint64_t a, int64_t b) {
 
 // CHECK-LABEL: @rshift_i64_lsplat(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[SPLAT_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
 // CHECK-NEXT:    [[SHR:%.*]] = ashr <vscale x 2 x i64> [[SPLAT_SPLAT]], [[A:%.*]]
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[SHR]]
@@ -461,7 +461,7 @@ svint64_t rshift_i64_lsplat(svint64_t a, int64_t b) {
 
 // CHECK-LABEL: @lshift_u64_rsplat(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[SPLAT_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
 // CHECK-NEXT:    [[SHL:%.*]] = shl <vscale x 2 x i64> [[A:%.*]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[SHL]]
@@ -472,7 +472,7 @@ svuint64_t lshift_u64_rsplat(svuint64_t a, uint64_t b) {
 
 // CHECK-LABEL: @lshift_u64_lsplat(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[SPLAT_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
 // CHECK-NEXT:    [[SHL:%.*]] = shl <vscale x 2 x i64> [[SPLAT_SPLAT]], [[A:%.*]]
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[SHL]]
@@ -483,7 +483,7 @@ svuint64_t lshift_u64_lsplat(svuint64_t a, uint64_t b) {
 
 // CHECK-LABEL: @rshift_u64_rsplat(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[SPLAT_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
 // CHECK-NEXT:    [[SHR:%.*]] = lshr <vscale x 2 x i64> [[A:%.*]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[SHR]]
@@ -494,7 +494,7 @@ svuint64_t rshift_u64_rsplat(svuint64_t a, uint64_t b) {
 
 // CHECK-LABEL: @rshift_u64_lsplat(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[SPLAT_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
 // CHECK-NEXT:    [[SHR:%.*]] = lshr <vscale x 2 x i64> [[SPLAT_SPLAT]], [[A:%.*]]
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[SHR]]

diff  --git a/clang/test/CodeGen/aarch64-sve-vls-arith-ops.c b/clang/test/CodeGen/aarch64-sve-vls-arith-ops.c
index 7c4ffbee6eaf0..a4648e9d59dcb 100644
--- a/clang/test/CodeGen/aarch64-sve-vls-arith-ops.c
+++ b/clang/test/CodeGen/aarch64-sve-vls-arith-ops.c
@@ -300,7 +300,7 @@ fixed_float64_t add_inplace_f64(fixed_float64_t a, fixed_float64_t b) {
 // CHECK-LABEL: @add_scalar_i8(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <64 x i8> poison, i8 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <64 x i8> poison, i8 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <64 x i8> [[SPLAT_SPLATINSERT]], <64 x i8> poison, <64 x i32> zeroinitializer
 // CHECK-NEXT:    [[ADD:%.*]] = add <64 x i8> [[A]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[ADD]], i64 0)
@@ -313,7 +313,7 @@ fixed_int8_t add_scalar_i8(fixed_int8_t a, int8_t b) {
 // CHECK-LABEL: @add_scalar_i16(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i16> poison, i16 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i16> poison, i16 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <32 x i16> [[SPLAT_SPLATINSERT]], <32 x i16> poison, <32 x i32> zeroinitializer
 // CHECK-NEXT:    [[ADD:%.*]] = add <32 x i16> [[A]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[ADD]], i64 0)
@@ -326,7 +326,7 @@ fixed_int16_t add_scalar_i16(fixed_int16_t a, int16_t b) {
 // CHECK-LABEL: @add_scalar_i32(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i32> [[SPLAT_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer
 // CHECK-NEXT:    [[ADD:%.*]] = add <16 x i32> [[A]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[ADD]], i64 0)
@@ -339,7 +339,7 @@ fixed_int32_t add_scalar_i32(fixed_int32_t a, int32_t b) {
 // CHECK-LABEL: @add_scalar_i64(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <8 x i64> [[SPLAT_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[ADD:%.*]] = add <8 x i64> [[A]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[ADD]], i64 0)
@@ -352,7 +352,7 @@ fixed_int64_t add_scalar_i64(fixed_int64_t a, int64_t b) {
 // CHECK-LABEL: @add_scalar_u8(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <64 x i8> poison, i8 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <64 x i8> poison, i8 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <64 x i8> [[SPLAT_SPLATINSERT]], <64 x i8> poison, <64 x i32> zeroinitializer
 // CHECK-NEXT:    [[ADD:%.*]] = add <64 x i8> [[A]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[ADD]], i64 0)
@@ -365,7 +365,7 @@ fixed_uint8_t add_scalar_u8(fixed_uint8_t a, uint8_t b) {
 // CHECK-LABEL: @add_scalar_u16(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i16> poison, i16 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i16> poison, i16 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <32 x i16> [[SPLAT_SPLATINSERT]], <32 x i16> poison, <32 x i32> zeroinitializer
 // CHECK-NEXT:    [[ADD:%.*]] = add <32 x i16> [[A]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[ADD]], i64 0)
@@ -378,7 +378,7 @@ fixed_uint16_t add_scalar_u16(fixed_uint16_t a, uint16_t b) {
 // CHECK-LABEL: @add_scalar_u32(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i32> [[SPLAT_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer
 // CHECK-NEXT:    [[ADD:%.*]] = add <16 x i32> [[A]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[ADD]], i64 0)
@@ -391,7 +391,7 @@ fixed_uint32_t add_scalar_u32(fixed_uint32_t a, uint32_t b) {
 // CHECK-LABEL: @add_scalar_u64(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <8 x i64> [[SPLAT_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[ADD:%.*]] = add <8 x i64> [[A]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[ADD]], i64 0)
@@ -404,7 +404,7 @@ fixed_uint64_t add_scalar_u64(fixed_uint64_t a, uint64_t b) {
 // CHECK-LABEL: @add_scalar_f16(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A:%.*]] = call <32 x half> @llvm.vector.extract.v32f16.nxv8f16(<vscale x 8 x half> [[A_COERCE:%.*]], i64 0)
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x half> poison, half [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x half> poison, half [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <32 x half> [[SPLAT_SPLATINSERT]], <32 x half> poison, <32 x i32> zeroinitializer
 // CHECK-NEXT:    [[ADD:%.*]] = fadd <32 x half> [[A]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x half> @llvm.vector.insert.nxv8f16.v32f16(<vscale x 8 x half> undef, <32 x half> [[ADD]], i64 0)
@@ -417,7 +417,7 @@ fixed_float16_t add_scalar_f16(fixed_float16_t a, __fp16 b) {
 // CHECK-LABEL: @add_scalar_f32(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A:%.*]] = call <16 x float> @llvm.vector.extract.v16f32.nxv4f32(<vscale x 4 x float> [[A_COERCE:%.*]], i64 0)
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x float> poison, float [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x float> poison, float [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <16 x float> [[SPLAT_SPLATINSERT]], <16 x float> poison, <16 x i32> zeroinitializer
 // CHECK-NEXT:    [[ADD:%.*]] = fadd <16 x float> [[A]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v16f32(<vscale x 4 x float> undef, <16 x float> [[ADD]], i64 0)
@@ -430,7 +430,7 @@ fixed_float32_t add_scalar_f32(fixed_float32_t a, float b) {
 // CHECK-LABEL: @add_scalar_f64(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A:%.*]] = call <8 x double> @llvm.vector.extract.v8f64.nxv2f64(<vscale x 2 x double> [[A_COERCE:%.*]], i64 0)
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x double> poison, double [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x double> poison, double [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <8 x double> [[SPLAT_SPLATINSERT]], <8 x double> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[ADD:%.*]] = fadd <8 x double> [[A]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x double> @llvm.vector.insert.nxv2f64.v8f64(<vscale x 2 x double> undef, <8 x double> [[ADD]], i64 0)
@@ -715,7 +715,7 @@ fixed_float64_t sub_inplace_f64(fixed_float64_t a, fixed_float64_t b) {
 // CHECK-LABEL: @sub_scalar_i8(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <64 x i8> poison, i8 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <64 x i8> poison, i8 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <64 x i8> [[SPLAT_SPLATINSERT]], <64 x i8> poison, <64 x i32> zeroinitializer
 // CHECK-NEXT:    [[SUB:%.*]] = sub <64 x i8> [[A]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[SUB]], i64 0)
@@ -728,7 +728,7 @@ fixed_int8_t sub_scalar_i8(fixed_int8_t a, int8_t b) {
 // CHECK-LABEL: @sub_scalar_i16(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i16> poison, i16 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i16> poison, i16 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <32 x i16> [[SPLAT_SPLATINSERT]], <32 x i16> poison, <32 x i32> zeroinitializer
 // CHECK-NEXT:    [[SUB:%.*]] = sub <32 x i16> [[A]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[SUB]], i64 0)
@@ -741,7 +741,7 @@ fixed_int16_t sub_scalar_i16(fixed_int16_t a, int16_t b) {
 // CHECK-LABEL: @sub_scalar_i32(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i32> [[SPLAT_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer
 // CHECK-NEXT:    [[SUB:%.*]] = sub <16 x i32> [[A]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[SUB]], i64 0)
@@ -754,7 +754,7 @@ fixed_int32_t sub_scalar_i32(fixed_int32_t a, int32_t b) {
 // CHECK-LABEL: @sub_scalar_i64(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <8 x i64> [[SPLAT_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[SUB:%.*]] = sub <8 x i64> [[A]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[SUB]], i64 0)
@@ -767,7 +767,7 @@ fixed_int64_t sub_scalar_i64(fixed_int64_t a, int64_t b) {
 // CHECK-LABEL: @sub_scalar_u8(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <64 x i8> poison, i8 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <64 x i8> poison, i8 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <64 x i8> [[SPLAT_SPLATINSERT]], <64 x i8> poison, <64 x i32> zeroinitializer
 // CHECK-NEXT:    [[SUB:%.*]] = sub <64 x i8> [[A]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[SUB]], i64 0)
@@ -780,7 +780,7 @@ fixed_uint8_t sub_scalar_u8(fixed_uint8_t a, uint8_t b) {
 // CHECK-LABEL: @sub_scalar_u16(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i16> poison, i16 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i16> poison, i16 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <32 x i16> [[SPLAT_SPLATINSERT]], <32 x i16> poison, <32 x i32> zeroinitializer
 // CHECK-NEXT:    [[SUB:%.*]] = sub <32 x i16> [[A]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[SUB]], i64 0)
@@ -793,7 +793,7 @@ fixed_uint16_t sub_scalar_u16(fixed_uint16_t a, uint16_t b) {
 // CHECK-LABEL: @sub_scalar_u32(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i32> [[SPLAT_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer
 // CHECK-NEXT:    [[SUB:%.*]] = sub <16 x i32> [[A]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[SUB]], i64 0)
@@ -806,7 +806,7 @@ fixed_uint32_t sub_scalar_u32(fixed_uint32_t a, uint32_t b) {
 // CHECK-LABEL: @sub_scalar_u64(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <8 x i64> [[SPLAT_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[SUB:%.*]] = sub <8 x i64> [[A]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[SUB]], i64 0)
@@ -819,7 +819,7 @@ fixed_uint64_t sub_scalar_u64(fixed_uint64_t a, uint64_t b) {
 // CHECK-LABEL: @sub_scalar_f16(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A:%.*]] = call <32 x half> @llvm.vector.extract.v32f16.nxv8f16(<vscale x 8 x half> [[A_COERCE:%.*]], i64 0)
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x half> poison, half [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x half> poison, half [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <32 x half> [[SPLAT_SPLATINSERT]], <32 x half> poison, <32 x i32> zeroinitializer
 // CHECK-NEXT:    [[SUB:%.*]] = fsub <32 x half> [[A]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x half> @llvm.vector.insert.nxv8f16.v32f16(<vscale x 8 x half> undef, <32 x half> [[SUB]], i64 0)
@@ -832,7 +832,7 @@ fixed_float16_t sub_scalar_f16(fixed_float16_t a, __fp16 b) {
 // CHECK-LABEL: @sub_scalar_f32(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A:%.*]] = call <16 x float> @llvm.vector.extract.v16f32.nxv4f32(<vscale x 4 x float> [[A_COERCE:%.*]], i64 0)
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x float> poison, float [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x float> poison, float [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <16 x float> [[SPLAT_SPLATINSERT]], <16 x float> poison, <16 x i32> zeroinitializer
 // CHECK-NEXT:    [[SUB:%.*]] = fsub <16 x float> [[A]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v16f32(<vscale x 4 x float> undef, <16 x float> [[SUB]], i64 0)
@@ -845,7 +845,7 @@ fixed_float32_t sub_scalar_f32(fixed_float32_t a, float b) {
 // CHECK-LABEL: @sub_scalar_f64(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A:%.*]] = call <8 x double> @llvm.vector.extract.v8f64.nxv2f64(<vscale x 2 x double> [[A_COERCE:%.*]], i64 0)
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x double> poison, double [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x double> poison, double [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <8 x double> [[SPLAT_SPLATINSERT]], <8 x double> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[SUB:%.*]] = fsub <8 x double> [[A]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x double> @llvm.vector.insert.nxv2f64.v8f64(<vscale x 2 x double> undef, <8 x double> [[SUB]], i64 0)
@@ -1130,7 +1130,7 @@ fixed_float64_t mul_inplace_f64(fixed_float64_t a, fixed_float64_t b) {
 // CHECK-LABEL: @mul_scalar_i8(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <64 x i8> poison, i8 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <64 x i8> poison, i8 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <64 x i8> [[SPLAT_SPLATINSERT]], <64 x i8> poison, <64 x i32> zeroinitializer
 // CHECK-NEXT:    [[MUL:%.*]] = mul <64 x i8> [[A]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[MUL]], i64 0)
@@ -1143,7 +1143,7 @@ fixed_int8_t mul_scalar_i8(fixed_int8_t a, int8_t b) {
 // CHECK-LABEL: @mul_scalar_i16(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i16> poison, i16 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i16> poison, i16 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <32 x i16> [[SPLAT_SPLATINSERT]], <32 x i16> poison, <32 x i32> zeroinitializer
 // CHECK-NEXT:    [[MUL:%.*]] = mul <32 x i16> [[A]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[MUL]], i64 0)
@@ -1156,7 +1156,7 @@ fixed_int16_t mul_scalar_i16(fixed_int16_t a, int16_t b) {
 // CHECK-LABEL: @mul_scalar_i32(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i32> [[SPLAT_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer
 // CHECK-NEXT:    [[MUL:%.*]] = mul <16 x i32> [[A]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[MUL]], i64 0)
@@ -1169,7 +1169,7 @@ fixed_int32_t mul_scalar_i32(fixed_int32_t a, int32_t b) {
 // CHECK-LABEL: @mul_scalar_i64(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <8 x i64> [[SPLAT_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[MUL:%.*]] = mul <8 x i64> [[A]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[MUL]], i64 0)
@@ -1182,7 +1182,7 @@ fixed_int64_t mul_scalar_i64(fixed_int64_t a, int64_t b) {
 // CHECK-LABEL: @mul_scalar_u8(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <64 x i8> poison, i8 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <64 x i8> poison, i8 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <64 x i8> [[SPLAT_SPLATINSERT]], <64 x i8> poison, <64 x i32> zeroinitializer
 // CHECK-NEXT:    [[MUL:%.*]] = mul <64 x i8> [[A]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[MUL]], i64 0)
@@ -1195,7 +1195,7 @@ fixed_uint8_t mul_scalar_u8(fixed_uint8_t a, uint8_t b) {
 // CHECK-LABEL: @mul_scalar_u16(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i16> poison, i16 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i16> poison, i16 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <32 x i16> [[SPLAT_SPLATINSERT]], <32 x i16> poison, <32 x i32> zeroinitializer
 // CHECK-NEXT:    [[MUL:%.*]] = mul <32 x i16> [[A]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[MUL]], i64 0)
@@ -1208,7 +1208,7 @@ fixed_uint16_t mul_scalar_u16(fixed_uint16_t a, uint16_t b) {
 // CHECK-LABEL: @mul_scalar_u32(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i32> [[SPLAT_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer
 // CHECK-NEXT:    [[MUL:%.*]] = mul <16 x i32> [[A]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[MUL]], i64 0)
@@ -1221,7 +1221,7 @@ fixed_uint32_t mul_scalar_u32(fixed_uint32_t a, uint32_t b) {
 // CHECK-LABEL: @mul_scalar_u64(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <8 x i64> [[SPLAT_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[MUL:%.*]] = mul <8 x i64> [[A]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[MUL]], i64 0)
@@ -1234,7 +1234,7 @@ fixed_uint64_t mul_scalar_u64(fixed_uint64_t a, uint64_t b) {
 // CHECK-LABEL: @mul_scalar_f16(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A:%.*]] = call <32 x half> @llvm.vector.extract.v32f16.nxv8f16(<vscale x 8 x half> [[A_COERCE:%.*]], i64 0)
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x half> poison, half [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x half> poison, half [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <32 x half> [[SPLAT_SPLATINSERT]], <32 x half> poison, <32 x i32> zeroinitializer
 // CHECK-NEXT:    [[MUL:%.*]] = fmul <32 x half> [[A]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x half> @llvm.vector.insert.nxv8f16.v32f16(<vscale x 8 x half> undef, <32 x half> [[MUL]], i64 0)
@@ -1247,7 +1247,7 @@ fixed_float16_t mul_scalar_f16(fixed_float16_t a, __fp16 b) {
 // CHECK-LABEL: @mul_scalar_f32(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A:%.*]] = call <16 x float> @llvm.vector.extract.v16f32.nxv4f32(<vscale x 4 x float> [[A_COERCE:%.*]], i64 0)
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x float> poison, float [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x float> poison, float [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <16 x float> [[SPLAT_SPLATINSERT]], <16 x float> poison, <16 x i32> zeroinitializer
 // CHECK-NEXT:    [[MUL:%.*]] = fmul <16 x float> [[A]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v16f32(<vscale x 4 x float> undef, <16 x float> [[MUL]], i64 0)
@@ -1260,7 +1260,7 @@ fixed_float32_t mul_scalar_f32(fixed_float32_t a, float b) {
 // CHECK-LABEL: @mul_scalar_f64(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A:%.*]] = call <8 x double> @llvm.vector.extract.v8f64.nxv2f64(<vscale x 2 x double> [[A_COERCE:%.*]], i64 0)
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x double> poison, double [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x double> poison, double [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <8 x double> [[SPLAT_SPLATINSERT]], <8 x double> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[MUL:%.*]] = fmul <8 x double> [[A]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x double> @llvm.vector.insert.nxv2f64.v8f64(<vscale x 2 x double> undef, <8 x double> [[MUL]], i64 0)
@@ -1545,7 +1545,7 @@ fixed_float64_t div_inplace_f64(fixed_float64_t a, fixed_float64_t b) {
 // CHECK-LABEL: @div_scalar_i8(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <64 x i8> poison, i8 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <64 x i8> poison, i8 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <64 x i8> [[SPLAT_SPLATINSERT]], <64 x i8> poison, <64 x i32> zeroinitializer
 // CHECK-NEXT:    [[DIV:%.*]] = sdiv <64 x i8> [[A]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[DIV]], i64 0)
@@ -1558,7 +1558,7 @@ fixed_int8_t div_scalar_i8(fixed_int8_t a, int8_t b) {
 // CHECK-LABEL: @div_scalar_i16(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i16> poison, i16 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i16> poison, i16 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <32 x i16> [[SPLAT_SPLATINSERT]], <32 x i16> poison, <32 x i32> zeroinitializer
 // CHECK-NEXT:    [[DIV:%.*]] = sdiv <32 x i16> [[A]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[DIV]], i64 0)
@@ -1571,7 +1571,7 @@ fixed_int16_t div_scalar_i16(fixed_int16_t a, int16_t b) {
 // CHECK-LABEL: @div_scalar_i32(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i32> [[SPLAT_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer
 // CHECK-NEXT:    [[DIV:%.*]] = sdiv <16 x i32> [[A]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[DIV]], i64 0)
@@ -1584,7 +1584,7 @@ fixed_int32_t div_scalar_i32(fixed_int32_t a, int32_t b) {
 // CHECK-LABEL: @div_scalar_i64(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <8 x i64> [[SPLAT_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[DIV:%.*]] = sdiv <8 x i64> [[A]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[DIV]], i64 0)
@@ -1597,7 +1597,7 @@ fixed_int64_t div_scalar_i64(fixed_int64_t a, int64_t b) {
 // CHECK-LABEL: @div_scalar_u8(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <64 x i8> poison, i8 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <64 x i8> poison, i8 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <64 x i8> [[SPLAT_SPLATINSERT]], <64 x i8> poison, <64 x i32> zeroinitializer
 // CHECK-NEXT:    [[DIV:%.*]] = udiv <64 x i8> [[A]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[DIV]], i64 0)
@@ -1610,7 +1610,7 @@ fixed_uint8_t div_scalar_u8(fixed_uint8_t a, uint8_t b) {
 // CHECK-LABEL: @div_scalar_u16(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i16> poison, i16 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i16> poison, i16 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <32 x i16> [[SPLAT_SPLATINSERT]], <32 x i16> poison, <32 x i32> zeroinitializer
 // CHECK-NEXT:    [[DIV:%.*]] = udiv <32 x i16> [[A]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[DIV]], i64 0)
@@ -1623,7 +1623,7 @@ fixed_uint16_t div_scalar_u16(fixed_uint16_t a, uint16_t b) {
 // CHECK-LABEL: @div_scalar_u32(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i32> [[SPLAT_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer
 // CHECK-NEXT:    [[DIV:%.*]] = udiv <16 x i32> [[A]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[DIV]], i64 0)
@@ -1636,7 +1636,7 @@ fixed_uint32_t div_scalar_u32(fixed_uint32_t a, uint32_t b) {
 // CHECK-LABEL: @div_scalar_u64(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <8 x i64> [[SPLAT_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[DIV:%.*]] = udiv <8 x i64> [[A]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[DIV]], i64 0)
@@ -1649,7 +1649,7 @@ fixed_uint64_t div_scalar_u64(fixed_uint64_t a, uint64_t b) {
 // CHECK-LABEL: @div_scalar_f16(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A:%.*]] = call <32 x half> @llvm.vector.extract.v32f16.nxv8f16(<vscale x 8 x half> [[A_COERCE:%.*]], i64 0)
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x half> poison, half [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x half> poison, half [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <32 x half> [[SPLAT_SPLATINSERT]], <32 x half> poison, <32 x i32> zeroinitializer
 // CHECK-NEXT:    [[DIV:%.*]] = fdiv <32 x half> [[A]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x half> @llvm.vector.insert.nxv8f16.v32f16(<vscale x 8 x half> undef, <32 x half> [[DIV]], i64 0)
@@ -1662,7 +1662,7 @@ fixed_float16_t div_scalar_f16(fixed_float16_t a, __fp16 b) {
 // CHECK-LABEL: @div_scalar_f32(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A:%.*]] = call <16 x float> @llvm.vector.extract.v16f32.nxv4f32(<vscale x 4 x float> [[A_COERCE:%.*]], i64 0)
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x float> poison, float [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x float> poison, float [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <16 x float> [[SPLAT_SPLATINSERT]], <16 x float> poison, <16 x i32> zeroinitializer
 // CHECK-NEXT:    [[DIV:%.*]] = fdiv <16 x float> [[A]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v16f32(<vscale x 4 x float> undef, <16 x float> [[DIV]], i64 0)
@@ -1675,7 +1675,7 @@ fixed_float32_t div_scalar_f32(fixed_float32_t a, float b) {
 // CHECK-LABEL: @div_scalar_f64(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A:%.*]] = call <8 x double> @llvm.vector.extract.v8f64.nxv2f64(<vscale x 2 x double> [[A_COERCE:%.*]], i64 0)
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x double> poison, double [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x double> poison, double [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <8 x double> [[SPLAT_SPLATINSERT]], <8 x double> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[DIV:%.*]] = fdiv <8 x double> [[A]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x double> @llvm.vector.insert.nxv2f64.v8f64(<vscale x 2 x double> undef, <8 x double> [[DIV]], i64 0)
@@ -1882,7 +1882,7 @@ fixed_uint64_t rem_inplace_u64(fixed_uint64_t a, fixed_uint64_t b) {
 // CHECK-LABEL: @rem_scalar_i8(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <64 x i8> poison, i8 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <64 x i8> poison, i8 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <64 x i8> [[SPLAT_SPLATINSERT]], <64 x i8> poison, <64 x i32> zeroinitializer
 // CHECK-NEXT:    [[REM:%.*]] = srem <64 x i8> [[A]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[REM]], i64 0)
@@ -1895,7 +1895,7 @@ fixed_int8_t rem_scalar_i8(fixed_int8_t a, int8_t b) {
 // CHECK-LABEL: @rem_scalar_i16(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i16> poison, i16 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i16> poison, i16 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <32 x i16> [[SPLAT_SPLATINSERT]], <32 x i16> poison, <32 x i32> zeroinitializer
 // CHECK-NEXT:    [[REM:%.*]] = srem <32 x i16> [[A]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[REM]], i64 0)
@@ -1908,7 +1908,7 @@ fixed_int16_t rem_scalar_i16(fixed_int16_t a, int16_t b) {
 // CHECK-LABEL: @rem_scalar_i32(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i32> [[SPLAT_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer
 // CHECK-NEXT:    [[REM:%.*]] = srem <16 x i32> [[A]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[REM]], i64 0)
@@ -1921,7 +1921,7 @@ fixed_int32_t rem_scalar_i32(fixed_int32_t a, int32_t b) {
 // CHECK-LABEL: @rem_scalar_i64(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <8 x i64> [[SPLAT_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[REM:%.*]] = srem <8 x i64> [[A]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[REM]], i64 0)
@@ -1934,7 +1934,7 @@ fixed_int64_t rem_scalar_i64(fixed_int64_t a, int64_t b) {
 // CHECK-LABEL: @rem_scalar_u8(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <64 x i8> poison, i8 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <64 x i8> poison, i8 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <64 x i8> [[SPLAT_SPLATINSERT]], <64 x i8> poison, <64 x i32> zeroinitializer
 // CHECK-NEXT:    [[REM:%.*]] = urem <64 x i8> [[A]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[REM]], i64 0)
@@ -1947,7 +1947,7 @@ fixed_uint8_t rem_scalar_u8(fixed_uint8_t a, uint8_t b) {
 // CHECK-LABEL: @rem_scalar_u16(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i16> poison, i16 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i16> poison, i16 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <32 x i16> [[SPLAT_SPLATINSERT]], <32 x i16> poison, <32 x i32> zeroinitializer
 // CHECK-NEXT:    [[REM:%.*]] = urem <32 x i16> [[A]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[REM]], i64 0)
@@ -1960,7 +1960,7 @@ fixed_uint16_t rem_scalar_u16(fixed_uint16_t a, uint16_t b) {
 // CHECK-LABEL: @rem_scalar_u32(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i32> [[SPLAT_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer
 // CHECK-NEXT:    [[REM:%.*]] = urem <16 x i32> [[A]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[REM]], i64 0)
@@ -1973,7 +1973,7 @@ fixed_uint32_t rem_scalar_u32(fixed_uint32_t a, uint32_t b) {
 // CHECK-LABEL: @rem_scalar_u64(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <8 x i64> [[SPLAT_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[REM:%.*]] = urem <8 x i64> [[A]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[REM]], i64 0)

diff  --git a/clang/test/CodeGen/aarch64-sve-vls-shift-ops.c b/clang/test/CodeGen/aarch64-sve-vls-shift-ops.c
index 70f1679ad0c65..05b8f3535df70 100644
--- a/clang/test/CodeGen/aarch64-sve-vls-shift-ops.c
+++ b/clang/test/CodeGen/aarch64-sve-vls-shift-ops.c
@@ -221,7 +221,7 @@ fixed_uint64_t rshift_u64(fixed_uint64_t a, fixed_uint64_t b) {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CONV:%.*]] = sext i8 [[B:%.*]] to i32
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <64 x i32> poison, i32 [[CONV]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <64 x i32> poison, i32 [[CONV]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <64 x i32> [[SPLAT_SPLATINSERT]], <64 x i32> poison, <64 x i32> zeroinitializer
 // CHECK-NEXT:    [[SH_PROM:%.*]] = trunc <64 x i32> [[SPLAT_SPLAT]] to <64 x i8>
 // CHECK-NEXT:    [[SHL:%.*]] = shl <64 x i8> [[A]], [[SH_PROM]]
@@ -235,7 +235,7 @@ fixed_int8_t lshift_i8_rsplat(fixed_int8_t a, int8_t b) {
 // CHECK-LABEL: @lshift_i8_lsplat(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <64 x i8> poison, i8 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <64 x i8> poison, i8 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <64 x i8> [[SPLAT_SPLATINSERT]], <64 x i8> poison, <64 x i32> zeroinitializer
 // CHECK-NEXT:    [[SHL:%.*]] = shl <64 x i8> [[SPLAT_SPLAT]], [[A]]
 // CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[SHL]], i64 0)
@@ -249,7 +249,7 @@ fixed_int8_t lshift_i8_lsplat(fixed_int8_t a, int8_t b) {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CONV:%.*]] = sext i8 [[B:%.*]] to i32
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <64 x i32> poison, i32 [[CONV]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <64 x i32> poison, i32 [[CONV]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <64 x i32> [[SPLAT_SPLATINSERT]], <64 x i32> poison, <64 x i32> zeroinitializer
 // CHECK-NEXT:    [[SH_PROM:%.*]] = trunc <64 x i32> [[SPLAT_SPLAT]] to <64 x i8>
 // CHECK-NEXT:    [[SHR:%.*]] = ashr <64 x i8> [[A]], [[SH_PROM]]
@@ -263,7 +263,7 @@ fixed_int8_t rshift_i8_rsplat(fixed_int8_t a, int8_t b) {
 // CHECK-LABEL: @rshift_i8_lsplat(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <64 x i8> poison, i8 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <64 x i8> poison, i8 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <64 x i8> [[SPLAT_SPLATINSERT]], <64 x i8> poison, <64 x i32> zeroinitializer
 // CHECK-NEXT:    [[SHR:%.*]] = ashr <64 x i8> [[SPLAT_SPLAT]], [[A]]
 // CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[SHR]], i64 0)
@@ -277,7 +277,7 @@ fixed_int8_t rshift_i8_lsplat(fixed_int8_t a, int8_t b) {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CONV:%.*]] = zext i8 [[B:%.*]] to i32
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <64 x i32> poison, i32 [[CONV]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <64 x i32> poison, i32 [[CONV]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <64 x i32> [[SPLAT_SPLATINSERT]], <64 x i32> poison, <64 x i32> zeroinitializer
 // CHECK-NEXT:    [[SH_PROM:%.*]] = trunc <64 x i32> [[SPLAT_SPLAT]] to <64 x i8>
 // CHECK-NEXT:    [[SHL:%.*]] = shl <64 x i8> [[A]], [[SH_PROM]]
@@ -291,7 +291,7 @@ fixed_uint8_t lshift_u8_rsplat(fixed_uint8_t a, uint8_t b) {
 // CHECK-LABEL: @lshift_u8_lsplat(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <64 x i8> poison, i8 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <64 x i8> poison, i8 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <64 x i8> [[SPLAT_SPLATINSERT]], <64 x i8> poison, <64 x i32> zeroinitializer
 // CHECK-NEXT:    [[SHL:%.*]] = shl <64 x i8> [[SPLAT_SPLAT]], [[A]]
 // CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[SHL]], i64 0)
@@ -305,7 +305,7 @@ fixed_uint8_t lshift_u8_lsplat(fixed_uint8_t a, uint8_t b) {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CONV:%.*]] = zext i8 [[B:%.*]] to i32
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <64 x i32> poison, i32 [[CONV]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <64 x i32> poison, i32 [[CONV]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <64 x i32> [[SPLAT_SPLATINSERT]], <64 x i32> poison, <64 x i32> zeroinitializer
 // CHECK-NEXT:    [[SH_PROM:%.*]] = trunc <64 x i32> [[SPLAT_SPLAT]] to <64 x i8>
 // CHECK-NEXT:    [[SHR:%.*]] = lshr <64 x i8> [[A]], [[SH_PROM]]
@@ -319,7 +319,7 @@ fixed_uint8_t rshift_u8_rsplat(fixed_uint8_t a, uint8_t b) {
 // CHECK-LABEL: @rshift_u8_lsplat(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <64 x i8> poison, i8 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <64 x i8> poison, i8 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <64 x i8> [[SPLAT_SPLATINSERT]], <64 x i8> poison, <64 x i32> zeroinitializer
 // CHECK-NEXT:    [[SHR:%.*]] = lshr <64 x i8> [[SPLAT_SPLAT]], [[A]]
 // CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[SHR]], i64 0)
@@ -333,7 +333,7 @@ fixed_uint8_t rshift_u8_lsplat(fixed_uint8_t a, uint8_t b) {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CONV:%.*]] = sext i16 [[B:%.*]] to i32
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i32> poison, i32 [[CONV]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i32> poison, i32 [[CONV]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <32 x i32> [[SPLAT_SPLATINSERT]], <32 x i32> poison, <32 x i32> zeroinitializer
 // CHECK-NEXT:    [[SH_PROM:%.*]] = trunc <32 x i32> [[SPLAT_SPLAT]] to <32 x i16>
 // CHECK-NEXT:    [[SHL:%.*]] = shl <32 x i16> [[A]], [[SH_PROM]]
@@ -347,7 +347,7 @@ fixed_int16_t lshift_i16_rsplat(fixed_int16_t a, int16_t b) {
 // CHECK-LABEL: @lshift_i16_lsplat(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i16> poison, i16 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i16> poison, i16 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <32 x i16> [[SPLAT_SPLATINSERT]], <32 x i16> poison, <32 x i32> zeroinitializer
 // CHECK-NEXT:    [[SHL:%.*]] = shl <32 x i16> [[SPLAT_SPLAT]], [[A]]
 // CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[SHL]], i64 0)
@@ -361,7 +361,7 @@ fixed_int16_t lshift_i16_lsplat(fixed_int16_t a, int16_t b) {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CONV:%.*]] = sext i16 [[B:%.*]] to i32
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i32> poison, i32 [[CONV]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i32> poison, i32 [[CONV]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <32 x i32> [[SPLAT_SPLATINSERT]], <32 x i32> poison, <32 x i32> zeroinitializer
 // CHECK-NEXT:    [[SH_PROM:%.*]] = trunc <32 x i32> [[SPLAT_SPLAT]] to <32 x i16>
 // CHECK-NEXT:    [[SHR:%.*]] = ashr <32 x i16> [[A]], [[SH_PROM]]
@@ -375,7 +375,7 @@ fixed_int16_t rshift_i16_rsplat(fixed_int16_t a, int16_t b) {
 // CHECK-LABEL: @rshift_i16_lsplat(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i16> poison, i16 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i16> poison, i16 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <32 x i16> [[SPLAT_SPLATINSERT]], <32 x i16> poison, <32 x i32> zeroinitializer
 // CHECK-NEXT:    [[SHR:%.*]] = ashr <32 x i16> [[SPLAT_SPLAT]], [[A]]
 // CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[SHR]], i64 0)
@@ -389,7 +389,7 @@ fixed_int16_t rshift_i16_lsplat(fixed_int16_t a, int16_t b) {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CONV:%.*]] = zext i16 [[B:%.*]] to i32
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i32> poison, i32 [[CONV]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i32> poison, i32 [[CONV]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <32 x i32> [[SPLAT_SPLATINSERT]], <32 x i32> poison, <32 x i32> zeroinitializer
 // CHECK-NEXT:    [[SH_PROM:%.*]] = trunc <32 x i32> [[SPLAT_SPLAT]] to <32 x i16>
 // CHECK-NEXT:    [[SHL:%.*]] = shl <32 x i16> [[A]], [[SH_PROM]]
@@ -403,7 +403,7 @@ fixed_uint16_t lshift_u16_rsplat(fixed_uint16_t a, uint16_t b) {
 // CHECK-LABEL: @lshift_u16_lsplat(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i16> poison, i16 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i16> poison, i16 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <32 x i16> [[SPLAT_SPLATINSERT]], <32 x i16> poison, <32 x i32> zeroinitializer
 // CHECK-NEXT:    [[SHL:%.*]] = shl <32 x i16> [[SPLAT_SPLAT]], [[A]]
 // CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[SHL]], i64 0)
@@ -417,7 +417,7 @@ fixed_uint16_t lshift_u16_lsplat(fixed_uint16_t a, uint16_t b) {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
 // CHECK-NEXT:    [[CONV:%.*]] = zext i16 [[B:%.*]] to i32
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i32> poison, i32 [[CONV]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i32> poison, i32 [[CONV]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <32 x i32> [[SPLAT_SPLATINSERT]], <32 x i32> poison, <32 x i32> zeroinitializer
 // CHECK-NEXT:    [[SH_PROM:%.*]] = trunc <32 x i32> [[SPLAT_SPLAT]] to <32 x i16>
 // CHECK-NEXT:    [[SHR:%.*]] = lshr <32 x i16> [[A]], [[SH_PROM]]
@@ -431,7 +431,7 @@ fixed_uint16_t rshift_u16_rsplat(fixed_uint16_t a, uint16_t b) {
 // CHECK-LABEL: @rshift_u16_lsplat(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i16> poison, i16 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i16> poison, i16 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <32 x i16> [[SPLAT_SPLATINSERT]], <32 x i16> poison, <32 x i32> zeroinitializer
 // CHECK-NEXT:    [[SHR:%.*]] = lshr <32 x i16> [[SPLAT_SPLAT]], [[A]]
 // CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[SHR]], i64 0)
@@ -444,7 +444,7 @@ fixed_uint16_t rshift_u16_lsplat(fixed_uint16_t a, uint16_t b) {
 // CHECK-LABEL: @lshift_i32_rsplat(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i32> [[SPLAT_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer
 // CHECK-NEXT:    [[SHL:%.*]] = shl <16 x i32> [[A]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[SHL]], i64 0)
@@ -457,7 +457,7 @@ fixed_int32_t lshift_i32_rsplat(fixed_int32_t a, int32_t b) {
 // CHECK-LABEL: @lshift_i32_lsplat(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i32> [[SPLAT_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer
 // CHECK-NEXT:    [[SHL:%.*]] = shl <16 x i32> [[SPLAT_SPLAT]], [[A]]
 // CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[SHL]], i64 0)
@@ -470,7 +470,7 @@ fixed_int32_t lshift_i32_lsplat(fixed_int32_t a, int32_t b) {
 // CHECK-LABEL: @rshift_i32_rsplat(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i32> [[SPLAT_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer
 // CHECK-NEXT:    [[SHR:%.*]] = ashr <16 x i32> [[A]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[SHR]], i64 0)
@@ -483,7 +483,7 @@ fixed_int32_t rshift_i32_rsplat(fixed_int32_t a, int32_t b) {
 // CHECK-LABEL: @rshift_i32_lsplat(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i32> [[SPLAT_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer
 // CHECK-NEXT:    [[SHR:%.*]] = ashr <16 x i32> [[SPLAT_SPLAT]], [[A]]
 // CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[SHR]], i64 0)
@@ -496,7 +496,7 @@ fixed_int32_t rshift_i32_lsplat(fixed_int32_t a, int32_t b) {
 // CHECK-LABEL: @lshift_u32_rsplat(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i32> [[SPLAT_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer
 // CHECK-NEXT:    [[SHL:%.*]] = shl <16 x i32> [[A]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[SHL]], i64 0)
@@ -509,7 +509,7 @@ fixed_uint32_t lshift_u32_rsplat(fixed_uint32_t a, uint32_t b) {
 // CHECK-LABEL: @lshift_u32_lsplat(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i32> [[SPLAT_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer
 // CHECK-NEXT:    [[SHL:%.*]] = shl <16 x i32> [[SPLAT_SPLAT]], [[A]]
 // CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[SHL]], i64 0)
@@ -522,7 +522,7 @@ fixed_uint32_t lshift_u32_lsplat(fixed_uint32_t a, uint32_t b) {
 // CHECK-LABEL: @rshift_u32_rsplat(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i32> [[SPLAT_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer
 // CHECK-NEXT:    [[SHR:%.*]] = lshr <16 x i32> [[A]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[SHR]], i64 0)
@@ -535,7 +535,7 @@ fixed_uint32_t rshift_u32_rsplat(fixed_uint32_t a, uint32_t b) {
 // CHECK-LABEL: @rshift_u32_lsplat(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i32> [[SPLAT_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer
 // CHECK-NEXT:    [[SHR:%.*]] = lshr <16 x i32> [[SPLAT_SPLAT]], [[A]]
 // CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[SHR]], i64 0)
@@ -548,7 +548,7 @@ fixed_uint32_t rshift_u32_lsplat(fixed_uint32_t a, uint32_t b) {
 // CHECK-LABEL: @lshift_i64_rsplat(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <8 x i64> [[SPLAT_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[SHL:%.*]] = shl <8 x i64> [[A]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[SHL]], i64 0)
@@ -561,7 +561,7 @@ fixed_int64_t lshift_i64_rsplat(fixed_int64_t a, int64_t b) {
 // CHECK-LABEL: @lshift_i64_lsplat(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <8 x i64> [[SPLAT_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[SHL:%.*]] = shl <8 x i64> [[SPLAT_SPLAT]], [[A]]
 // CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[SHL]], i64 0)
@@ -574,7 +574,7 @@ fixed_int64_t lshift_i64_lsplat(fixed_int64_t a, int64_t b) {
 // CHECK-LABEL: @rshift_i64_rsplat(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <8 x i64> [[SPLAT_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[SHR:%.*]] = ashr <8 x i64> [[A]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[SHR]], i64 0)
@@ -587,7 +587,7 @@ fixed_int64_t rshift_i64_rsplat(fixed_int64_t a, int64_t b) {
 // CHECK-LABEL: @rshift_i64_lsplat(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <8 x i64> [[SPLAT_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[SHR:%.*]] = ashr <8 x i64> [[SPLAT_SPLAT]], [[A]]
 // CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[SHR]], i64 0)
@@ -600,7 +600,7 @@ fixed_int64_t rshift_i64_lsplat(fixed_int64_t a, int64_t b) {
 // CHECK-LABEL: @lshift_u64_rsplat(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <8 x i64> [[SPLAT_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[SHL:%.*]] = shl <8 x i64> [[A]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[SHL]], i64 0)
@@ -613,7 +613,7 @@ fixed_uint64_t lshift_u64_rsplat(fixed_uint64_t a, uint64_t b) {
 // CHECK-LABEL: @lshift_u64_lsplat(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <8 x i64> [[SPLAT_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[SHL:%.*]] = shl <8 x i64> [[SPLAT_SPLAT]], [[A]]
 // CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[SHL]], i64 0)
@@ -626,7 +626,7 @@ fixed_uint64_t lshift_u64_lsplat(fixed_uint64_t a, uint64_t b) {
 // CHECK-LABEL: @rshift_u64_rsplat(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <8 x i64> [[SPLAT_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[SHR:%.*]] = lshr <8 x i64> [[A]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[SHR]], i64 0)
@@ -639,7 +639,7 @@ fixed_uint64_t rshift_u64_rsplat(fixed_uint64_t a, uint64_t b) {
 // CHECK-LABEL: @rshift_u64_lsplat(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
-// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <8 x i64> [[SPLAT_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[SHR:%.*]] = lshr <8 x i64> [[SPLAT_SPLAT]], [[A]]
 // CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[SHR]], i64 0)

diff  --git a/clang/test/CodeGen/arm-mve-intrinsics/compare.c b/clang/test/CodeGen/arm-mve-intrinsics/compare.c
index 84a6d561fc899..9f68735e8df35 100644
--- a/clang/test/CodeGen/arm-mve-intrinsics/compare.c
+++ b/clang/test/CodeGen/arm-mve-intrinsics/compare.c
@@ -136,7 +136,7 @@ mve_pred16_t test_vcmpeqq_u32(uint32x4_t a, uint32x4_t b)
 
 // CHECK-LABEL: @test_vcmpeqq_n_f16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = fcmp oeq <8 x half> [[A:%.*]], [[DOTSPLAT]]
 // CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]])
@@ -154,7 +154,7 @@ mve_pred16_t test_vcmpeqq_n_f16(float16x8_t a, float16_t b)
 
 // CHECK-LABEL: @test_vcmpeqq_n_f32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = fcmp oeq <4 x float> [[A:%.*]], [[DOTSPLAT]]
 // CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP0]])
@@ -172,7 +172,7 @@ mve_pred16_t test_vcmpeqq_n_f32(float32x4_t a, float32_t b)
 
 // CHECK-LABEL: @test_vcmpeqq_n_s8(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = icmp eq <16 x i8> [[A:%.*]], [[DOTSPLAT]]
 // CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> [[TMP0]])
@@ -190,7 +190,7 @@ mve_pred16_t test_vcmpeqq_n_s8(int8x16_t a, int8_t b)
 
 // CHECK-LABEL: @test_vcmpeqq_n_s16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = icmp eq <8 x i16> [[A:%.*]], [[DOTSPLAT]]
 // CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]])
@@ -208,7 +208,7 @@ mve_pred16_t test_vcmpeqq_n_s16(int16x8_t a, int16_t b)
 
 // CHECK-LABEL: @test_vcmpeqq_n_s32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = icmp eq <4 x i32> [[A:%.*]], [[DOTSPLAT]]
 // CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP0]])
@@ -226,7 +226,7 @@ mve_pred16_t test_vcmpeqq_n_s32(int32x4_t a, int32_t b)
 
 // CHECK-LABEL: @test_vcmpeqq_n_u8(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = icmp eq <16 x i8> [[A:%.*]], [[DOTSPLAT]]
 // CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> [[TMP0]])
@@ -244,7 +244,7 @@ mve_pred16_t test_vcmpeqq_n_u8(uint8x16_t a, uint8_t b)
 
 // CHECK-LABEL: @test_vcmpeqq_n_u16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = icmp eq <8 x i16> [[A:%.*]], [[DOTSPLAT]]
 // CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]])
@@ -262,7 +262,7 @@ mve_pred16_t test_vcmpeqq_n_u16(uint16x8_t a, uint16_t b)
 
 // CHECK-LABEL: @test_vcmpeqq_n_u32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = icmp eq <4 x i32> [[A:%.*]], [[DOTSPLAT]]
 // CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP0]])
@@ -434,7 +434,7 @@ mve_pred16_t test_vcmpeqq_m_u32(uint32x4_t a, uint32x4_t b, mve_pred16_t p)
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
 // CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP2:%.*]] = fcmp oeq <8 x half> [[A:%.*]], [[DOTSPLAT]]
 // CHECK-NEXT:    [[TMP3:%.*]] = and <8 x i1> [[TMP1]], [[TMP2]]
@@ -455,7 +455,7 @@ mve_pred16_t test_vcmpeqq_m_n_f16(float16x8_t a, float16_t b, mve_pred16_t p)
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
 // CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP2:%.*]] = fcmp oeq <4 x float> [[A:%.*]], [[DOTSPLAT]]
 // CHECK-NEXT:    [[TMP3:%.*]] = and <4 x i1> [[TMP1]], [[TMP2]]
@@ -476,7 +476,7 @@ mve_pred16_t test_vcmpeqq_m_n_f32(float32x4_t a, float32_t b, mve_pred16_t p)
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
 // CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP2:%.*]] = icmp eq <16 x i8> [[A:%.*]], [[DOTSPLAT]]
 // CHECK-NEXT:    [[TMP3:%.*]] = and <16 x i1> [[TMP1]], [[TMP2]]
@@ -497,7 +497,7 @@ mve_pred16_t test_vcmpeqq_m_n_s8(int8x16_t a, int8_t b, mve_pred16_t p)
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
 // CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP2:%.*]] = icmp eq <8 x i16> [[A:%.*]], [[DOTSPLAT]]
 // CHECK-NEXT:    [[TMP3:%.*]] = and <8 x i1> [[TMP1]], [[TMP2]]
@@ -518,7 +518,7 @@ mve_pred16_t test_vcmpeqq_m_n_s16(int16x8_t a, int16_t b, mve_pred16_t p)
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
 // CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP2:%.*]] = icmp eq <4 x i32> [[A:%.*]], [[DOTSPLAT]]
 // CHECK-NEXT:    [[TMP3:%.*]] = and <4 x i1> [[TMP1]], [[TMP2]]
@@ -539,7 +539,7 @@ mve_pred16_t test_vcmpeqq_m_n_s32(int32x4_t a, int32_t b, mve_pred16_t p)
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
 // CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP2:%.*]] = icmp eq <16 x i8> [[A:%.*]], [[DOTSPLAT]]
 // CHECK-NEXT:    [[TMP3:%.*]] = and <16 x i1> [[TMP1]], [[TMP2]]
@@ -560,7 +560,7 @@ mve_pred16_t test_vcmpeqq_m_n_u8(uint8x16_t a, uint8_t b, mve_pred16_t p)
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
 // CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP2:%.*]] = icmp eq <8 x i16> [[A:%.*]], [[DOTSPLAT]]
 // CHECK-NEXT:    [[TMP3:%.*]] = and <8 x i1> [[TMP1]], [[TMP2]]
@@ -581,7 +581,7 @@ mve_pred16_t test_vcmpeqq_m_n_u16(uint16x8_t a, uint16_t b, mve_pred16_t p)
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
 // CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP2:%.*]] = icmp eq <4 x i32> [[A:%.*]], [[DOTSPLAT]]
 // CHECK-NEXT:    [[TMP3:%.*]] = and <4 x i1> [[TMP1]], [[TMP2]]
@@ -728,7 +728,7 @@ mve_pred16_t test_vcmpneq_u32(uint32x4_t a, uint32x4_t b)
 
 // CHECK-LABEL: @test_vcmpneq_n_f16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = fcmp une <8 x half> [[A:%.*]], [[DOTSPLAT]]
 // CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]])
@@ -746,7 +746,7 @@ mve_pred16_t test_vcmpneq_n_f16(float16x8_t a, float16_t b)
 
 // CHECK-LABEL: @test_vcmpneq_n_f32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = fcmp une <4 x float> [[A:%.*]], [[DOTSPLAT]]
 // CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP0]])
@@ -764,7 +764,7 @@ mve_pred16_t test_vcmpneq_n_f32(float32x4_t a, float32_t b)
 
 // CHECK-LABEL: @test_vcmpneq_n_s8(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = icmp ne <16 x i8> [[A:%.*]], [[DOTSPLAT]]
 // CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> [[TMP0]])
@@ -782,7 +782,7 @@ mve_pred16_t test_vcmpneq_n_s8(int8x16_t a, int8_t b)
 
 // CHECK-LABEL: @test_vcmpneq_n_s16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = icmp ne <8 x i16> [[A:%.*]], [[DOTSPLAT]]
 // CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]])
@@ -800,7 +800,7 @@ mve_pred16_t test_vcmpneq_n_s16(int16x8_t a, int16_t b)
 
 // CHECK-LABEL: @test_vcmpneq_n_s32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = icmp ne <4 x i32> [[A:%.*]], [[DOTSPLAT]]
 // CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP0]])
@@ -818,7 +818,7 @@ mve_pred16_t test_vcmpneq_n_s32(int32x4_t a, int32_t b)
 
 // CHECK-LABEL: @test_vcmpneq_n_u8(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = icmp ne <16 x i8> [[A:%.*]], [[DOTSPLAT]]
 // CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> [[TMP0]])
@@ -836,7 +836,7 @@ mve_pred16_t test_vcmpneq_n_u8(uint8x16_t a, uint8_t b)
 
 // CHECK-LABEL: @test_vcmpneq_n_u16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = icmp ne <8 x i16> [[A:%.*]], [[DOTSPLAT]]
 // CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]])
@@ -854,7 +854,7 @@ mve_pred16_t test_vcmpneq_n_u16(uint16x8_t a, uint16_t b)
 
 // CHECK-LABEL: @test_vcmpneq_n_u32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = icmp ne <4 x i32> [[A:%.*]], [[DOTSPLAT]]
 // CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP0]])
@@ -1026,7 +1026,7 @@ mve_pred16_t test_vcmpneq_m_u32(uint32x4_t a, uint32x4_t b, mve_pred16_t p)
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
 // CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP2:%.*]] = fcmp une <8 x half> [[A:%.*]], [[DOTSPLAT]]
 // CHECK-NEXT:    [[TMP3:%.*]] = and <8 x i1> [[TMP1]], [[TMP2]]
@@ -1047,7 +1047,7 @@ mve_pred16_t test_vcmpneq_m_n_f16(float16x8_t a, float16_t b, mve_pred16_t p)
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
 // CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP2:%.*]] = fcmp une <4 x float> [[A:%.*]], [[DOTSPLAT]]
 // CHECK-NEXT:    [[TMP3:%.*]] = and <4 x i1> [[TMP1]], [[TMP2]]
@@ -1068,7 +1068,7 @@ mve_pred16_t test_vcmpneq_m_n_f32(float32x4_t a, float32_t b, mve_pred16_t p)
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
 // CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP2:%.*]] = icmp ne <16 x i8> [[A:%.*]], [[DOTSPLAT]]
 // CHECK-NEXT:    [[TMP3:%.*]] = and <16 x i1> [[TMP1]], [[TMP2]]
@@ -1089,7 +1089,7 @@ mve_pred16_t test_vcmpneq_m_n_s8(int8x16_t a, int8_t b, mve_pred16_t p)
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
 // CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP2:%.*]] = icmp ne <8 x i16> [[A:%.*]], [[DOTSPLAT]]
 // CHECK-NEXT:    [[TMP3:%.*]] = and <8 x i1> [[TMP1]], [[TMP2]]
@@ -1110,7 +1110,7 @@ mve_pred16_t test_vcmpneq_m_n_s16(int16x8_t a, int16_t b, mve_pred16_t p)
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
 // CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP2:%.*]] = icmp ne <4 x i32> [[A:%.*]], [[DOTSPLAT]]
 // CHECK-NEXT:    [[TMP3:%.*]] = and <4 x i1> [[TMP1]], [[TMP2]]
@@ -1131,7 +1131,7 @@ mve_pred16_t test_vcmpneq_m_n_s32(int32x4_t a, int32_t b, mve_pred16_t p)
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
 // CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP2:%.*]] = icmp ne <16 x i8> [[A:%.*]], [[DOTSPLAT]]
 // CHECK-NEXT:    [[TMP3:%.*]] = and <16 x i1> [[TMP1]], [[TMP2]]
@@ -1152,7 +1152,7 @@ mve_pred16_t test_vcmpneq_m_n_u8(uint8x16_t a, uint8_t b, mve_pred16_t p)
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
 // CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP2:%.*]] = icmp ne <8 x i16> [[A:%.*]], [[DOTSPLAT]]
 // CHECK-NEXT:    [[TMP3:%.*]] = and <8 x i1> [[TMP1]], [[TMP2]]
@@ -1173,7 +1173,7 @@ mve_pred16_t test_vcmpneq_m_n_u16(uint16x8_t a, uint16_t b, mve_pred16_t p)
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
 // CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP2:%.*]] = icmp ne <4 x i32> [[A:%.*]], [[DOTSPLAT]]
 // CHECK-NEXT:    [[TMP3:%.*]] = and <4 x i1> [[TMP1]], [[TMP2]]
@@ -1320,7 +1320,7 @@ mve_pred16_t test_vcmpcsq_u32(uint32x4_t a, uint32x4_t b)
 
 // CHECK-LABEL: @test_vcmpgeq_n_f16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = fcmp oge <8 x half> [[A:%.*]], [[DOTSPLAT]]
 // CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]])
@@ -1338,7 +1338,7 @@ mve_pred16_t test_vcmpgeq_n_f16(float16x8_t a, float16_t b)
 
 // CHECK-LABEL: @test_vcmpgeq_n_f32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = fcmp oge <4 x float> [[A:%.*]], [[DOTSPLAT]]
 // CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP0]])
@@ -1356,7 +1356,7 @@ mve_pred16_t test_vcmpgeq_n_f32(float32x4_t a, float32_t b)
 
 // CHECK-LABEL: @test_vcmpgeq_n_s8(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = icmp sge <16 x i8> [[A:%.*]], [[DOTSPLAT]]
 // CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> [[TMP0]])
@@ -1374,7 +1374,7 @@ mve_pred16_t test_vcmpgeq_n_s8(int8x16_t a, int8_t b)
 
 // CHECK-LABEL: @test_vcmpgeq_n_s16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = icmp sge <8 x i16> [[A:%.*]], [[DOTSPLAT]]
 // CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]])
@@ -1392,7 +1392,7 @@ mve_pred16_t test_vcmpgeq_n_s16(int16x8_t a, int16_t b)
 
 // CHECK-LABEL: @test_vcmpgeq_n_s32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = icmp sge <4 x i32> [[A:%.*]], [[DOTSPLAT]]
 // CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP0]])
@@ -1410,7 +1410,7 @@ mve_pred16_t test_vcmpgeq_n_s32(int32x4_t a, int32_t b)
 
 // CHECK-LABEL: @test_vcmpcsq_n_u8(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = icmp uge <16 x i8> [[A:%.*]], [[DOTSPLAT]]
 // CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> [[TMP0]])
@@ -1428,7 +1428,7 @@ mve_pred16_t test_vcmpcsq_n_u8(uint8x16_t a, uint8_t b)
 
 // CHECK-LABEL: @test_vcmpcsq_n_u16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = icmp uge <8 x i16> [[A:%.*]], [[DOTSPLAT]]
 // CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]])
@@ -1446,7 +1446,7 @@ mve_pred16_t test_vcmpcsq_n_u16(uint16x8_t a, uint16_t b)
 
 // CHECK-LABEL: @test_vcmpcsq_n_u32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = icmp uge <4 x i32> [[A:%.*]], [[DOTSPLAT]]
 // CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP0]])
@@ -1618,7 +1618,7 @@ mve_pred16_t test_vcmpcsq_m_u32(uint32x4_t a, uint32x4_t b, mve_pred16_t p)
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
 // CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP2:%.*]] = fcmp oge <8 x half> [[A:%.*]], [[DOTSPLAT]]
 // CHECK-NEXT:    [[TMP3:%.*]] = and <8 x i1> [[TMP1]], [[TMP2]]
@@ -1639,7 +1639,7 @@ mve_pred16_t test_vcmpgeq_m_n_f16(float16x8_t a, float16_t b, mve_pred16_t p)
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
 // CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP2:%.*]] = fcmp oge <4 x float> [[A:%.*]], [[DOTSPLAT]]
 // CHECK-NEXT:    [[TMP3:%.*]] = and <4 x i1> [[TMP1]], [[TMP2]]
@@ -1660,7 +1660,7 @@ mve_pred16_t test_vcmpgeq_m_n_f32(float32x4_t a, float32_t b, mve_pred16_t p)
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
 // CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP2:%.*]] = icmp sge <16 x i8> [[A:%.*]], [[DOTSPLAT]]
 // CHECK-NEXT:    [[TMP3:%.*]] = and <16 x i1> [[TMP1]], [[TMP2]]
@@ -1681,7 +1681,7 @@ mve_pred16_t test_vcmpgeq_m_n_s8(int8x16_t a, int8_t b, mve_pred16_t p)
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
 // CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP2:%.*]] = icmp sge <8 x i16> [[A:%.*]], [[DOTSPLAT]]
 // CHECK-NEXT:    [[TMP3:%.*]] = and <8 x i1> [[TMP1]], [[TMP2]]
@@ -1702,7 +1702,7 @@ mve_pred16_t test_vcmpgeq_m_n_s16(int16x8_t a, int16_t b, mve_pred16_t p)
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
 // CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP2:%.*]] = icmp sge <4 x i32> [[A:%.*]], [[DOTSPLAT]]
 // CHECK-NEXT:    [[TMP3:%.*]] = and <4 x i1> [[TMP1]], [[TMP2]]
@@ -1723,7 +1723,7 @@ mve_pred16_t test_vcmpgeq_m_n_s32(int32x4_t a, int32_t b, mve_pred16_t p)
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
 // CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP2:%.*]] = icmp uge <16 x i8> [[A:%.*]], [[DOTSPLAT]]
 // CHECK-NEXT:    [[TMP3:%.*]] = and <16 x i1> [[TMP1]], [[TMP2]]
@@ -1744,7 +1744,7 @@ mve_pred16_t test_vcmpcsq_m_n_u8(uint8x16_t a, uint8_t b, mve_pred16_t p)
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
 // CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP2:%.*]] = icmp uge <8 x i16> [[A:%.*]], [[DOTSPLAT]]
 // CHECK-NEXT:    [[TMP3:%.*]] = and <8 x i1> [[TMP1]], [[TMP2]]
@@ -1765,7 +1765,7 @@ mve_pred16_t test_vcmpcsq_m_n_u16(uint16x8_t a, uint16_t b, mve_pred16_t p)
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
 // CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP2:%.*]] = icmp uge <4 x i32> [[A:%.*]], [[DOTSPLAT]]
 // CHECK-NEXT:    [[TMP3:%.*]] = and <4 x i1> [[TMP1]], [[TMP2]]
@@ -1912,7 +1912,7 @@ mve_pred16_t test_vcmphiq_u32(uint32x4_t a, uint32x4_t b)
 
 // CHECK-LABEL: @test_vcmpgtq_n_f16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = fcmp ogt <8 x half> [[A:%.*]], [[DOTSPLAT]]
 // CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]])
@@ -1930,7 +1930,7 @@ mve_pred16_t test_vcmpgtq_n_f16(float16x8_t a, float16_t b)
 
 // CHECK-LABEL: @test_vcmpgtq_n_f32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = fcmp ogt <4 x float> [[A:%.*]], [[DOTSPLAT]]
 // CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP0]])
@@ -1948,7 +1948,7 @@ mve_pred16_t test_vcmpgtq_n_f32(float32x4_t a, float32_t b)
 
 // CHECK-LABEL: @test_vcmpgtq_n_s8(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = icmp sgt <16 x i8> [[A:%.*]], [[DOTSPLAT]]
 // CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> [[TMP0]])
@@ -1966,7 +1966,7 @@ mve_pred16_t test_vcmpgtq_n_s8(int8x16_t a, int8_t b)
 
 // CHECK-LABEL: @test_vcmpgtq_n_s16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = icmp sgt <8 x i16> [[A:%.*]], [[DOTSPLAT]]
 // CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]])
@@ -1984,7 +1984,7 @@ mve_pred16_t test_vcmpgtq_n_s16(int16x8_t a, int16_t b)
 
 // CHECK-LABEL: @test_vcmpgtq_n_s32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = icmp sgt <4 x i32> [[A:%.*]], [[DOTSPLAT]]
 // CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP0]])
@@ -2002,7 +2002,7 @@ mve_pred16_t test_vcmpgtq_n_s32(int32x4_t a, int32_t b)
 
 // CHECK-LABEL: @test_vcmphiq_n_u8(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = icmp ugt <16 x i8> [[A:%.*]], [[DOTSPLAT]]
 // CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> [[TMP0]])
@@ -2020,7 +2020,7 @@ mve_pred16_t test_vcmphiq_n_u8(uint8x16_t a, uint8_t b)
 
 // CHECK-LABEL: @test_vcmphiq_n_u16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = icmp ugt <8 x i16> [[A:%.*]], [[DOTSPLAT]]
 // CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]])
@@ -2038,7 +2038,7 @@ mve_pred16_t test_vcmphiq_n_u16(uint16x8_t a, uint16_t b)
 
 // CHECK-LABEL: @test_vcmphiq_n_u32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = icmp ugt <4 x i32> [[A:%.*]], [[DOTSPLAT]]
 // CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP0]])
@@ -2210,7 +2210,7 @@ mve_pred16_t test_vcmphiq_m_u32(uint32x4_t a, uint32x4_t b, mve_pred16_t p)
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
 // CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP2:%.*]] = fcmp ogt <8 x half> [[A:%.*]], [[DOTSPLAT]]
 // CHECK-NEXT:    [[TMP3:%.*]] = and <8 x i1> [[TMP1]], [[TMP2]]
@@ -2231,7 +2231,7 @@ mve_pred16_t test_vcmpgtq_m_n_f16(float16x8_t a, float16_t b, mve_pred16_t p)
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
 // CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP2:%.*]] = fcmp ogt <4 x float> [[A:%.*]], [[DOTSPLAT]]
 // CHECK-NEXT:    [[TMP3:%.*]] = and <4 x i1> [[TMP1]], [[TMP2]]
@@ -2252,7 +2252,7 @@ mve_pred16_t test_vcmpgtq_m_n_f32(float32x4_t a, float32_t b, mve_pred16_t p)
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
 // CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP2:%.*]] = icmp sgt <16 x i8> [[A:%.*]], [[DOTSPLAT]]
 // CHECK-NEXT:    [[TMP3:%.*]] = and <16 x i1> [[TMP1]], [[TMP2]]
@@ -2273,7 +2273,7 @@ mve_pred16_t test_vcmpgtq_m_n_s8(int8x16_t a, int8_t b, mve_pred16_t p)
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
 // CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP2:%.*]] = icmp sgt <8 x i16> [[A:%.*]], [[DOTSPLAT]]
 // CHECK-NEXT:    [[TMP3:%.*]] = and <8 x i1> [[TMP1]], [[TMP2]]
@@ -2294,7 +2294,7 @@ mve_pred16_t test_vcmpgtq_m_n_s16(int16x8_t a, int16_t b, mve_pred16_t p)
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
 // CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP2:%.*]] = icmp sgt <4 x i32> [[A:%.*]], [[DOTSPLAT]]
 // CHECK-NEXT:    [[TMP3:%.*]] = and <4 x i1> [[TMP1]], [[TMP2]]
@@ -2315,7 +2315,7 @@ mve_pred16_t test_vcmpgtq_m_n_s32(int32x4_t a, int32_t b, mve_pred16_t p)
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
 // CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP2:%.*]] = icmp ugt <16 x i8> [[A:%.*]], [[DOTSPLAT]]
 // CHECK-NEXT:    [[TMP3:%.*]] = and <16 x i1> [[TMP1]], [[TMP2]]
@@ -2336,7 +2336,7 @@ mve_pred16_t test_vcmphiq_m_n_u8(uint8x16_t a, uint8_t b, mve_pred16_t p)
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
 // CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP2:%.*]] = icmp ugt <8 x i16> [[A:%.*]], [[DOTSPLAT]]
 // CHECK-NEXT:    [[TMP3:%.*]] = and <8 x i1> [[TMP1]], [[TMP2]]
@@ -2357,7 +2357,7 @@ mve_pred16_t test_vcmphiq_m_n_u16(uint16x8_t a, uint16_t b, mve_pred16_t p)
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
 // CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP2:%.*]] = icmp ugt <4 x i32> [[A:%.*]], [[DOTSPLAT]]
 // CHECK-NEXT:    [[TMP3:%.*]] = and <4 x i1> [[TMP1]], [[TMP2]]
@@ -2456,7 +2456,7 @@ mve_pred16_t test_vcmpleq_s32(int32x4_t a, int32x4_t b)
 
 // CHECK-LABEL: @test_vcmpleq_n_f16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = fcmp ole <8 x half> [[A:%.*]], [[DOTSPLAT]]
 // CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]])
@@ -2474,7 +2474,7 @@ mve_pred16_t test_vcmpleq_n_f16(float16x8_t a, float16_t b)
 
 // CHECK-LABEL: @test_vcmpleq_n_f32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = fcmp ole <4 x float> [[A:%.*]], [[DOTSPLAT]]
 // CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP0]])
@@ -2492,7 +2492,7 @@ mve_pred16_t test_vcmpleq_n_f32(float32x4_t a, float32_t b)
 
 // CHECK-LABEL: @test_vcmpleq_n_s8(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = icmp sle <16 x i8> [[A:%.*]], [[DOTSPLAT]]
 // CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> [[TMP0]])
@@ -2510,7 +2510,7 @@ mve_pred16_t test_vcmpleq_n_s8(int8x16_t a, int8_t b)
 
 // CHECK-LABEL: @test_vcmpleq_n_s16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = icmp sle <8 x i16> [[A:%.*]], [[DOTSPLAT]]
 // CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]])
@@ -2528,7 +2528,7 @@ mve_pred16_t test_vcmpleq_n_s16(int16x8_t a, int16_t b)
 
 // CHECK-LABEL: @test_vcmpleq_n_s32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = icmp sle <4 x i32> [[A:%.*]], [[DOTSPLAT]]
 // CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP0]])
@@ -2643,7 +2643,7 @@ mve_pred16_t test_vcmpleq_m_s32(int32x4_t a, int32x4_t b, mve_pred16_t p)
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
 // CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP2:%.*]] = fcmp ole <8 x half> [[A:%.*]], [[DOTSPLAT]]
 // CHECK-NEXT:    [[TMP3:%.*]] = and <8 x i1> [[TMP1]], [[TMP2]]
@@ -2664,7 +2664,7 @@ mve_pred16_t test_vcmpleq_m_n_f16(float16x8_t a, float16_t b, mve_pred16_t p)
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
 // CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP2:%.*]] = fcmp ole <4 x float> [[A:%.*]], [[DOTSPLAT]]
 // CHECK-NEXT:    [[TMP3:%.*]] = and <4 x i1> [[TMP1]], [[TMP2]]
@@ -2685,7 +2685,7 @@ mve_pred16_t test_vcmpleq_m_n_f32(float32x4_t a, float32_t b, mve_pred16_t p)
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
 // CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP2:%.*]] = icmp sle <16 x i8> [[A:%.*]], [[DOTSPLAT]]
 // CHECK-NEXT:    [[TMP3:%.*]] = and <16 x i1> [[TMP1]], [[TMP2]]
@@ -2706,7 +2706,7 @@ mve_pred16_t test_vcmpleq_m_n_s8(int8x16_t a, int8_t b, mve_pred16_t p)
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
 // CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP2:%.*]] = icmp sle <8 x i16> [[A:%.*]], [[DOTSPLAT]]
 // CHECK-NEXT:    [[TMP3:%.*]] = and <8 x i1> [[TMP1]], [[TMP2]]
@@ -2727,7 +2727,7 @@ mve_pred16_t test_vcmpleq_m_n_s16(int16x8_t a, int16_t b, mve_pred16_t p)
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
 // CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP2:%.*]] = icmp sle <4 x i32> [[A:%.*]], [[DOTSPLAT]]
 // CHECK-NEXT:    [[TMP3:%.*]] = and <4 x i1> [[TMP1]], [[TMP2]]
@@ -2826,7 +2826,7 @@ mve_pred16_t test_vcmpltq_s32(int32x4_t a, int32x4_t b)
 
 // CHECK-LABEL: @test_vcmpltq_n_f16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = fcmp olt <8 x half> [[A:%.*]], [[DOTSPLAT]]
 // CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]])
@@ -2844,7 +2844,7 @@ mve_pred16_t test_vcmpltq_n_f16(float16x8_t a, float16_t b)
 
 // CHECK-LABEL: @test_vcmpltq_n_f32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = fcmp olt <4 x float> [[A:%.*]], [[DOTSPLAT]]
 // CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP0]])
@@ -2862,7 +2862,7 @@ mve_pred16_t test_vcmpltq_n_f32(float32x4_t a, float32_t b)
 
 // CHECK-LABEL: @test_vcmpltq_n_s8(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = icmp slt <16 x i8> [[A:%.*]], [[DOTSPLAT]]
 // CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> [[TMP0]])
@@ -2880,7 +2880,7 @@ mve_pred16_t test_vcmpltq_n_s8(int8x16_t a, int8_t b)
 
 // CHECK-LABEL: @test_vcmpltq_n_s16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = icmp slt <8 x i16> [[A:%.*]], [[DOTSPLAT]]
 // CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]])
@@ -2898,7 +2898,7 @@ mve_pred16_t test_vcmpltq_n_s16(int16x8_t a, int16_t b)
 
 // CHECK-LABEL: @test_vcmpltq_n_s32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = icmp slt <4 x i32> [[A:%.*]], [[DOTSPLAT]]
 // CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP0]])
@@ -3013,7 +3013,7 @@ mve_pred16_t test_vcmpltq_m_s32(int32x4_t a, int32x4_t b, mve_pred16_t p)
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
 // CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP2:%.*]] = fcmp olt <8 x half> [[A:%.*]], [[DOTSPLAT]]
 // CHECK-NEXT:    [[TMP3:%.*]] = and <8 x i1> [[TMP1]], [[TMP2]]
@@ -3034,7 +3034,7 @@ mve_pred16_t test_vcmpltq_m_n_f16(float16x8_t a, float16_t b, mve_pred16_t p)
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
 // CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP2:%.*]] = fcmp olt <4 x float> [[A:%.*]], [[DOTSPLAT]]
 // CHECK-NEXT:    [[TMP3:%.*]] = and <4 x i1> [[TMP1]], [[TMP2]]
@@ -3055,7 +3055,7 @@ mve_pred16_t test_vcmpltq_m_n_f32(float32x4_t a, float32_t b, mve_pred16_t p)
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
 // CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP2:%.*]] = icmp slt <16 x i8> [[A:%.*]], [[DOTSPLAT]]
 // CHECK-NEXT:    [[TMP3:%.*]] = and <16 x i1> [[TMP1]], [[TMP2]]
@@ -3076,7 +3076,7 @@ mve_pred16_t test_vcmpltq_m_n_s8(int8x16_t a, int8_t b, mve_pred16_t p)
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
 // CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP2:%.*]] = icmp slt <8 x i16> [[A:%.*]], [[DOTSPLAT]]
 // CHECK-NEXT:    [[TMP3:%.*]] = and <8 x i1> [[TMP1]], [[TMP2]]
@@ -3097,7 +3097,7 @@ mve_pred16_t test_vcmpltq_m_n_s16(int16x8_t a, int16_t b, mve_pred16_t p)
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
 // CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP2:%.*]] = icmp slt <4 x i32> [[A:%.*]], [[DOTSPLAT]]
 // CHECK-NEXT:    [[TMP3:%.*]] = and <4 x i1> [[TMP1]], [[TMP2]]

diff  --git a/clang/test/CodeGen/arm-mve-intrinsics/cplusplus.cpp b/clang/test/CodeGen/arm-mve-intrinsics/cplusplus.cpp
index 014ea687c9b1f..77170745756c4 100644
--- a/clang/test/CodeGen/arm-mve-intrinsics/cplusplus.cpp
+++ b/clang/test/CodeGen/arm-mve-intrinsics/cplusplus.cpp
@@ -80,7 +80,7 @@ mve_pred16_t test_vcmpeqq_f16(float16x8_t a, float16x8_t b)
 
 // CHECK-LABEL: @_Z18test_vcmpeqq_n_f1619__simd128_float16_tDh(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = fcmp oeq <8 x half> [[A:%.*]], [[DOTSPLAT]]
 // CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]])
@@ -98,8 +98,8 @@ mve_pred16_t test_vcmpeqq_n_f16(float16x8_t a, float16_t b)
 
 // CHECK-LABEL: @_Z14test_vld1q_u16PKt(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[BASE:%.*]], align 2
-// CHECK-NEXT:    ret <8 x i16> [[TMP1]]
+// CHECK-NEXT:    [[TMP0:%.*]] = load <8 x i16>, ptr [[BASE:%.*]], align 2
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
 //
 uint16x8_t test_vld1q_u16(const uint16_t *base)
 {
@@ -112,9 +112,9 @@ uint16x8_t test_vld1q_u16(const uint16_t *base)
 
 // CHECK-LABEL: @_Z16test_vst1q_p_s32Pi17__simd128_int32_tt(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]])
-// CHECK-NEXT:    call void @llvm.masked.store.v4i32.p0(<4 x i32> [[VALUE:%.*]], ptr [[BASE:%.*]], i32 4, <4 x i1> [[TMP2]])
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    call void @llvm.masked.store.v4i32.p0(<4 x i32> [[VALUE:%.*]], ptr [[BASE:%.*]], i32 4, <4 x i1> [[TMP1]])
 // CHECK-NEXT:    ret void
 //
 void test_vst1q_p_s32(int32_t *base, int32x4_t value, mve_pred16_t p)

diff  --git a/clang/test/CodeGen/arm-mve-intrinsics/dup.c b/clang/test/CodeGen/arm-mve-intrinsics/dup.c
index cac8a46ea0711..0361e03335be3 100644
--- a/clang/test/CodeGen/arm-mve-intrinsics/dup.c
+++ b/clang/test/CodeGen/arm-mve-intrinsics/dup.c
@@ -8,7 +8,7 @@
 
 // CHECK-LABEL: @test_vdupq_n_f16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[A:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[A:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    ret <8 x half> [[DOTSPLAT]]
 //
@@ -19,7 +19,7 @@ float16x8_t test_vdupq_n_f16(float16_t a)
 
 // CHECK-LABEL: @test_vdupq_n_f32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[A:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[A:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
 // CHECK-NEXT:    ret <4 x float> [[DOTSPLAT]]
 //
@@ -30,7 +30,7 @@ float32x4_t test_vdupq_n_f32(float32_t a)
 
 // CHECK-LABEL: @test_vdupq_n_s8(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[A:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[A:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
 // CHECK-NEXT:    ret <16 x i8> [[DOTSPLAT]]
 //
@@ -41,7 +41,7 @@ int8x16_t test_vdupq_n_s8(int8_t a)
 
 // CHECK-LABEL: @test_vdupq_n_s16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[A:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[A:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    ret <8 x i16> [[DOTSPLAT]]
 //
@@ -52,7 +52,7 @@ int16x8_t test_vdupq_n_s16(int16_t a)
 
 // CHECK-LABEL: @test_vdupq_n_s32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[A:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[A:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
 // CHECK-NEXT:    ret <4 x i32> [[DOTSPLAT]]
 //
@@ -63,7 +63,7 @@ int32x4_t test_vdupq_n_s32(int32_t a)
 
 // CHECK-LABEL: @test_vdupq_n_u8(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[A:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[A:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
 // CHECK-NEXT:    ret <16 x i8> [[DOTSPLAT]]
 //
@@ -74,7 +74,7 @@ uint8x16_t test_vdupq_n_u8(uint8_t a)
 
 // CHECK-LABEL: @test_vdupq_n_u16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[A:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[A:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    ret <8 x i16> [[DOTSPLAT]]
 //
@@ -85,7 +85,7 @@ uint16x8_t test_vdupq_n_u16(uint16_t a)
 
 // CHECK-LABEL: @test_vdupq_n_u32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[A:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[A:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
 // CHECK-NEXT:    ret <4 x i32> [[DOTSPLAT]]
 //
@@ -98,7 +98,7 @@ uint32x4_t test_vdupq_n_u32(uint32_t a)
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
 // CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[A:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[A:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x half> [[DOTSPLAT]], <8 x half> [[INACTIVE:%.*]]
 // CHECK-NEXT:    ret <8 x half> [[TMP2]]
@@ -116,7 +116,7 @@ float16x8_t test_vdupq_m_n_f16(float16x8_t inactive, float16_t a, mve_pred16_t p
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
 // CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[A:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[A:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[DOTSPLAT]], <4 x float> [[INACTIVE:%.*]]
 // CHECK-NEXT:    ret <4 x float> [[TMP2]]
@@ -134,7 +134,7 @@ float32x4_t test_vdupq_m_n_f32(float32x4_t inactive, float32_t a, mve_pred16_t p
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
 // CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[A:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[A:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP2:%.*]] = select <16 x i1> [[TMP1]], <16 x i8> [[DOTSPLAT]], <16 x i8> [[INACTIVE:%.*]]
 // CHECK-NEXT:    ret <16 x i8> [[TMP2]]
@@ -152,7 +152,7 @@ int8x16_t test_vdupq_m_n_s8(int8x16_t inactive, int8_t a, mve_pred16_t p)
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
 // CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[A:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[A:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> [[DOTSPLAT]], <8 x i16> [[INACTIVE:%.*]]
 // CHECK-NEXT:    ret <8 x i16> [[TMP2]]
@@ -170,7 +170,7 @@ int16x8_t test_vdupq_m_n_s16(int16x8_t inactive, int16_t a, mve_pred16_t p)
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
 // CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[A:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[A:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[DOTSPLAT]], <4 x i32> [[INACTIVE:%.*]]
 // CHECK-NEXT:    ret <4 x i32> [[TMP2]]
@@ -188,7 +188,7 @@ int32x4_t test_vdupq_m_n_s32(int32x4_t inactive, int32_t a, mve_pred16_t p)
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
 // CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[A:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[A:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP2:%.*]] = select <16 x i1> [[TMP1]], <16 x i8> [[DOTSPLAT]], <16 x i8> [[INACTIVE:%.*]]
 // CHECK-NEXT:    ret <16 x i8> [[TMP2]]
@@ -206,7 +206,7 @@ uint8x16_t test_vdupq_m_n_u8(uint8x16_t inactive, uint8_t a, mve_pred16_t p)
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
 // CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[A:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[A:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> [[DOTSPLAT]], <8 x i16> [[INACTIVE:%.*]]
 // CHECK-NEXT:    ret <8 x i16> [[TMP2]]
@@ -224,7 +224,7 @@ uint16x8_t test_vdupq_m_n_u16(uint16x8_t inactive, uint16_t a, mve_pred16_t p)
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
 // CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[A:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[A:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[DOTSPLAT]], <4 x i32> [[INACTIVE:%.*]]
 // CHECK-NEXT:    ret <4 x i32> [[TMP2]]
@@ -242,7 +242,7 @@ uint32x4_t test_vdupq_m_n_u32(uint32x4_t inactive, uint32_t a, mve_pred16_t p)
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
 // CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[A:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[A:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x half> [[DOTSPLAT]], <8 x half> undef
 // CHECK-NEXT:    ret <8 x half> [[TMP2]]
@@ -256,7 +256,7 @@ float16x8_t test_vdupq_x_n_f16(float16_t a, mve_pred16_t p)
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
 // CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[A:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[A:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[DOTSPLAT]], <4 x float> undef
 // CHECK-NEXT:    ret <4 x float> [[TMP2]]
@@ -270,7 +270,7 @@ float32x4_t test_vdupq_x_n_f32(float32_t a, mve_pred16_t p)
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
 // CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[A:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[A:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP2:%.*]] = select <16 x i1> [[TMP1]], <16 x i8> [[DOTSPLAT]], <16 x i8> undef
 // CHECK-NEXT:    ret <16 x i8> [[TMP2]]
@@ -284,7 +284,7 @@ int8x16_t test_vdupq_x_n_s8(int8_t a, mve_pred16_t p)
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
 // CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[A:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[A:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> [[DOTSPLAT]], <8 x i16> undef
 // CHECK-NEXT:    ret <8 x i16> [[TMP2]]
@@ -298,7 +298,7 @@ int16x8_t test_vdupq_x_n_s16(int16_t a, mve_pred16_t p)
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
 // CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[A:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[A:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[DOTSPLAT]], <4 x i32> undef
 // CHECK-NEXT:    ret <4 x i32> [[TMP2]]
@@ -312,7 +312,7 @@ int32x4_t test_vdupq_x_n_s32(int32_t a, mve_pred16_t p)
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
 // CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[A:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[A:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP2:%.*]] = select <16 x i1> [[TMP1]], <16 x i8> [[DOTSPLAT]], <16 x i8> undef
 // CHECK-NEXT:    ret <16 x i8> [[TMP2]]
@@ -326,7 +326,7 @@ uint8x16_t test_vdupq_x_n_u8(uint8_t a, mve_pred16_t p)
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
 // CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[A:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[A:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> [[DOTSPLAT]], <8 x i16> undef
 // CHECK-NEXT:    ret <8 x i16> [[TMP2]]
@@ -340,7 +340,7 @@ uint16x8_t test_vdupq_x_n_u16(uint16_t a, mve_pred16_t p)
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
 // CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[A:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[A:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[DOTSPLAT]], <4 x i32> undef
 // CHECK-NEXT:    ret <4 x i32> [[TMP2]]

diff  --git a/clang/test/CodeGen/arm-mve-intrinsics/ternary.c b/clang/test/CodeGen/arm-mve-intrinsics/ternary.c
index 47dd5e5a9096e..afdddebcf2aa7 100644
--- a/clang/test/CodeGen/arm-mve-intrinsics/ternary.c
+++ b/clang/test/CodeGen/arm-mve-intrinsics/ternary.c
@@ -34,7 +34,7 @@ float32x4_t test_vfmaq_f32(float32x4_t a, float32x4_t b, float32x4_t c) {
 
 // CHECK-LABEL: @test_vfmaq_n_f16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[C:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[C:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> [[B:%.*]], <8 x half> [[DOTSPLAT]], <8 x half> [[A:%.*]])
 // CHECK-NEXT:    ret <8 x half> [[TMP0]]
@@ -49,7 +49,7 @@ float16x8_t test_vfmaq_n_f16(float16x8_t a, float16x8_t b, float16_t c) {
 
 // CHECK-LABEL: @test_vfmaq_n_f32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[C:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[C:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[B:%.*]], <4 x float> [[DOTSPLAT]], <4 x float> [[A:%.*]])
 // CHECK-NEXT:    ret <4 x float> [[TMP0]]
@@ -64,7 +64,7 @@ float32x4_t test_vfmaq_n_f32(float32x4_t a, float32x4_t b, float32_t c) {
 
 // CHECK-LABEL: @test_vfmasq_n_f16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[C:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[C:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], <8 x half> [[DOTSPLAT]])
 // CHECK-NEXT:    ret <8 x half> [[TMP0]]
@@ -79,7 +79,7 @@ float16x8_t test_vfmasq_n_f16(float16x8_t a, float16x8_t b, float16_t c) {
 
 // CHECK-LABEL: @test_vfmasq_n_f32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[C:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[C:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[DOTSPLAT]])
 // CHECK-NEXT:    ret <4 x float> [[TMP0]]
@@ -122,7 +122,7 @@ float32x4_t test_vfmsq_f32(float32x4_t a, float32x4_t b, float32x4_t c) {
 
 // CHECK-LABEL: @test_vmlaq_n_s8(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[C:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[C:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = mul <16 x i8> [[B:%.*]], [[DOTSPLAT]]
 // CHECK-NEXT:    [[TMP1:%.*]] = add <16 x i8> [[TMP0]], [[A:%.*]]
@@ -138,7 +138,7 @@ int8x16_t test_vmlaq_n_s8(int8x16_t a, int8x16_t b, int8_t c) {
 
 // CHECK-LABEL: @test_vmlaq_n_s16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[C:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[C:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = mul <8 x i16> [[B:%.*]], [[DOTSPLAT]]
 // CHECK-NEXT:    [[TMP1:%.*]] = add <8 x i16> [[TMP0]], [[A:%.*]]
@@ -154,7 +154,7 @@ int16x8_t test_vmlaq_n_s16(int16x8_t a, int16x8_t b, int16_t c) {
 
 // CHECK-LABEL: @test_vmlaq_n_s32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[C:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[C:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = mul <4 x i32> [[B:%.*]], [[DOTSPLAT]]
 // CHECK-NEXT:    [[TMP1:%.*]] = add <4 x i32> [[TMP0]], [[A:%.*]]
@@ -170,7 +170,7 @@ int32x4_t test_vmlaq_n_s32(int32x4_t a, int32x4_t b, int32_t c) {
 
 // CHECK-LABEL: @test_vmlaq_n_u8(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[C:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[C:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = mul <16 x i8> [[B:%.*]], [[DOTSPLAT]]
 // CHECK-NEXT:    [[TMP1:%.*]] = add <16 x i8> [[TMP0]], [[A:%.*]]
@@ -186,7 +186,7 @@ uint8x16_t test_vmlaq_n_u8(uint8x16_t a, uint8x16_t b, uint8_t c) {
 
 // CHECK-LABEL: @test_vmlaq_n_u16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[C:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[C:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = mul <8 x i16> [[B:%.*]], [[DOTSPLAT]]
 // CHECK-NEXT:    [[TMP1:%.*]] = add <8 x i16> [[TMP0]], [[A:%.*]]
@@ -202,7 +202,7 @@ uint16x8_t test_vmlaq_n_u16(uint16x8_t a, uint16x8_t b, uint16_t c) {
 
 // CHECK-LABEL: @test_vmlaq_n_u32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[C:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[C:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = mul <4 x i32> [[B:%.*]], [[DOTSPLAT]]
 // CHECK-NEXT:    [[TMP1:%.*]] = add <4 x i32> [[TMP0]], [[A:%.*]]
@@ -219,7 +219,7 @@ uint32x4_t test_vmlaq_n_u32(uint32x4_t a, uint32x4_t b, uint32_t c) {
 // CHECK-LABEL: @test_vmlasq_n_s8(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = mul <16 x i8> [[A:%.*]], [[B:%.*]]
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[C:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[C:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP1:%.*]] = add <16 x i8> [[TMP0]], [[DOTSPLAT]]
 // CHECK-NEXT:    ret <16 x i8> [[TMP1]]
@@ -235,7 +235,7 @@ int8x16_t test_vmlasq_n_s8(int8x16_t a, int8x16_t b, int8_t c) {
 // CHECK-LABEL: @test_vmlasq_n_s16(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = mul <8 x i16> [[A:%.*]], [[B:%.*]]
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[C:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[C:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP1:%.*]] = add <8 x i16> [[TMP0]], [[DOTSPLAT]]
 // CHECK-NEXT:    ret <8 x i16> [[TMP1]]
@@ -251,7 +251,7 @@ int16x8_t test_vmlasq_n_s16(int16x8_t a, int16x8_t b, int16_t c) {
 // CHECK-LABEL: @test_vmlasq_n_s32(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = mul <4 x i32> [[A:%.*]], [[B:%.*]]
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[C:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[C:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP1:%.*]] = add <4 x i32> [[TMP0]], [[DOTSPLAT]]
 // CHECK-NEXT:    ret <4 x i32> [[TMP1]]
@@ -267,7 +267,7 @@ int32x4_t test_vmlasq_n_s32(int32x4_t a, int32x4_t b, int32_t c) {
 // CHECK-LABEL: @test_vmlasq_n_u8(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = mul <16 x i8> [[A:%.*]], [[B:%.*]]
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[C:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[C:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP1:%.*]] = add <16 x i8> [[TMP0]], [[DOTSPLAT]]
 // CHECK-NEXT:    ret <16 x i8> [[TMP1]]
@@ -283,7 +283,7 @@ uint8x16_t test_vmlasq_n_u8(uint8x16_t a, uint8x16_t b, uint8_t c) {
 // CHECK-LABEL: @test_vmlasq_n_u16(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = mul <8 x i16> [[A:%.*]], [[B:%.*]]
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[C:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[C:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP1:%.*]] = add <8 x i16> [[TMP0]], [[DOTSPLAT]]
 // CHECK-NEXT:    ret <8 x i16> [[TMP1]]
@@ -299,7 +299,7 @@ uint16x8_t test_vmlasq_n_u16(uint16x8_t a, uint16x8_t b, uint16_t c) {
 // CHECK-LABEL: @test_vmlasq_n_u32(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = mul <4 x i32> [[A:%.*]], [[B:%.*]]
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[C:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[C:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP1:%.*]] = add <4 x i32> [[TMP0]], [[DOTSPLAT]]
 // CHECK-NEXT:    ret <4 x i32> [[TMP1]]
@@ -508,7 +508,7 @@ float32x4_t test_vfmaq_m_f32(float32x4_t a, float32x4_t b, float32x4_t c, mve_pr
 
 // CHECK-LABEL: @test_vfmaq_m_n_f16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[C:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[C:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
 // CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
@@ -525,7 +525,7 @@ float16x8_t test_vfmaq_m_n_f16(float16x8_t a, float16x8_t b, float16_t c, mve_pr
 
 // CHECK-LABEL: @test_vfmaq_m_n_f32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[C:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[C:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
 // CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
@@ -542,7 +542,7 @@ float32x4_t test_vfmaq_m_n_f32(float32x4_t a, float32x4_t b, float32_t c, mve_pr
 
 // CHECK-LABEL: @test_vfmasq_m_n_f16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[C:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[C:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
 // CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
@@ -559,7 +559,7 @@ float16x8_t test_vfmasq_m_n_f16(float16x8_t a, float16x8_t b, float16_t c, mve_p
 
 // CHECK-LABEL: @test_vfmasq_m_n_f32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[C:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[C:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
 // CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])

diff  --git a/clang/test/CodeGen/arm-mve-intrinsics/vaddq.c b/clang/test/CodeGen/arm-mve-intrinsics/vaddq.c
index 9006d2ce77c5c..516e26b2698b5 100644
--- a/clang/test/CodeGen/arm-mve-intrinsics/vaddq.c
+++ b/clang/test/CodeGen/arm-mve-intrinsics/vaddq.c
@@ -100,7 +100,7 @@ float16x8_t test_vaddq_x_f16(float16x8_t a, float16x8_t b, mve_pred16_t p)
 
 // CHECK-LABEL: @test_vaddq_n_u32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = add <4 x i32> [[A:%.*]], [[DOTSPLAT]]
 // CHECK-NEXT:    ret <4 x i32> [[TMP0]]
@@ -116,7 +116,7 @@ uint32x4_t test_vaddq_n_u32(uint32x4_t a, uint32_t b)
 
 // CHECK-LABEL: @test_vaddq_n_f16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = fadd <8 x half> [[A:%.*]], [[DOTSPLAT]]
 // CHECK-NEXT:    ret <8 x half> [[TMP0]]
@@ -132,7 +132,7 @@ float16x8_t test_vaddq_n_f16(float16x8_t a, float16_t b)
 
 // CHECK-LABEL: @test_vaddq_m_n_s8(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
 // CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
@@ -150,7 +150,7 @@ int8x16_t test_vaddq_m_n_s8(int8x16_t inactive, int8x16_t a, int8_t b, mve_pred1
 
 // CHECK-LABEL: @test_vaddq_m_n_f32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
 // CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
@@ -168,7 +168,7 @@ float32x4_t test_vaddq_m_n_f32(float32x4_t inactive, float32x4_t a, float32_t b,
 
 // CHECK-LABEL: @test_vaddq_x_n_u16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
 // CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
@@ -186,7 +186,7 @@ uint16x8_t test_vaddq_x_n_u16(uint16x8_t a, uint16_t b, mve_pred16_t p)
 
 // CHECK-LABEL: @test_vaddq_x_n_f16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
 // CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])

diff  --git a/clang/test/CodeGen/arm-mve-intrinsics/vhaddq.c b/clang/test/CodeGen/arm-mve-intrinsics/vhaddq.c
index c99ba2b281b40..3cb641a8b2cfb 100644
--- a/clang/test/CodeGen/arm-mve-intrinsics/vhaddq.c
+++ b/clang/test/CodeGen/arm-mve-intrinsics/vhaddq.c
@@ -146,7 +146,7 @@ uint32x4_t test_vhaddq_x_u32(uint32x4_t a, uint32x4_t b, mve_pred16_t p)
 
 // CHECK-LABEL: @test_vhaddq_n_u8(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = call <16 x i8> @llvm.arm.mve.vhadd.v16i8(<16 x i8> [[A:%.*]], <16 x i8> [[DOTSPLAT]], i32 1)
 // CHECK-NEXT:    ret <16 x i8> [[TMP0]]
@@ -162,7 +162,7 @@ uint8x16_t test_vhaddq_n_u8(uint8x16_t a, uint8_t b)
 
 // CHECK-LABEL: @test_vhaddq_n_s16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = call <8 x i16> @llvm.arm.mve.vhadd.v8i16(<8 x i16> [[A:%.*]], <8 x i16> [[DOTSPLAT]], i32 0)
 // CHECK-NEXT:    ret <8 x i16> [[TMP0]]
@@ -178,7 +178,7 @@ int16x8_t test_vhaddq_n_s16(int16x8_t a, int16_t b)
 
 // CHECK-LABEL: @test_vhaddq_n_u32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vhadd.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[DOTSPLAT]], i32 1)
 // CHECK-NEXT:    ret <4 x i32> [[TMP0]]
@@ -194,7 +194,7 @@ uint32x4_t test_vhaddq_n_u32(uint32x4_t a, uint32_t b)
 
 // CHECK-LABEL: @test_vhaddq_m_n_s8(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
 // CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
@@ -212,7 +212,7 @@ int8x16_t test_vhaddq_m_n_s8(int8x16_t inactive, int8x16_t a, int8_t b, mve_pred
 
 // CHECK-LABEL: @test_vhaddq_m_n_u16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
 // CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
@@ -230,7 +230,7 @@ uint16x8_t test_vhaddq_m_n_u16(uint16x8_t inactive, uint16x8_t a, uint16_t b, mv
 
 // CHECK-LABEL: @test_vhaddq_m_n_s32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
 // CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
@@ -248,7 +248,7 @@ int32x4_t test_vhaddq_m_n_s32(int32x4_t inactive, int32x4_t a, int32_t b, mve_pr
 
 // CHECK-LABEL: @test_vhaddq_x_n_u8(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
 // CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
@@ -266,7 +266,7 @@ uint8x16_t test_vhaddq_x_n_u8(uint8x16_t a, uint8_t b, mve_pred16_t p)
 
 // CHECK-LABEL: @test_vhaddq_x_n_s16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
 // CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
@@ -284,7 +284,7 @@ int16x8_t test_vhaddq_x_n_s16(int16x8_t a, int16_t b, mve_pred16_t p)
 
 // CHECK-LABEL: @test_vhaddq_x_n_u32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
 // CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])

diff  --git a/clang/test/CodeGen/arm-mve-intrinsics/vhsubq.c b/clang/test/CodeGen/arm-mve-intrinsics/vhsubq.c
index f474af944ea2f..20bd665e0af55 100644
--- a/clang/test/CodeGen/arm-mve-intrinsics/vhsubq.c
+++ b/clang/test/CodeGen/arm-mve-intrinsics/vhsubq.c
@@ -98,7 +98,7 @@ int32x4_t test_vhsubq_m_s32(int32x4_t inactive, int32x4_t a, int32x4_t b, mve_pr
 
 // CHECK-LABEL: @test_vhsubq_n_u8(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = call <16 x i8> @llvm.arm.mve.vhsub.v16i8(<16 x i8> [[A:%.*]], <16 x i8> [[DOTSPLAT]], i32 1)
 // CHECK-NEXT:    ret <16 x i8> [[TMP0]]
@@ -114,7 +114,7 @@ uint8x16_t test_vhsubq_n_u8(uint8x16_t a, uint8_t b)
 
 // CHECK-LABEL: @test_vhsubq_n_s16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = call <8 x i16> @llvm.arm.mve.vhsub.v8i16(<8 x i16> [[A:%.*]], <8 x i16> [[DOTSPLAT]], i32 0)
 // CHECK-NEXT:    ret <8 x i16> [[TMP0]]
@@ -130,7 +130,7 @@ int16x8_t test_vhsubq_n_s16(int16x8_t a, int16_t b)
 
 // CHECK-LABEL: @test_vhsubq_n_u32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vhsub.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[DOTSPLAT]], i32 1)
 // CHECK-NEXT:    ret <4 x i32> [[TMP0]]
@@ -146,7 +146,7 @@ uint32x4_t test_vhsubq_n_u32(uint32x4_t a, uint32_t b)
 
 // CHECK-LABEL: @test_vhsubq_m_n_s8(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
 // CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
@@ -164,7 +164,7 @@ int8x16_t test_vhsubq_m_n_s8(int8x16_t inactive, int8x16_t a, int8_t b, mve_pred
 
 // CHECK-LABEL: @test_vhsubq_m_n_u16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
 // CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
@@ -182,7 +182,7 @@ uint16x8_t test_vhsubq_m_n_u16(uint16x8_t inactive, uint16x8_t a, uint16_t b, mv
 
 // CHECK-LABEL: @test_vhsubq_m_n_s32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
 // CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
@@ -200,7 +200,7 @@ int32x4_t test_vhsubq_m_n_s32(int32x4_t inactive, int32x4_t a, int32_t b, mve_pr
 
 // CHECK-LABEL: @test_vhsubq_x_n_u8(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
 // CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
@@ -218,7 +218,7 @@ uint8x16_t test_vhsubq_x_n_u8(uint8x16_t a, uint8_t b, mve_pred16_t p)
 
 // CHECK-LABEL: @test_vhsubq_x_n_s16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
 // CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
@@ -236,7 +236,7 @@ int16x8_t test_vhsubq_x_n_s16(int16x8_t a, int16_t b, mve_pred16_t p)
 
 // CHECK-LABEL: @test_vhsubq_x_n_u32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
 // CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])

diff  --git a/clang/test/CodeGen/arm-mve-intrinsics/vmulq.c b/clang/test/CodeGen/arm-mve-intrinsics/vmulq.c
index fd1f58281a5f2..332985dec9e89 100644
--- a/clang/test/CodeGen/arm-mve-intrinsics/vmulq.c
+++ b/clang/test/CodeGen/arm-mve-intrinsics/vmulq.c
@@ -192,7 +192,7 @@ float32x4_t test_vmulq_x_f32(float32x4_t a, float32x4_t b, mve_pred16_t p)
 
 // CHECK-LABEL: @test_vmulq_n_u8(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = mul <16 x i8> [[A:%.*]], [[DOTSPLAT]]
 // CHECK-NEXT:    ret <16 x i8> [[TMP0]]
@@ -208,7 +208,7 @@ uint8x16_t test_vmulq_n_u8(uint8x16_t a, uint8_t b)
 
 // CHECK-LABEL: @test_vmulq_n_s16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = mul <8 x i16> [[A:%.*]], [[DOTSPLAT]]
 // CHECK-NEXT:    ret <8 x i16> [[TMP0]]
@@ -224,7 +224,7 @@ int16x8_t test_vmulq_n_s16(int16x8_t a, int16_t b)
 
 // CHECK-LABEL: @test_vmulq_n_u32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = mul <4 x i32> [[A:%.*]], [[DOTSPLAT]]
 // CHECK-NEXT:    ret <4 x i32> [[TMP0]]
@@ -240,7 +240,7 @@ uint32x4_t test_vmulq_n_u32(uint32x4_t a, uint32_t b)
 
 // CHECK-LABEL: @test_vmulq_n_f32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = fmul <4 x float> [[A:%.*]], [[DOTSPLAT]]
 // CHECK-NEXT:    ret <4 x float> [[TMP0]]
@@ -256,7 +256,7 @@ float32x4_t test_vmulq_n_f32(float32x4_t a, float32_t b)
 
 // CHECK-LABEL: @test_vmulq_m_n_s8(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
 // CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
@@ -274,7 +274,7 @@ int8x16_t test_vmulq_m_n_s8(int8x16_t inactive, int8x16_t a, int8_t b, mve_pred1
 
 // CHECK-LABEL: @test_vmulq_m_n_u16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
 // CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
@@ -292,7 +292,7 @@ uint16x8_t test_vmulq_m_n_u16(uint16x8_t inactive, uint16x8_t a, uint16_t b, mve
 
 // CHECK-LABEL: @test_vmulq_m_n_s32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
 // CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
@@ -310,7 +310,7 @@ int32x4_t test_vmulq_m_n_s32(int32x4_t inactive, int32x4_t a, int32_t b, mve_pre
 
 // CHECK-LABEL: @test_vmulq_m_n_f16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
 // CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
@@ -328,7 +328,7 @@ float16x8_t test_vmulq_m_n_f16(float16x8_t inactive, float16x8_t a, float16_t b,
 
 // CHECK-LABEL: @test_vmulq_x_n_u8(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
 // CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
@@ -346,7 +346,7 @@ uint8x16_t test_vmulq_x_n_u8(uint8x16_t a, uint8_t b, mve_pred16_t p)
 
 // CHECK-LABEL: @test_vmulq_x_n_s16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
 // CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
@@ -363,7 +363,7 @@ int16x8_t test_vmulq_x_n_s16(int16x8_t a, int16_t b, mve_pred16_t p)
 }
 // CHECK-LABEL: @test_vmulq_x_n_u32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
 // CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
@@ -381,7 +381,7 @@ uint32x4_t test_vmulq_x_n_u32(uint32x4_t a, uint32_t b, mve_pred16_t p)
 
 // CHECK-LABEL: @test_vmulq_x_n_f32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
 // CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])

diff  --git a/clang/test/CodeGen/arm-mve-intrinsics/vqaddq.c b/clang/test/CodeGen/arm-mve-intrinsics/vqaddq.c
index b870e086db0dd..b0f45388e562c 100644
--- a/clang/test/CodeGen/arm-mve-intrinsics/vqaddq.c
+++ b/clang/test/CodeGen/arm-mve-intrinsics/vqaddq.c
@@ -98,7 +98,7 @@ int32x4_t test_vqaddq_m_s32(int32x4_t inactive, int32x4_t a, int32x4_t b, mve_pr
 
 // CHECK-LABEL: @test_vqaddq_n_u8(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> [[A:%.*]], <16 x i8> [[DOTSPLAT]])
 // CHECK-NEXT:    ret <16 x i8> [[TMP0]]
@@ -114,7 +114,7 @@ uint8x16_t test_vqaddq_n_u8(uint8x16_t a, uint8_t b)
 
 // CHECK-LABEL: @test_vqaddq_n_s16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> [[A:%.*]], <8 x i16> [[DOTSPLAT]])
 // CHECK-NEXT:    ret <8 x i16> [[TMP0]]
@@ -130,7 +130,7 @@ int16x8_t test_vqaddq_n_s16(int16x8_t a, int16_t b)
 
 // CHECK-LABEL: @test_vqaddq_n_u32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[DOTSPLAT]])
 // CHECK-NEXT:    ret <4 x i32> [[TMP0]]
@@ -146,7 +146,7 @@ uint32x4_t test_vqaddq_n_u32(uint32x4_t a, uint32_t b)
 
 // CHECK-LABEL: @test_vqaddq_m_n_s8(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
 // CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
@@ -164,7 +164,7 @@ int8x16_t test_vqaddq_m_n_s8(int8x16_t inactive, int8x16_t a, int8_t b, mve_pred
 
 // CHECK-LABEL: @test_vqaddq_m_n_u16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
 // CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
@@ -182,7 +182,7 @@ uint16x8_t test_vqaddq_m_n_u16(uint16x8_t inactive, uint16x8_t a, uint16_t b, mv
 
 // CHECK-LABEL: @test_vqaddq_m_n_s32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
 // CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])

diff  --git a/clang/test/CodeGen/arm-mve-intrinsics/vqdmulhq.c b/clang/test/CodeGen/arm-mve-intrinsics/vqdmulhq.c
index 6ad53085b1e2d..d9457376186bb 100644
--- a/clang/test/CodeGen/arm-mve-intrinsics/vqdmulhq.c
+++ b/clang/test/CodeGen/arm-mve-intrinsics/vqdmulhq.c
@@ -98,7 +98,7 @@ int32x4_t test_vqdmulhq_m_s32(int32x4_t inactive, int32x4_t a, int32x4_t b, mve_
 
 // CHECK-LABEL: @test_vqdmulhq_n_s8(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = call <16 x i8> @llvm.arm.mve.vqdmulh.v16i8(<16 x i8> [[A:%.*]], <16 x i8> [[DOTSPLAT]])
 // CHECK-NEXT:    ret <16 x i8> [[TMP0]]
@@ -114,7 +114,7 @@ int8x16_t test_vqdmulhq_n_s8(int8x16_t a, int8_t b)
 
 // CHECK-LABEL: @test_vqdmulhq_n_s16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = call <8 x i16> @llvm.arm.mve.vqdmulh.v8i16(<8 x i16> [[A:%.*]], <8 x i16> [[DOTSPLAT]])
 // CHECK-NEXT:    ret <8 x i16> [[TMP0]]
@@ -130,7 +130,7 @@ int16x8_t test_vqdmulhq_n_s16(int16x8_t a, int16_t b)
 
 // CHECK-LABEL: @test_vqdmulhq_n_s32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vqdmulh.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[DOTSPLAT]])
 // CHECK-NEXT:    ret <4 x i32> [[TMP0]]
@@ -146,7 +146,7 @@ int32x4_t test_vqdmulhq_n_s32(int32x4_t a, int32_t b)
 
 // CHECK-LABEL: @test_vqdmulhq_m_n_s8(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
 // CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
@@ -164,7 +164,7 @@ int8x16_t test_vqdmulhq_m_n_s8(int8x16_t inactive, int8x16_t a, int8_t b, mve_pr
 
 // CHECK-LABEL: @test_vqdmulhq_m_n_s16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
 // CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
@@ -182,7 +182,7 @@ int16x8_t test_vqdmulhq_m_n_s16(int16x8_t inactive, int16x8_t a, int16_t b, mve_
 
 // CHECK-LABEL: @test_vqdmulhq_m_n_s32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
 // CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])

diff  --git a/clang/test/CodeGen/arm-mve-intrinsics/vqdmullbq.c b/clang/test/CodeGen/arm-mve-intrinsics/vqdmullbq.c
index 778ed3cf2e6d6..361fcf8019d11 100644
--- a/clang/test/CodeGen/arm-mve-intrinsics/vqdmullbq.c
+++ b/clang/test/CodeGen/arm-mve-intrinsics/vqdmullbq.c
@@ -64,7 +64,7 @@ int64x2_t test_vqdmullbq_m_s32(int64x2_t inactive, int32x4_t a, int32x4_t b, mve
 
 // CHECK-LABEL: @test_vqdmullbq_n_s16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vqdmull.v4i32.v8i16(<8 x i16> [[A:%.*]], <8 x i16> [[DOTSPLAT]], i32 0)
 // CHECK-NEXT:    ret <4 x i32> [[TMP0]]
@@ -79,7 +79,7 @@ int32x4_t test_vqdmullbq_n_s16(int16x8_t a, int16_t b) {
 
 // CHECK-LABEL: @test_vqdmullbq_n_s32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = call <2 x i64> @llvm.arm.mve.vqdmull.v2i64.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[DOTSPLAT]], i32 0)
 // CHECK-NEXT:    ret <2 x i64> [[TMP0]]
@@ -94,7 +94,7 @@ int64x2_t test_vqdmullbq_n_s32(int32x4_t a, int32_t b) {
 
 // CHECK-LABEL: @test_vqdmullbq_m_n_s16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
 // CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
@@ -111,7 +111,7 @@ int32x4_t test_vqdmullbq_m_n_s16(int32x4_t inactive, int16x8_t a, int16_t b, mve
 
 // CHECK-LABEL: @test_vqdmullbq_m_n_s32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
 // CHECK-NEXT:    [[TMP1:%.*]] = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 [[TMP0]])

diff  --git a/clang/test/CodeGen/arm-mve-intrinsics/vqdmulltq.c b/clang/test/CodeGen/arm-mve-intrinsics/vqdmulltq.c
index a1b247d75dffb..acc2b3c04cc30 100644
--- a/clang/test/CodeGen/arm-mve-intrinsics/vqdmulltq.c
+++ b/clang/test/CodeGen/arm-mve-intrinsics/vqdmulltq.c
@@ -64,7 +64,7 @@ int64x2_t test_vqdmulltq_m_s32(int64x2_t inactive, int32x4_t a, int32x4_t b, mve
 
 // CHECK-LABEL: @test_vqdmulltq_n_s16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vqdmull.v4i32.v8i16(<8 x i16> [[A:%.*]], <8 x i16> [[DOTSPLAT]], i32 1)
 // CHECK-NEXT:    ret <4 x i32> [[TMP0]]
@@ -79,7 +79,7 @@ int32x4_t test_vqdmulltq_n_s16(int16x8_t a, int16_t b) {
 
 // CHECK-LABEL: @test_vqdmulltq_n_s32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = call <2 x i64> @llvm.arm.mve.vqdmull.v2i64.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[DOTSPLAT]], i32 1)
 // CHECK-NEXT:    ret <2 x i64> [[TMP0]]
@@ -94,7 +94,7 @@ int64x2_t test_vqdmulltq_n_s32(int32x4_t a, int32_t b) {
 
 // CHECK-LABEL: @test_vqdmulltq_m_n_s16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
 // CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
@@ -111,7 +111,7 @@ int32x4_t test_vqdmulltq_m_n_s16(int32x4_t inactive, int16x8_t a, int16_t b, mve
 
 // CHECK-LABEL: @test_vqdmulltq_m_n_s32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
 // CHECK-NEXT:    [[TMP1:%.*]] = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 [[TMP0]])

diff  --git a/clang/test/CodeGen/arm-mve-intrinsics/vqrdmulhq.c b/clang/test/CodeGen/arm-mve-intrinsics/vqrdmulhq.c
index dd27d7a063a95..3538b901f5eb2 100644
--- a/clang/test/CodeGen/arm-mve-intrinsics/vqrdmulhq.c
+++ b/clang/test/CodeGen/arm-mve-intrinsics/vqrdmulhq.c
@@ -98,7 +98,7 @@ int32x4_t test_vqrdmulhq_m_s32(int32x4_t inactive, int32x4_t a, int32x4_t b, mve
 
 // CHECK-LABEL: @test_vqrdmulhq_n_s8(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = call <16 x i8> @llvm.arm.mve.vqrdmulh.v16i8(<16 x i8> [[A:%.*]], <16 x i8> [[DOTSPLAT]])
 // CHECK-NEXT:    ret <16 x i8> [[TMP0]]
@@ -114,7 +114,7 @@ int8x16_t test_vqrdmulhq_n_s8(int8x16_t a, int8_t b)
 
 // CHECK-LABEL: @test_vqrdmulhq_n_s16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = call <8 x i16> @llvm.arm.mve.vqrdmulh.v8i16(<8 x i16> [[A:%.*]], <8 x i16> [[DOTSPLAT]])
 // CHECK-NEXT:    ret <8 x i16> [[TMP0]]
@@ -130,7 +130,7 @@ int16x8_t test_vqrdmulhq_n_s16(int16x8_t a, int16_t b)
 
 // CHECK-LABEL: @test_vqrdmulhq_n_s32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vqrdmulh.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[DOTSPLAT]])
 // CHECK-NEXT:    ret <4 x i32> [[TMP0]]
@@ -146,7 +146,7 @@ int32x4_t test_vqrdmulhq_n_s32(int32x4_t a, int32_t b)
 
 // CHECK-LABEL: @test_vqrdmulhq_m_n_s8(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
 // CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
@@ -164,7 +164,7 @@ int8x16_t test_vqrdmulhq_m_n_s8(int8x16_t inactive, int8x16_t a, int8_t b, mve_p
 
 // CHECK-LABEL: @test_vqrdmulhq_m_n_s16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
 // CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
@@ -182,7 +182,7 @@ int16x8_t test_vqrdmulhq_m_n_s16(int16x8_t inactive, int16x8_t a, int16_t b, mve
 
 // CHECK-LABEL: @test_vqrdmulhq_m_n_s32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
 // CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])

diff  --git a/clang/test/CodeGen/arm-mve-intrinsics/vqsubq.c b/clang/test/CodeGen/arm-mve-intrinsics/vqsubq.c
index b4c7a55e54e99..51ab22ad991f1 100644
--- a/clang/test/CodeGen/arm-mve-intrinsics/vqsubq.c
+++ b/clang/test/CodeGen/arm-mve-intrinsics/vqsubq.c
@@ -98,7 +98,7 @@ int32x4_t test_vqsubq_m_s32(int32x4_t inactive, int32x4_t a, int32x4_t b, mve_pr
 
 // CHECK-LABEL: @test_vqsubq_n_u8(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> [[A:%.*]], <16 x i8> [[DOTSPLAT]])
 // CHECK-NEXT:    ret <16 x i8> [[TMP0]]
@@ -114,7 +114,7 @@ uint8x16_t test_vqsubq_n_u8(uint8x16_t a, uint8_t b)
 
 // CHECK-LABEL: @test_vqsubq_n_s16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> [[A:%.*]], <8 x i16> [[DOTSPLAT]])
 // CHECK-NEXT:    ret <8 x i16> [[TMP0]]
@@ -130,7 +130,7 @@ int16x8_t test_vqsubq_n_s16(int16x8_t a, int16_t b)
 
 // CHECK-LABEL: @test_vqsubq_n_u32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[DOTSPLAT]])
 // CHECK-NEXT:    ret <4 x i32> [[TMP0]]
@@ -146,7 +146,7 @@ uint32x4_t test_vqsubq_n_u32(uint32x4_t a, uint32_t b)
 
 // CHECK-LABEL: @test_vqsubq_m_n_s8(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
 // CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
@@ -164,7 +164,7 @@ int8x16_t test_vqsubq_m_n_s8(int8x16_t inactive, int8x16_t a, int8_t b, mve_pred
 
 // CHECK-LABEL: @test_vqsubq_m_n_u16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
 // CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
@@ -182,7 +182,7 @@ uint16x8_t test_vqsubq_m_n_u16(uint16x8_t inactive, uint16x8_t a, uint16_t b, mv
 
 // CHECK-LABEL: @test_vqsubq_m_n_s32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
 // CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])

diff  --git a/clang/test/CodeGen/arm-mve-intrinsics/vsubq.c b/clang/test/CodeGen/arm-mve-intrinsics/vsubq.c
index b6e1ebe4afd7f..0a042b4e8c93e 100644
--- a/clang/test/CodeGen/arm-mve-intrinsics/vsubq.c
+++ b/clang/test/CodeGen/arm-mve-intrinsics/vsubq.c
@@ -100,7 +100,7 @@ float16x8_t test_vsubq_x_f16(float16x8_t a, float16x8_t b, mve_pred16_t p)
 
 // CHECK-LABEL: @test_vsubq_n_u32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = sub <4 x i32> [[A:%.*]], [[DOTSPLAT]]
 // CHECK-NEXT:    ret <4 x i32> [[TMP0]]
@@ -116,7 +116,7 @@ uint32x4_t test_vsubq_n_u32(uint32x4_t a, uint32_t b)
 
 // CHECK-LABEL: @test_vsubq_n_f16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = fsub <8 x half> [[A:%.*]], [[DOTSPLAT]]
 // CHECK-NEXT:    ret <8 x half> [[TMP0]]
@@ -132,7 +132,7 @@ float16x8_t test_vsubq_n_f16(float16x8_t a, float16_t b)
 
 // CHECK-LABEL: @test_vsubq_m_n_s8(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
 // CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
@@ -150,7 +150,7 @@ int8x16_t test_vsubq_m_n_s8(int8x16_t inactive, int8x16_t a, int8_t b, mve_pred1
 
 // CHECK-LABEL: @test_vsubq_m_n_f32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
 // CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
@@ -168,7 +168,7 @@ float32x4_t test_vsubq_m_n_f32(float32x4_t inactive, float32x4_t a, float32_t b,
 
 // CHECK-LABEL: @test_vsubq_x_n_u16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
 // CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
@@ -186,7 +186,7 @@ uint16x8_t test_vsubq_x_n_u16(uint16x8_t a, uint16_t b, mve_pred16_t p)
 
 // CHECK-LABEL: @test_vsubq_x_n_f16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[B:%.*]], i32 0
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[B:%.*]], i64 0
 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
 // CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])

diff  --git a/clang/test/CodeGen/matrix-type-operators-fast-math.c b/clang/test/CodeGen/matrix-type-operators-fast-math.c
index f36eda73b6c99..ae02553adb90f 100644
--- a/clang/test/CodeGen/matrix-type-operators-fast-math.c
+++ b/clang/test/CodeGen/matrix-type-operators-fast-math.c
@@ -42,7 +42,7 @@ void add_matrix_scalar_double_float(dx5x5_t a, float vf) {
   // CHECK:       [[MATRIX:%.*]] = load <25 x double>, ptr {{.*}}, align 8
   // CHECK-NEXT:  [[SCALAR:%.*]] = load float, ptr %vf.addr, align 4
   // CHECK-NEXT:  [[SCALAR_EXT:%.*]] = fpext float [[SCALAR]] to double
-  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <25 x double> poison, double [[SCALAR_EXT]], i32 0
+  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <25 x double> poison, double [[SCALAR_EXT]], i64 0
   // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <25 x double> [[SCALAR_EMBED]], <25 x double> poison, <25 x i32> zeroinitializer
   // CHECK-NEXT:  [[RES:%.*]] = fadd reassoc nnan ninf nsz arcp afn <25 x double> [[MATRIX]], [[SCALAR_EMBED1]]
   // CHECK-NEXT:  store <25 x double> [[RES]], ptr {{.*}}, align 8
@@ -55,7 +55,7 @@ void add_compound_matrix_scalar_double_float(dx5x5_t a, float vf) {
   // CHECK:  [[SCALAR:%.*]] = load float, ptr %vf.addr, align 4
   // CHECK-NEXT:  [[SCALAR_EXT:%.*]] = fpext float [[SCALAR]] to double
   // CHECK-NEXT:  [[MATRIX:%.*]] = load <25 x double>, ptr {{.*}}, align 8
-  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <25 x double> poison, double [[SCALAR_EXT]], i32 0
+  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <25 x double> poison, double [[SCALAR_EXT]], i64 0
   // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <25 x double> [[SCALAR_EMBED]], <25 x double> poison, <25 x i32> zeroinitializer
   // CHECK-NEXT:  [[RES:%.*]] = fadd reassoc nnan ninf nsz arcp afn <25 x double> [[MATRIX]], [[SCALAR_EMBED1]]
   // CHECK-NEXT:  store <25 x double> [[RES]], ptr {{.*}}, align 8
@@ -68,7 +68,7 @@ void subtract_compound_matrix_scalar_double_float(dx5x5_t a, float vf) {
   // CHECK:  [[SCALAR:%.*]] = load float, ptr %vf.addr, align 4
   // CHECK-NEXT:  [[SCALAR_EXT:%.*]] = fpext float [[SCALAR]] to double
   // CHECK-NEXT:  [[MATRIX:%.*]] = load <25 x double>, ptr {{.*}}, align 8
-  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <25 x double> poison, double [[SCALAR_EXT]], i32 0
+  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <25 x double> poison, double [[SCALAR_EXT]], i64 0
   // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <25 x double> [[SCALAR_EMBED]], <25 x double> poison, <25 x i32> zeroinitializer
   // CHECK-NEXT:  [[RES:%.*]] = fsub reassoc nnan ninf nsz arcp afn <25 x double> [[MATRIX]], [[SCALAR_EMBED1]]
   // CHECK-NEXT:  store <25 x double> [[RES]], ptr {{.*}}, align 8
@@ -105,7 +105,7 @@ void multiply_compound_matrix_matrix_double(dx5x5_t b, dx5x5_t c) {
 // CHECK:         [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8
 // CHECK-NEXT:    [[S:%.*]] = load float, ptr %s.addr, align 4
 // CHECK-NEXT:    [[S_EXT:%.*]] = fpext float [[S]] to double
-// CHECK-NEXT:    [[VECINSERT:%.*]] = insertelement <25 x double> poison, double [[S_EXT]], i32 0
+// CHECK-NEXT:    [[VECINSERT:%.*]] = insertelement <25 x double> poison, double [[S_EXT]], i64 0
 // CHECK-NEXT:    [[VECSPLAT:%.*]] = shufflevector <25 x double> [[VECINSERT]], <25 x double> poison, <25 x i32> zeroinitializer
 // CHECK-NEXT:    [[RES:%.*]] = fmul reassoc nnan ninf nsz arcp afn <25 x double> [[A]], [[VECSPLAT]]
 // CHECK-NEXT:    store <25 x double> [[RES]], ptr {{.*}}, align 8
@@ -119,7 +119,7 @@ void multiply_double_matrix_scalar_float(dx5x5_t a, float s) {
 // CHECK:         [[S:%.*]] = load float, ptr %s.addr, align 4
 // CHECK-NEXT:    [[S_EXT:%.*]] = fpext float [[S]] to double
 // CHECK-NEXT:    [[A:%.*]] = load <25 x double>, ptr {{.*}}, align 8
-// CHECK-NEXT:    [[VECINSERT:%.*]] = insertelement <25 x double> poison, double [[S_EXT]], i32 0
+// CHECK-NEXT:    [[VECINSERT:%.*]] = insertelement <25 x double> poison, double [[S_EXT]], i64 0
 // CHECK-NEXT:    [[VECSPLAT:%.*]] = shufflevector <25 x double> [[VECINSERT]], <25 x double> poison, <25 x i32> zeroinitializer
 // CHECK-NEXT:    [[RES:%.*]] = fmul reassoc nnan ninf nsz arcp afn <25 x double> [[A]], [[VECSPLAT]]
 // CHECK-NEXT:    store <25 x double> [[RES]], ptr {{.*}}, align 8
@@ -133,7 +133,7 @@ void multiply_compound_double_matrix_scalar_float(dx5x5_t a, float s) {
 // CHECK:         [[MAT:%.*]] = load <6 x float>, ptr [[MAT_ADDR:%.*]], align 4
 // CHECK-NEXT:    [[S:%.*]] = load double, ptr %s.addr, align 8
 // CHECK-NEXT:    [[S_TRUNC:%.*]] = fptrunc double [[S]] to float
-// CHECK-NEXT:    [[VECINSERT:%.*]] = insertelement <6 x float> poison, float [[S_TRUNC]], i32 0
+// CHECK-NEXT:    [[VECINSERT:%.*]] = insertelement <6 x float> poison, float [[S_TRUNC]], i64 0
 // CHECK-NEXT:    [[VECSPLAT:%.*]] = shufflevector <6 x float> [[VECINSERT]], <6 x float> poison, <6 x i32> zeroinitializer
 // CHECK-NEXT:    [[RES:%.*]] = fdiv reassoc nnan ninf nsz arcp afn <6 x float> [[MAT]], [[VECSPLAT]]
 // CHECK-NEXT:    store <6 x float> [[RES]], ptr [[MAT_ADDR]], align 4

diff  --git a/clang/test/CodeGen/matrix-type-operators.c b/clang/test/CodeGen/matrix-type-operators.c
index 8f80877b271a7..62a1838ed3a56 100644
--- a/clang/test/CodeGen/matrix-type-operators.c
+++ b/clang/test/CodeGen/matrix-type-operators.c
@@ -88,7 +88,7 @@ void add_matrix_scalar_double_float(dx5x5_t a, float vf) {
   // OPT:         [[MATRIX:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
   // OPT-NEXT:    [[SCALAR:%.*]] = load float, float* %vf.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
   // CHECK-NEXT:  [[SCALAR_EXT:%.*]] = fpext float [[SCALAR]] to double
-  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <25 x double> poison, double [[SCALAR_EXT]], i32 0
+  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <25 x double> poison, double [[SCALAR_EXT]], i64 0
   // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <25 x double> [[SCALAR_EMBED]], <25 x double> poison, <25 x i32> zeroinitializer
   // CHECK-NEXT:  [[RES:%.*]] = fadd <25 x double> [[MATRIX]], [[SCALAR_EMBED1]]
   // CHECK-NEXT:  store <25 x double> [[RES]], <25 x double>* {{.*}}, align 8
@@ -103,7 +103,7 @@ void add_compound_matrix_scalar_double_float(dx5x5_t a, float vf) {
   // CHECK-NEXT:  [[SCALAR_EXT:%.*]] = fpext float [[SCALAR]] to double
   // NOOPT-NEXT:  [[MATRIX:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8{{$}}
   // OPT-NEXT:    [[MATRIX:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
-  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <25 x double> poison, double [[SCALAR_EXT]], i32 0
+  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <25 x double> poison, double [[SCALAR_EXT]], i64 0
   // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <25 x double> [[SCALAR_EMBED]], <25 x double> poison, <25 x i32> zeroinitializer
   // CHECK-NEXT:  [[RES:%.*]] = fadd <25 x double> [[MATRIX]], [[SCALAR_EMBED1]]
   // CHECK-NEXT:  store <25 x double> [[RES]], <25 x double>* {{.*}}, align 8
@@ -118,7 +118,7 @@ void subtract_compound_matrix_scalar_double_float(dx5x5_t a, float vf) {
   // CHECK-NEXT:  [[SCALAR_EXT:%.*]] = fpext float [[SCALAR]] to double
   // NOOPT-NEXT:  [[MATRIX:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8{{$}}
   // OPT-NEXT:    [[MATRIX:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
-  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <25 x double> poison, double [[SCALAR_EXT]], i32 0
+  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <25 x double> poison, double [[SCALAR_EXT]], i64 0
   // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <25 x double> [[SCALAR_EMBED]], <25 x double> poison, <25 x i32> zeroinitializer
   // CHECK-NEXT:  [[RES:%.*]] = fsub <25 x double> [[MATRIX]], [[SCALAR_EMBED1]]
   // CHECK-NEXT:  store <25 x double> [[RES]], <25 x double>* {{.*}}, align 8
@@ -132,7 +132,7 @@ void add_matrix_scalar_double_double(dx5x5_t a, double vd) {
   // NOOPT-NEXT:  [[SCALAR:%.*]] = load double, double* %vd.addr, align 8{{$}}
   // OPT:         [[MATRIX:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
   // OPT-NEXT:    [[SCALAR:%.*]] = load double, double* %vd.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
-  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <25 x double> poison, double [[SCALAR]], i32 0
+  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <25 x double> poison, double [[SCALAR]], i64 0
   // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <25 x double> [[SCALAR_EMBED]], <25 x double> poison, <25 x i32> zeroinitializer
   // CHECK-NEXT:  [[RES:%.*]] = fadd <25 x double> [[MATRIX]], [[SCALAR_EMBED1]]
   // CHECK-NEXT:  store <25 x double> [[RES]], <25 x double>* {{.*}}, align 8
@@ -146,7 +146,7 @@ void add_compound_matrix_scalar_double_double(dx5x5_t a, double vd) {
   // NOOPT-NEXT:  [[MATRIX:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8{{$}}
   // OPT:         [[SCALAR:%.*]] = load double, double* %vd.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
   // OPT-NEXT:    [[MATRIX:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
-  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <25 x double> poison, double [[SCALAR]], i32 0
+  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <25 x double> poison, double [[SCALAR]], i64 0
   // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <25 x double> [[SCALAR_EMBED]], <25 x double> poison, <25 x i32> zeroinitializer
   // CHECK-NEXT:  [[RES:%.*]] = fadd <25 x double> [[MATRIX]], [[SCALAR_EMBED1]]
   // store <25 x double> [[RES]], <25 x double>* {{.*}}, align 8
@@ -159,7 +159,7 @@ void subtract_compound_matrix_scalar_double_double(dx5x5_t a, double vd) {
   // NOOPT-NEXT:  [[MATRIX:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8{{$}}
   // OPT:         [[SCALAR:%.*]] = load double, double* %vd.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
   // OPT-NEXT:    [[MATRIX:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
-  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <25 x double> poison, double [[SCALAR]], i32 0
+  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <25 x double> poison, double [[SCALAR]], i64 0
   // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <25 x double> [[SCALAR_EMBED]], <25 x double> poison, <25 x i32> zeroinitializer
   // CHECK-NEXT:  [[RES:%.*]] = fsub <25 x double> [[MATRIX]], [[SCALAR_EMBED1]]
   // store <25 x double> [[RES]], <25 x double>* {{.*}}, align 8
@@ -172,7 +172,7 @@ void add_matrix_scalar_float_float(fx2x3_t b, float vf) {
   // NOOPT-NEXT:  [[SCALAR:%.*]] = load float, float* %vf.addr, align 4{{$}}
   // OPT:         [[MATRIX:%.*]] = load <6 x float>, <6 x float>* {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
   // OPT-NEXT:    [[SCALAR:%.*]] = load float, float* %vf.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
-  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <6 x float> poison, float [[SCALAR]], i32 0
+  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <6 x float> poison, float [[SCALAR]], i64 0
   // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <6 x float> [[SCALAR_EMBED]], <6 x float> poison, <6 x i32> zeroinitializer
   // CHECK-NEXT:  [[RES:%.*]] = fadd <6 x float> [[MATRIX]], [[SCALAR_EMBED1]]
   // CHECK-NEXT:  store <6 x float> [[RES]], <6 x float>* {{.*}}, align 4
@@ -186,7 +186,7 @@ void add_compound_matrix_scalar_float_float(fx2x3_t b, float vf) {
   // NOOPT-NEXT:  [[MATRIX:%.*]] = load <6 x float>, <6 x float>* %0, align 4{{$}}
   // OPT:         [[SCALAR:%.*]] = load float, float* %vf.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
   // OPT-NEXT:    [[MATRIX:%.*]] = load <6 x float>, <6 x float>* %0, align 4, !tbaa !{{[0-9]+}}{{$}}
-  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <6 x float> poison, float [[SCALAR]], i32 0
+  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <6 x float> poison, float [[SCALAR]], i64 0
   // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <6 x float> [[SCALAR_EMBED]], <6 x float> poison, <6 x i32> zeroinitializer
   // CHECK-NEXT:  [[RES:%.*]] = fadd <6 x float> [[MATRIX]], [[SCALAR_EMBED1]]
   // CHECK-NEXT:  store <6 x float> [[RES]], <6 x float>* {{.*}}, align 4
@@ -199,7 +199,7 @@ void subtract_compound_matrix_scalar_float_float(fx2x3_t b, float vf) {
   // NOOPT-NEXT:  [[MATRIX:%.*]] = load <6 x float>, <6 x float>* %0, align 4{{$}}
   // OPT:         [[SCALAR:%.*]] = load float, float* %vf.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
   // OPT-NEXT:    [[MATRIX:%.*]] = load <6 x float>, <6 x float>* %0, align 4, !tbaa !{{[0-9]+}}{{$}}
-  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <6 x float> poison, float [[SCALAR]], i32 0
+  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <6 x float> poison, float [[SCALAR]], i64 0
   // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <6 x float> [[SCALAR_EMBED]], <6 x float> poison, <6 x i32> zeroinitializer
   // CHECK-NEXT:  [[RES:%.*]] = fsub <6 x float> [[MATRIX]], [[SCALAR_EMBED1]]
   // CHECK-NEXT:  store <6 x float> [[RES]], <6 x float>* {{.*}}, align 4
@@ -213,7 +213,7 @@ void add_matrix_scalar_float_double(fx2x3_t b, double vd) {
   // OPT:         [[MATRIX:%.*]] = load <6 x float>, <6 x float>* {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
   // OPT-NEXT:    [[SCALAR:%.*]] = load double, double* %vd.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
   // CHECK-NEXT:  [[SCALAR_TRUNC:%.*]] = fptrunc double [[SCALAR]] to float
-  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <6 x float> poison, float [[SCALAR_TRUNC]], i32 0
+  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <6 x float> poison, float [[SCALAR_TRUNC]], i64 0
   // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <6 x float> [[SCALAR_EMBED]], <6 x float> poison, <6 x i32> zeroinitializer
   // CHECK-NEXT:  [[RES:%.*]] = fadd <6 x float> [[MATRIX]], [[SCALAR_EMBED1]]
   // CHECK-NEXT:  store <6 x float> [[RES]], <6 x float>* {{.*}}, align 4
@@ -228,7 +228,7 @@ void add_compound_matrix_scalar_float_double(fx2x3_t b, double vd) {
   // CHECK-NEXT:  [[SCALAR_TRUNC:%.*]] = fptrunc double [[SCALAR]] to float
   // NOOPT-NEXT:  [[MATRIX:%.*]] = load <6 x float>, <6 x float>* {{.*}}, align 4{{$}}
   // OPT-NEXT:    [[MATRIX:%.*]] = load <6 x float>, <6 x float>* {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
-  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <6 x float> poison, float [[SCALAR_TRUNC]], i32 0
+  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <6 x float> poison, float [[SCALAR_TRUNC]], i64 0
   // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <6 x float> [[SCALAR_EMBED]], <6 x float> poison, <6 x i32> zeroinitializer
   // CHECK-NEXT:  [[RES:%.*]] = fadd <6 x float> [[MATRIX]], [[SCALAR_EMBED1]]
   // CHECK-NEXT:  store <6 x float> [[RES]], <6 x float>* {{.*}}, align 4
@@ -242,7 +242,7 @@ void subtract_compound_matrix_scalar_float_double(fx2x3_t b, double vd) {
   // CHECK-NEXT:  [[SCALAR_TRUNC:%.*]] = fptrunc double [[SCALAR]] to float
   // NOOPT-NEXT:  [[MATRIX:%.*]] = load <6 x float>, <6 x float>* {{.*}}, align 4{{$}}
   // OPT-NEXT:    [[MATRIX:%.*]] = load <6 x float>, <6 x float>* {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
-  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <6 x float> poison, float [[SCALAR_TRUNC]], i32 0
+  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <6 x float> poison, float [[SCALAR_TRUNC]], i64 0
   // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <6 x float> [[SCALAR_EMBED]], <6 x float> poison, <6 x i32> zeroinitializer
   // CHECK-NEXT:  [[RES:%.*]] = fsub <6 x float> [[MATRIX]], [[SCALAR_EMBED1]]
   // CHECK-NEXT:  store <6 x float> [[RES]], <6 x float>* {{.*}}, align 4
@@ -327,7 +327,7 @@ void add_matrix_scalar_int_short(ix9x3_t a, short vs) {
   // OPT:          [[MATRIX:%.*]] = load <27 x i32>, <27 x i32>* [[MAT_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}}
   // OPT-NEXT:     [[SCALAR:%.*]] = load i16, i16* %vs.addr, align 2, !tbaa !{{[0-9]+}}{{$}}
   // CHECK-NEXT:   [[SCALAR_EXT:%.*]] = sext i16 [[SCALAR]] to i32
-  // CHECK-NEXT:   [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_EXT]], i32 0
+  // CHECK-NEXT:   [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_EXT]], i64 0
   // CHECK-NEXT:   [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer
   // CHECK-NEXT:   [[RES:%.*]] = add <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]]
   // CHECK-NEXT:   store <27 x i32> [[RES]], <27 x i32>* [[MAT_ADDR]], align 4
@@ -342,7 +342,7 @@ void add_compound_matrix_scalar_int_short(ix9x3_t a, short vs) {
   // CHECK-NEXT:  [[SCALAR_EXT:%.*]] = sext i16 [[SCALAR]] to i32
   // NOOPT-NEXT:  [[MATRIX:%.*]] = load <27 x i32>, <27 x i32>* %0, align 4{{$}}
   // OPT-NEXT:    [[MATRIX:%.*]] = load <27 x i32>, <27 x i32>* %0, align 4, !tbaa !{{[0-9]+}}{{$}}
-  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_EXT:%.*]], i32 0
+  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_EXT:%.*]], i64 0
   // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer
   // CHECK-NEXT:  [[RES:%.*]] = add <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]]
   // CHECK-NEXT:  store <27 x i32> [[RES]], <27 x i32>* [[MAT_ADDR]], align 4
@@ -357,7 +357,7 @@ void subtract_compound_matrix_scalar_int_short(ix9x3_t a, short vs) {
   // CHECK-NEXT:  [[SCALAR_EXT:%.*]] = sext i16 [[SCALAR]] to i32
   // NOOPT-NEXT:  [[MATRIX:%.*]] = load <27 x i32>, <27 x i32>* %0, align 4{{$}}
   // OPT-NEXT:    [[MATRIX:%.*]] = load <27 x i32>, <27 x i32>* %0, align 4, !tbaa !{{[0-9]+}}{{$}}
-  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_EXT:%.*]], i32 0
+  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_EXT:%.*]], i64 0
   // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer
   // CHECK-NEXT:  [[RES:%.*]] = sub <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]]
   // CHECK-NEXT:  store <27 x i32> [[RES]], <27 x i32>* [[MAT_ADDR]], align 4
@@ -372,7 +372,7 @@ void add_matrix_scalar_int_long_int(ix9x3_t a, long int vli) {
   // OPT:          [[MATRIX:%.*]] = load <27 x i32>, <27 x i32>* [[MAT_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}}
   // OPT-NEXT:     [[SCALAR:%.*]] = load i64, i64* %vli.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
   // CHECK-NEXT:   [[SCALAR_TRUNC:%.*]] = trunc i64 [[SCALAR]] to i32
-  // CHECK-NEXT:   [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_TRUNC]], i32 0
+  // CHECK-NEXT:   [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_TRUNC]], i64 0
   // CHECK-NEXT:   [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer
   // CHECK-NEXT:   [[RES:%.*]] = add <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]]
   // CHECK-NEXT:   store <27 x i32> [[RES]], <27 x i32>* [[MAT_ADDR]], align 4
@@ -387,7 +387,7 @@ void add_compound_matrix_scalar_int_long_int(ix9x3_t a, long int vli) {
   // CHECK-NEXT:  [[SCALAR_TRUNC:%.*]] = trunc i64 %1 to i32
   // NOOPT-NEXT:  [[MATRIX:%.*]] = load <27 x i32>, <27 x i32>* %0, align 4{{$}}
   // OPT-NEXT:    [[MATRIX:%.*]] = load <27 x i32>, <27 x i32>* %0, align 4, !tbaa !{{[0-9]+}}{{$}}
-  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_TRUNC]], i32 0
+  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_TRUNC]], i64 0
   // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer
   // CHECK-NEXT:  [[RES:%.*]] = add <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]]
   // CHECK-NEXT:  store <27 x i32> [[RES]], <27 x i32>* [[MAT_ADDR]], align 4
@@ -402,7 +402,7 @@ void subtract_compound_matrix_scalar_int_long_int(ix9x3_t a, long int vli) {
   // CHECK-NEXT:  [[SCALAR_TRUNC:%.*]] = trunc i64 %1 to i32
   // NOOPT-NEXT:  [[MATRIX:%.*]] = load <27 x i32>, <27 x i32>* %0, align 4{{$}}
   // OPT-NEXT:    [[MATRIX:%.*]] = load <27 x i32>, <27 x i32>* %0, align 4, !tbaa !{{[0-9]+}}{{$}}
-  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_TRUNC]], i32 0
+  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_TRUNC]], i64 0
   // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer
   // CHECK-NEXT:  [[RES:%.*]] = sub <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]]
   // CHECK-NEXT:  store <27 x i32> [[RES]], <27 x i32>* [[MAT_ADDR]], align 4
@@ -417,7 +417,7 @@ void add_matrix_scalar_int_unsigned_long_long(ix9x3_t a, unsigned long long int
   // OPT:          [[MATRIX:%.*]] = load <27 x i32>, <27 x i32>* [[MAT_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}}
   // OPT-NEXT:     [[SCALAR:%.*]] = load i64, i64* %vulli.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
   // CHECK-NEXT:   [[SCALAR_TRUNC:%.*]] = trunc i64 [[SCALAR]] to i32
-  // CHECK-NEXT:   [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_TRUNC]], i32 0
+  // CHECK-NEXT:   [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_TRUNC]], i64 0
   // CHECK-NEXT:   [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer
   // CHECK-NEXT:   [[RES:%.*]] = add <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]]
   // CHECK-NEXT:   store <27 x i32> [[RES]], <27 x i32>* [[MAT_ADDR]], align 4
@@ -432,7 +432,7 @@ void add_compound_matrix_scalar_int_unsigned_long_long(ix9x3_t a, unsigned long
   // CHECK-NEXT:   [[SCALAR_TRUNC:%.*]] = trunc i64 [[SCALAR]] to i32
   // NOOPT-NEXT:   [[MATRIX:%.*]] = load <27 x i32>, <27 x i32>* [[MATRIX_ADDR:%.*]], align 4{{$}}
   // OPT-NEXT:     [[MATRIX:%.*]] = load <27 x i32>, <27 x i32>* [[MATRIX_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}}
-  // CHECK-NEXT:   [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_TRUNC]], i32 0
+  // CHECK-NEXT:   [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_TRUNC]], i64 0
   // CHECK-NEXT:   [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer
   // CHECK-NEXT:   [[RES:%.*]] = add <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]]
   // CHECK-NEXT:   store <27 x i32> [[RES]], <27 x i32>* [[MAT_ADDR]], align 4
@@ -447,7 +447,7 @@ void subtract_compound_matrix_scalar_int_unsigned_long_long(ix9x3_t a, unsigned
   // CHECK-NEXT:   [[SCALAR_TRUNC:%.*]] = trunc i64 [[SCALAR]] to i32
   // NOOPT-NEXT:   [[MATRIX:%.*]] = load <27 x i32>, <27 x i32>* [[MATRIX_ADDR:%.*]], align 4{{$}}
   // OPT-NEXT:     [[MATRIX:%.*]] = load <27 x i32>, <27 x i32>* [[MATRIX_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}}
-  // CHECK-NEXT:   [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_TRUNC]], i32 0
+  // CHECK-NEXT:   [[SCALAR_EMBED:%.*]] = insertelement <27 x i32> poison, i32 [[SCALAR_TRUNC]], i64 0
   // CHECK-NEXT:   [[SCALAR_EMBED1:%.*]] = shufflevector <27 x i32> [[SCALAR_EMBED]], <27 x i32> poison, <27 x i32> zeroinitializer
   // CHECK-NEXT:   [[RES:%.*]] = sub <27 x i32> [[MATRIX]], [[SCALAR_EMBED1]]
   // CHECK-NEXT:   store <27 x i32> [[RES]], <27 x i32>* [[MAT_ADDR]], align 4
@@ -462,7 +462,7 @@ void add_matrix_scalar_long_long_int_short(ullx4x2_t b, short vs) {
   // CHECK-NEXT:    [[SCALAR_EXT:%.*]] = sext i16 [[SCALAR]] to i64
   // NOOPT-NEXT:    [[MATRIX:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 8{{$}}
   // OPT-NEXT:      [[MATRIX:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
-  // CHECK-NEXT:    [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR_EXT]], i32 0
+  // CHECK-NEXT:    [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR_EXT]], i64 0
   // CHECK-NEXT:    [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer
   // CHECK-NEXT:    [[RES:%.*]] = add <8 x i64> [[SCALAR_EMBED1]], [[MATRIX]]
   // CHECK-NEXT:    store <8 x i64> [[RES]], <8 x i64>* {{.*}}, align 8
@@ -477,7 +477,7 @@ void add_compound_matrix_scalar_long_long_int_short(ullx4x2_t b, short vs) {
   // CHECK-NEXT:  [[SCALAR_EXT:%.*]] = sext i16 [[SCALAR]] to i64
   // NOOPT-NEXT:  [[MATRIX:%.*]] = load <8 x i64>, <8 x i64>* %0, align 8{{$}}
   // OPT-NEXT:    [[MATRIX:%.*]] = load <8 x i64>, <8 x i64>* %0, align 8, !tbaa !{{[0-9]+}}{{$}}
-  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR_EXT]], i32 0
+  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR_EXT]], i64 0
   // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer
   // CHECK-NEXT:  [[RES:%.*]] = add <8 x i64> [[MATRIX]], [[SCALAR_EMBED1]]
   // CHECK-NEXT:  store <8 x i64> [[RES]], <8 x i64>* {{.*}}, align 8
@@ -492,7 +492,7 @@ void subtract_compound_matrix_scalar_long_long_int_short(ullx4x2_t b, short vs)
   // CHECK-NEXT:  [[SCALAR_EXT:%.*]] = sext i16 [[SCALAR]] to i64
   // NOOPT-NEXT:  [[MATRIX:%.*]] = load <8 x i64>, <8 x i64>* %0, align 8{{$}}
   // OPT-NEXT:    [[MATRIX:%.*]] = load <8 x i64>, <8 x i64>* %0, align 8, !tbaa !{{[0-9]+}}{{$}}
-  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR_EXT]], i32 0
+  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR_EXT]], i64 0
   // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer
   // CHECK-NEXT:  [[RES:%.*]] = sub <8 x i64> [[MATRIX]], [[SCALAR_EMBED1]]
   // CHECK-NEXT:  store <8 x i64> [[RES]], <8 x i64>* {{.*}}, align 8
@@ -506,7 +506,7 @@ void add_matrix_scalar_long_long_int_int(ullx4x2_t b, long int vli) {
   // NOOPT-NEXT:    [[MATRIX:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 8{{$}}
   // OPT:           [[SCALAR:%.*]] = load i64, i64* %vli.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
   // OPT-NEXT:      [[MATRIX:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
-  // CHECK-NEXT:    [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR]], i32 0
+  // CHECK-NEXT:    [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR]], i64 0
   // CHECK-NEXT:    [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer
   // CHECK-NEXT:    [[RES:%.*]] = add <8 x i64> [[SCALAR_EMBED1]], [[MATRIX]]
   // CHECK-NEXT:    store <8 x i64> [[RES]], <8 x i64>* {{.*}}, align 8
@@ -520,7 +520,7 @@ void add_compound_matrix_scalar_long_long_int_int(ullx4x2_t b, long int vli) {
   // NOOPT-NEXT:   [[MATRIX:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 8{{$}}
   // OPT:          [[SCALAR:%.*]] = load i64, i64* %vli.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
   // OPT-NEXT:     [[MATRIX:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
-  // CHECK-NEXT:   [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR]], i32 0
+  // CHECK-NEXT:   [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR]], i64 0
   // CHECK-NEXT:   [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer
   // CHECK-NEXT:   [[RES:%.*]] = add <8 x i64> [[MATRIX]], [[SCALAR_EMBED1]]
   // CHECK-NEXT:   store <8 x i64> [[RES]], <8 x i64>* {{.*}}, align 8
@@ -534,7 +534,7 @@ void subtract_compound_matrix_scalar_long_long_int_int(ullx4x2_t b, long int vli
   // OPT:          [[SCALAR:%.*]] = load i64, i64* %vli.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
   // NOOPT-NEXT:   [[MATRIX:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 8{{$}}
   // OPT-NEXT:     [[MATRIX:%.*]] = load <8 x i64>, <8 x i64>* {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
-  // CHECK-NEXT:   [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR]], i32 0
+  // CHECK-NEXT:   [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR]], i64 0
   // CHECK-NEXT:   [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer
   // CHECK-NEXT:   [[RES:%.*]] = sub <8 x i64> [[MATRIX]], [[SCALAR_EMBED1]]
   // CHECK-NEXT:   store <8 x i64> [[RES]], <8 x i64>* {{.*}}, align 8
@@ -548,7 +548,7 @@ void add_matrix_scalar_long_long_int_unsigned_long_long(ullx4x2_t b, unsigned lo
   // NOOPT-NEXT:   [[MATRIX:%.*]] = load <8 x i64>, <8 x i64>* %0, align 8{{$}}
   // OPT:          [[SCALAR:%.*]] = load i64, i64* %vulli.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
   // OPT-NEXT:     [[MATRIX:%.*]] = load <8 x i64>, <8 x i64>* %0, align 8, !tbaa !{{[0-9]+}}{{$}}
-  // CHECK-NEXT:   [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR]], i32 0
+  // CHECK-NEXT:   [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR]], i64 0
   // CHECK-NEXT:   [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer
   // CHECK-NEXT:   [[RES:%.*]] = add <8 x i64> [[SCALAR_EMBED1]], [[MATRIX]]
   // CHECK-NEXT:   store <8 x i64> [[RES]], <8 x i64>* {{.*}}, align 8
@@ -561,7 +561,7 @@ void add_compound_matrix_scalar_long_long_int_unsigned_long_long(ullx4x2_t b, un
   // NOOPT-NEXT:   [[MATRIX:%.*]] = load <8 x i64>, <8 x i64>* %0, align 8{{$}}
   // OPT:          [[SCALAR:%.*]] = load i64, i64* %vulli.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
   // OPT-NEXT:     [[MATRIX:%.*]] = load <8 x i64>, <8 x i64>* %0, align 8, !tbaa !{{[0-9]+}}{{$}}
-  // CHECK-NEXT:   [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR]], i32 0
+  // CHECK-NEXT:   [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR]], i64 0
   // CHECK-NEXT:   [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer
   // CHECK-NEXT:   [[RES:%.*]] = add <8 x i64> [[MATRIX]], [[SCALAR_EMBED1]]
   // CHECK-NEXT:   store <8 x i64> [[RES]], <8 x i64>* {{.*}}, align 8
@@ -575,7 +575,7 @@ void subtract_compound_matrix_scalar_long_long_int_unsigned_long_long(ullx4x2_t
   // NOOPT-NEXT:   [[MATRIX:%.*]] = load <8 x i64>, <8 x i64>* %0, align 8{{$}}
   // OPT:          [[SCALAR:%.*]] = load i64, i64* %vulli.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
   // OPT-NEXT:     [[MATRIX:%.*]] = load <8 x i64>, <8 x i64>* %0, align 8, !tbaa !{{[0-9]+}}{{$}}
-  // CHECK-NEXT:   [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR]], i32 0
+  // CHECK-NEXT:   [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR]], i64 0
   // CHECK-NEXT:   [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer
   // CHECK-NEXT:   [[RES:%.*]] = sub <8 x i64> [[MATRIX]], [[SCALAR_EMBED1]]
   // CHECK-NEXT:   store <8 x i64> [[RES]], <8 x i64>* {{.*}}, align 8
@@ -636,7 +636,7 @@ void multiply_matrix_matrix_int(ix9x3_t b, ix3x9_t c) {
 // OPT:           [[A:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
 // OPT-NEXT:      [[S:%.*]] = load float, float* %s.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
 // CHECK-NEXT:    [[S_EXT:%.*]] = fpext float [[S]] to double
-// CHECK-NEXT:    [[VECINSERT:%.*]] = insertelement <25 x double> poison, double [[S_EXT]], i32 0
+// CHECK-NEXT:    [[VECINSERT:%.*]] = insertelement <25 x double> poison, double [[S_EXT]], i64 0
 // CHECK-NEXT:    [[VECSPLAT:%.*]] = shufflevector <25 x double> [[VECINSERT]], <25 x double> poison, <25 x i32> zeroinitializer
 // CHECK-NEXT:    [[RES:%.*]] = fmul <25 x double> [[A]], [[VECSPLAT]]
 // CHECK-NEXT:    store <25 x double> [[RES]], <25 x double>* {{.*}}, align 8
@@ -652,7 +652,7 @@ void multiply_double_matrix_scalar_float(dx5x5_t a, float s) {
 // CHECK-NEXT:    [[S_EXT:%.*]] = fpext float [[S]] to double
 // NOOPT-NEXT:    [[A:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8{{$}}
 // OPT-NEXT:      [[A:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
-// CHECK-NEXT:    [[VECINSERT:%.*]] = insertelement <25 x double> poison, double [[S_EXT]], i32 0
+// CHECK-NEXT:    [[VECINSERT:%.*]] = insertelement <25 x double> poison, double [[S_EXT]], i64 0
 // CHECK-NEXT:    [[VECSPLAT:%.*]] = shufflevector <25 x double> [[VECINSERT]], <25 x double> poison, <25 x i32> zeroinitializer
 // CHECK-NEXT:    [[RES:%.*]] = fmul <25 x double> [[A]], [[VECSPLAT]]
 // CHECK-NEXT:    store <25 x double> [[RES]], <25 x double>* {{.*}}, align 8
@@ -667,7 +667,7 @@ void multiply_compound_double_matrix_scalar_float(dx5x5_t a, float s) {
 // NOOPT-NEXT:    [[S:%.*]] = load double, double* %s.addr, align 8{{$}}
 // OPT:           [[A:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
 // OPT-NEXT:      [[S:%.*]] = load double, double* %s.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
-// CHECK-NEXT:    [[VECINSERT:%.*]] = insertelement <25 x double> poison, double [[S]], i32 0
+// CHECK-NEXT:    [[VECINSERT:%.*]] = insertelement <25 x double> poison, double [[S]], i64 0
 // CHECK-NEXT:    [[VECSPLAT:%.*]] = shufflevector <25 x double> [[VECINSERT]], <25 x double> poison, <25 x i32> zeroinitializer
 // CHECK-NEXT:    [[RES:%.*]] = fmul <25 x double> [[A]], [[VECSPLAT]]
 // CHECK-NEXT:    store <25 x double> [[RES]], <25 x double>* {{.*}}, align 8
@@ -682,7 +682,7 @@ void multiply_double_matrix_scalar_double(dx5x5_t a, double s) {
 // NOOPT-NEXT:    [[A:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8{{$}}
 // OPT:           [[S:%.*]] = load double, double* %s.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
 // OPT-NEXT:      [[A:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
-// CHECK-NEXT:    [[VECINSERT:%.*]] = insertelement <25 x double> poison, double [[S]], i32 0
+// CHECK-NEXT:    [[VECINSERT:%.*]] = insertelement <25 x double> poison, double [[S]], i64 0
 // CHECK-NEXT:    [[VECSPLAT:%.*]] = shufflevector <25 x double> [[VECINSERT]], <25 x double> poison, <25 x i32> zeroinitializer
 // CHECK-NEXT:    [[RES:%.*]] = fmul <25 x double> [[A]], [[VECSPLAT]]
 // CHECK-NEXT:    store <25 x double> [[RES]], <25 x double>* {{.*}}, align 8
@@ -697,7 +697,7 @@ void multiply_compound_double_matrix_scalar_double(dx5x5_t a, double s) {
 // CHECK-NEXT:    [[S_TRUNC:%.*]] = fptrunc double [[S]] to float
 // NOOPT-NEXT:    [[MAT:%.*]] = load <6 x float>, <6 x float>* [[MAT_ADDR:%.*]], align 4{{$}}
 // OPT-NEXT:      [[MAT:%.*]] = load <6 x float>, <6 x float>* [[MAT_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}}
-// CHECK-NEXT:    [[VECINSERT:%.*]] = insertelement <6 x float> poison, float [[S_TRUNC]], i32 0
+// CHECK-NEXT:    [[VECINSERT:%.*]] = insertelement <6 x float> poison, float [[S_TRUNC]], i64 0
 // CHECK-NEXT:    [[VECSPLAT:%.*]] = shufflevector <6 x float> [[VECINSERT]], <6 x float> poison, <6 x i32> zeroinitializer
 // CHECK-NEXT:    [[RES:%.*]] = fmul <6 x float> [[VECSPLAT]], [[MAT]]
 // CHECK-NEXT:    store <6 x float> [[RES]], <6 x float>* [[MAT_ADDR]], align 4
@@ -713,7 +713,7 @@ void multiply_float_matrix_scalar_double(fx2x3_t b, double s) {
 // CHECK-NEXT:    [[S_TRUNC:%.*]] = fptrunc double [[S]] to float
 // NOOPT-NEXT:    [[MAT:%.*]] = load <6 x float>, <6 x float>* [[MAT_ADDR:%.*]], align 4{{$}}
 // OPT-NEXT:      [[MAT:%.*]] = load <6 x float>, <6 x float>* [[MAT_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}}
-// CHECK-NEXT:    [[VECINSERT:%.*]] = insertelement <6 x float> poison, float [[S_TRUNC]], i32 0
+// CHECK-NEXT:    [[VECINSERT:%.*]] = insertelement <6 x float> poison, float [[S_TRUNC]], i64 0
 // CHECK-NEXT:    [[VECSPLAT:%.*]] = shufflevector <6 x float> [[VECINSERT]], <6 x float> poison, <6 x i32> zeroinitializer
 // CHECK-NEXT:    [[RES:%.*]] = fmul <6 x float> [[MAT]], [[VECSPLAT]]
 // store <6 x float> %3, <6 x float>* %0, align 4
@@ -728,7 +728,7 @@ void multiply_compound_float_matrix_scalar_double(fx2x3_t b, double s) {
 // CHECK-NEXT:    [[S_EXT:%.*]] = sext i16 [[S]] to i32
 // NOOPT-NEXT:    [[MAT:%.*]] = load <27 x i32>, <27 x i32>* [[MAT_ADDR:%.*]], align 4{{$}}
 // OPT-NEXT:      [[MAT:%.*]] = load <27 x i32>, <27 x i32>* [[MAT_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}}
-// CHECK-NEXT:    [[VECINSERT:%.*]] = insertelement <27 x i32> poison, i32 [[S_EXT]], i32 0
+// CHECK-NEXT:    [[VECINSERT:%.*]] = insertelement <27 x i32> poison, i32 [[S_EXT]], i64 0
 // CHECK-NEXT:    [[VECSPLAT:%.*]] = shufflevector <27 x i32> [[VECINSERT]], <27 x i32> poison, <27 x i32> zeroinitializer
 // CHECK-NEXT:    [[RES:%.*]] = mul <27 x i32> [[VECSPLAT]], [[MAT]]
 // CHECK-NEXT:    store <27 x i32> [[RES]], <27 x i32>* [[MAT_ADDR]], align 4
@@ -744,7 +744,7 @@ void multiply_int_matrix_scalar_short(ix9x3_t b, short s) {
 // CHECK-NEXT:   [[S_EXT:%.*]] = sext i16 [[S]] to i32
 // NOOPT-NEXT:   [[MAT:%.*]] = load <27 x i32>, <27 x i32>* [[MAT_ADDR:%.*]], align 4{{$}}
 // OPT-NEXT:     [[MAT:%.*]] = load <27 x i32>, <27 x i32>* [[MAT_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}}
-// CHECK-NEXT:   [[VECINSERT:%.*]] = insertelement <27 x i32> poison, i32 [[S_EXT]], i32 0
+// CHECK-NEXT:   [[VECINSERT:%.*]] = insertelement <27 x i32> poison, i32 [[S_EXT]], i64 0
 // CHECK-NEXT:   [[VECSPLAT:%.*]] = shufflevector <27 x i32> [[VECINSERT]], <27 x i32> poison, <27 x i32> zeroinitializer
 // CHECK-NEXT:   [[RES:%.*]] = mul <27 x i32> [[MAT]], [[VECSPLAT]]
 // CHECK-NEXT:   store <27 x i32> [[RES]], <27 x i32>* [[MAT_ADDR]], align 4
@@ -760,7 +760,7 @@ void multiply_compound_int_matrix_scalar_short(ix9x3_t b, short s) {
 // NOOPT-NEXT:    [[S:%.*]] = load i64, i64* %s.addr, align 8{{$}}
 // OPT-NEXT:      [[S:%.*]] = load i64, i64* %s.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
 // CHECK-NEXT:    [[S_TRUNC:%.*]] = trunc i64 [[S]] to i32
-// CHECK-NEXT:    [[VECINSERT:%.*]] = insertelement <27 x i32> poison, i32 [[S_TRUNC]], i32 0
+// CHECK-NEXT:    [[VECINSERT:%.*]] = insertelement <27 x i32> poison, i32 [[S_TRUNC]], i64 0
 // CHECK-NEXT:    [[VECSPLAT:%.*]] = shufflevector <27 x i32> [[VECINSERT]], <27 x i32> poison, <27 x i32> zeroinitializer
 // CHECK-NEXT:    [[RES:%.*]] = mul <27 x i32> [[MAT]], [[VECSPLAT]]
 // CHECK-NEXT:    store <27 x i32> [[RES]], <27 x i32>* [[MAT_ADDR]], align 4
@@ -777,7 +777,7 @@ void multiply_compound_int_matrix_scalar_ull(ix9x3_t b, unsigned long long s) {
   // CHECK-NEXT:    [[S_TRUNC:%.*]] = trunc i64 [[S]] to i32
   // NOOPT-NEXT:    [[MAT:%.*]] = load <27 x i32>, <27 x i32>* [[MAT_ADDR:%.*]], align 4{{$}}
   // OPT-NEXT:      [[MAT:%.*]] = load <27 x i32>, <27 x i32>* [[MAT_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}}
-  // CHECK-NEXT:    [[VECINSERT:%.*]] = insertelement <27 x i32> poison, i32 [[S_TRUNC]], i32 0
+  // CHECK-NEXT:    [[VECINSERT:%.*]] = insertelement <27 x i32> poison, i32 [[S_TRUNC]], i64 0
   // CHECK-NEXT:    [[VECSPLAT:%.*]] = shufflevector <27 x i32> [[VECINSERT]], <27 x i32> poison, <27 x i32> zeroinitializer
   // CHECK-NEXT:    [[RES:%.*]] = mul <27 x i32> [[MAT]], [[VECSPLAT]]
   // CHECK-NEXT:    store <27 x i32> [[RES]], <27 x i32>* [[MAT_ADDR]], align 4
@@ -851,7 +851,7 @@ void multiply_compound_int_matrix_constant(ix9x3_t a) {
 // OPT:           [[A:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
 // OPT-NEXT:      [[S:%.*]] = load float, float* %s.addr, align 4, !tbaa !{{[0-9]+}}{{$}}
 // CHECK-NEXT:    [[S_EXT:%.*]] = fpext float [[S]] to double
-// CHECK-NEXT:    [[VECINSERT:%.*]] = insertelement <25 x double> poison, double [[S_EXT]], i32 0
+// CHECK-NEXT:    [[VECINSERT:%.*]] = insertelement <25 x double> poison, double [[S_EXT]], i64 0
 // CHECK-NEXT:    [[VECSPLAT:%.*]] = shufflevector <25 x double> [[VECINSERT]], <25 x double> poison, <25 x i32> zeroinitializer
 // CHECK-NEXT:    [[RES:%.*]] = fdiv <25 x double> [[A]], [[VECSPLAT]]
 // CHECK-NEXT:    store <25 x double> [[RES]], <25 x double>* {{.*}}, align 8
@@ -866,7 +866,7 @@ void divide_double_matrix_scalar_float(dx5x5_t a, float s) {
 // NOOPT-NEXT:    [[S:%.*]] = load double, double* %s.addr, align 8{{$}}
 // OPT:           [[A:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
 // OPT-NEXT:      [[S:%.*]] = load double, double* %s.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
-// CHECK-NEXT:    [[VECINSERT:%.*]] = insertelement <25 x double> poison, double [[S]], i32 0
+// CHECK-NEXT:    [[VECINSERT:%.*]] = insertelement <25 x double> poison, double [[S]], i64 0
 // CHECK-NEXT:    [[VECSPLAT:%.*]] = shufflevector <25 x double> [[VECINSERT]], <25 x double> poison, <25 x i32> zeroinitializer
 // CHECK-NEXT:    [[RES:%.*]] = fdiv <25 x double> [[A]], [[VECSPLAT]]
 // CHECK-NEXT:    store <25 x double> [[RES]], <25 x double>* {{.*}}, align 8
@@ -882,7 +882,7 @@ void divide_double_matrix_scalar_double(dx5x5_t a, double s) {
 // OPT:           [[MAT:%.*]] = load <6 x float>, <6 x float>* [[MAT_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}}
 // OPT-NEXT:      [[S:%.*]] = load double, double* %s.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
 // CHECK-NEXT:    [[S_TRUNC:%.*]] = fptrunc double [[S]] to float
-// CHECK-NEXT:    [[VECINSERT:%.*]] = insertelement <6 x float> poison, float [[S_TRUNC]], i32 0
+// CHECK-NEXT:    [[VECINSERT:%.*]] = insertelement <6 x float> poison, float [[S_TRUNC]], i64 0
 // CHECK-NEXT:    [[VECSPLAT:%.*]] = shufflevector <6 x float> [[VECINSERT]], <6 x float> poison, <6 x i32> zeroinitializer
 // CHECK-NEXT:    [[RES:%.*]] = fdiv <6 x float> [[MAT]], [[VECSPLAT]]
 // CHECK-NEXT:    store <6 x float> [[RES]], <6 x float>* [[MAT_ADDR]], align 4
@@ -898,7 +898,7 @@ void divide_float_matrix_scalar_double(fx2x3_t b, double s) {
 // OPT:           [[MAT:%.*]] = load <27 x i32>, <27 x i32>* [[MAT_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}}
 // OPT-NEXT:      [[S:%.*]] = load i16, i16* %s.addr, align 2, !tbaa !{{[0-9]+}}{{$}}
 // CHECK-NEXT:    [[S_EXT:%.*]] = sext i16 [[S]] to i32
-// CHECK-NEXT:    [[VECINSERT:%.*]] = insertelement <27 x i32> poison, i32 [[S_EXT]], i32 0
+// CHECK-NEXT:    [[VECINSERT:%.*]] = insertelement <27 x i32> poison, i32 [[S_EXT]], i64 0
 // CHECK-NEXT:    [[VECSPLAT:%.*]] = shufflevector <27 x i32> [[VECINSERT]], <27 x i32> poison, <27 x i32> zeroinitializer
 // CHECK-NEXT:    [[RES:%.*]] = sdiv <27 x i32> [[MAT]], [[VECSPLAT]]
 // CHECK-NEXT:    store <27 x i32> [[RES]], <27 x i32>* [[MAT_ADDR]], align 4
@@ -914,7 +914,7 @@ void divide_int_matrix_scalar_short(ix9x3_t b, short s) {
 // OPT:           [[MAT:%.*]] = load <27 x i32>, <27 x i32>* [[MAT_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}}
 // OPT-NEXT:      [[S:%.*]] = load i64, i64* %s.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
 // CHECK-NEXT:    [[S_TRUNC:%.*]] = trunc i64 [[S]] to i32
-// CHECK-NEXT:    [[VECINSERT:%.*]] = insertelement <27 x i32> poison, i32 [[S_TRUNC]], i32 0
+// CHECK-NEXT:    [[VECINSERT:%.*]] = insertelement <27 x i32> poison, i32 [[S_TRUNC]], i64 0
 // CHECK-NEXT:    [[VECSPLAT:%.*]] = shufflevector <27 x i32> [[VECINSERT]], <27 x i32> poison, <27 x i32> zeroinitializer
 // CHECK-NEXT:    [[RES:%.*]] = sdiv <27 x i32> [[MAT]], [[VECSPLAT]]
 // CHECK-NEXT:    store <27 x i32> [[RES]], <27 x i32>* [[MAT_ADDR]], align 4
@@ -929,7 +929,7 @@ void divide_int_matrix_scalar_ull(ix9x3_t b, unsigned long long s) {
 // NOOPT-NEXT:    [[S:%.*]] = load i64, i64* %s.addr, align 8{{$}}
 // OPT:           [[MAT:%.*]] = load <8 x i64>, <8 x i64>* [[MAT_ADDR:%.*]], align 8, !tbaa !{{[0-9]+}}{{$}}
 // OPT-NEXT:      [[S:%.*]] = load i64, i64* %s.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
-// CHECK-NEXT:    [[VECINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[S]], i32 0
+// CHECK-NEXT:    [[VECINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[S]], i64 0
 // CHECK-NEXT:    [[VECSPLAT:%.*]] = shufflevector <8 x i64> [[VECINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
 // CHECK-NEXT:    [[RES:%.*]] = udiv <8 x i64> [[MAT]], [[VECSPLAT]]
 // CHECK-NEXT:    store <8 x i64> [[RES]], <8 x i64>* [[MAT_ADDR]], align 8

diff  --git a/clang/test/CodeGen/vecshift.c b/clang/test/CodeGen/vecshift.c
index 3932874e2cfed..3cb772a6feb40 100644
--- a/clang/test/CodeGen/vecshift.c
+++ b/clang/test/CodeGen/vecshift.c
@@ -70,35 +70,35 @@ void foo(void) {
 
   vc8 = c << vc8;
 // CHECK: [[t6:%.+]] = load i8, ptr @c,
-// CHECK: [[splat_splatinsert:%.+]] = insertelement <8 x i8> poison, i8 [[t6]], i32 0
+// CHECK: [[splat_splatinsert:%.+]] = insertelement <8 x i8> poison, i8 [[t6]], i64 0
 // CHECK: [[splat_splat:%.+]] = shufflevector <8 x i8> [[splat_splatinsert]], <8 x i8> poison, <8 x i32> zeroinitializer
 // CHECK: [[t7:%.+]] = load <8 x i8>, ptr {{@.+}},
 // CHECK: shl <8 x i8> [[splat_splat]], [[t7]]
   vuc8 = i << vuc8;
 // CHECK: [[t8:%.+]] = load i32, ptr @i,
 // CHECK: [[tconv:%.+]] = trunc i32 [[t8]] to i8
-// CHECK: [[splat_splatinsert7:%.+]] = insertelement <8 x i8> poison, i8 [[tconv]], i32 0
+// CHECK: [[splat_splatinsert7:%.+]] = insertelement <8 x i8> poison, i8 [[tconv]], i64 0
 // CHECK: [[splat_splat8:%.+]] = shufflevector <8 x i8> [[splat_splatinsert7]], <8 x i8> poison, <8 x i32> zeroinitializer
 // CHECK: [[t9:%.+]] = load <8 x i8>, ptr {{@.+}},
 // CHECK: shl <8 x i8> [[splat_splat8]], [[t9]]
   vi8 = uc << vi8;
 // CHECK: [[t10:%.+]] = load i8, ptr @uc,
 // CHECK: [[conv10:%.+]] = zext i8 [[t10]] to i32
-// CHECK: [[splat_splatinsert11:%.+]] = insertelement <8 x i32> poison, i32 [[conv10]], i32 0
+// CHECK: [[splat_splatinsert11:%.+]] = insertelement <8 x i32> poison, i32 [[conv10]], i64 0
 // CHECK: [[splat_splat12:%.+]] = shufflevector <8 x i32> [[splat_splatinsert11]], <8 x i32> poison, <8 x i32> zeroinitializer
 // CHECK: [[t11:%.+]] = load <8 x i32>, ptr {{@.+}},
 // CHECK: shl <8 x i32> [[splat_splat12]], [[t11]]
   vui8 = us << vui8;
 // CHECK: [[t12:%.+]] = load i16, ptr @us,
 // CHECK: [[conv14:%.+]] = zext i16 [[t12]] to i32
-// CHECK: [[splat_splatinsert15:%.+]] = insertelement <8 x i32> poison, i32 [[conv14]], i32 0
+// CHECK: [[splat_splatinsert15:%.+]] = insertelement <8 x i32> poison, i32 [[conv14]], i64 0
 // CHECK: [[splat_splat16:%.+]] = shufflevector <8 x i32> [[splat_splatinsert15]], <8 x i32> poison, <8 x i32> zeroinitializer
 // CHECK: [[t13:%.+]] = load <8 x i32>, ptr {{@.+}},
 // CHECK: shl <8 x i32> [[splat_splat16]], [[t13]]
   vs8 = ui << vs8;
 // CHECK: [[t14:%.+]] = load i32, ptr @ui,
 // CHECK: [[conv18:%.+]] = trunc i32 [[t14]] to i16
-// CHECK: [[splat_splatinsert19:%.+]] = insertelement <8 x i16> poison, i16 [[conv18]], i32 0
+// CHECK: [[splat_splatinsert19:%.+]] = insertelement <8 x i16> poison, i16 [[conv18]], i64 0
 // CHECK: [[splat_splat20:%.+]] = shufflevector <8 x i16> [[splat_splatinsert19]], <8 x i16> poison, <8 x i32> zeroinitializer
 // CHECK: [[t15:%.+]] = load <8 x i16>, ptr {{@.+}},
 // CHECK: shl <8 x i16> [[splat_splat20]], [[t15]]
@@ -134,27 +134,27 @@ void foo(void) {
   vui8 <<= s;
 // CHECK: [[t27:%.+]] = load i16, ptr @s,
 // CHECK: [[conv40:%.+]] = sext i16 [[t27]] to i32
-// CHECK: [[splat_splatinsert41:%.+]] = insertelement <8 x i32> poison, i32 [[conv40]], i32 0
+// CHECK: [[splat_splatinsert41:%.+]] = insertelement <8 x i32> poison, i32 [[conv40]], i64 0
 // CHECK: [[splat_splat42:%.+]] = shufflevector <8 x i32> [[splat_splatinsert41]], <8 x i32> poison, <8 x i32> zeroinitializer
 // CHECK: [[t28:%.+]] = load <8 x i32>, ptr {{@.+}},
 // CHECK: shl <8 x i32> [[t28]], [[splat_splat42]]
   vi8 <<= us;
 // CHECK: [[t29:%.+]] = load i16, ptr @us,
 // CHECK: [[conv44:%.+]] = zext i16 [[t29]] to i32
-// CHECK: [[splat_splatinsert45:%.+]] = insertelement <8 x i32> poison, i32 [[conv44]], i32 0
+// CHECK: [[splat_splatinsert45:%.+]] = insertelement <8 x i32> poison, i32 [[conv44]], i64 0
 // CHECK: [[splat_splat46:%.+]] = shufflevector <8 x i32> [[splat_splatinsert45]], <8 x i32> poison, <8 x i32> zeroinitializer
 // CHECK: [[t30:%.+]] = load <8 x i32>, ptr {{@.+}},
 // CHECK: shl <8 x i32> [[t30]], [[splat_splat46]]
   vus8 <<= i;
 // CHECK: [[t31:%.+]] = load i32, ptr @i,
-// CHECK: [[splat_splatinsert48:%.+]] = insertelement <8 x i32> poison, i32 [[t31]], i32 0
+// CHECK: [[splat_splatinsert48:%.+]] = insertelement <8 x i32> poison, i32 [[t31]], i64 0
 // CHECK: [[splat_splat49:%.+]] = shufflevector <8 x i32> [[splat_splatinsert48]], <8 x i32> poison, <8 x i32> zeroinitializer
 // CHECK: [[t32:%.+]] = load <8 x i16>, ptr {{@.+}},
 // CHECK: [[sh_prom50:%.+]] = trunc <8 x i32> [[splat_splat49]] to <8 x i16>
 // CHECK: shl <8 x i16> [[t32]], [[sh_prom50]]
   vs8 <<= ui;
 // CHECK: [[t33:%.+]] = load i32, ptr @ui,
-// CHECK: [[splat_splatinsert52:%.+]] = insertelement <8 x i32> poison, i32 [[t33]], i32 0
+// CHECK: [[splat_splatinsert52:%.+]] = insertelement <8 x i32> poison, i32 [[t33]], i64 0
 // CHECK: [[splat_splat53:%.+]] = shufflevector <8 x i32> [[splat_splatinsert52]], <8 x i32> poison, <8 x i32> zeroinitializer
 // CHECK: [[t34:%.+]] = load <8 x i16>, ptr {{@.+}},
 // CHECK: [[sh_prom54:%.+]] = trunc <8 x i32> [[splat_splat53]] to <8 x i16>

diff  --git a/clang/test/CodeGenCXX/aarch64-sve-vector-conditional-op.cpp b/clang/test/CodeGenCXX/aarch64-sve-vector-conditional-op.cpp
index 8fe502ec2b7e2..cdf389c5e7b52 100644
--- a/clang/test/CodeGenCXX/aarch64-sve-vector-conditional-op.cpp
+++ b/clang/test/CodeGenCXX/aarch64-sve-vector-conditional-op.cpp
@@ -164,10 +164,10 @@ svint32_t cond_i32_splat(svint32_t a) {
 
 // CHECK-LABEL: @_Z14cond_u32_splatu12__SVUint32_t(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[CMP:%.*]] = icmp ult <vscale x 4 x i32> [[A:%.*]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+// CHECK-NEXT:    [[CMP:%.*]] = icmp ult <vscale x 4 x i32> [[A:%.*]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
 // CHECK-NEXT:    [[CONV:%.*]] = zext <vscale x 4 x i1> [[CMP]] to <vscale x 4 x i32>
 // CHECK-NEXT:    [[VECTOR_COND:%.*]] = icmp ne <vscale x 4 x i32> [[CONV]], zeroinitializer
-// CHECK-NEXT:    [[VECTOR_SELECT:%.*]] = select <vscale x 4 x i1> [[VECTOR_COND]], <vscale x 4 x i32> [[A]], <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+// CHECK-NEXT:    [[VECTOR_SELECT:%.*]] = select <vscale x 4 x i1> [[VECTOR_COND]], <vscale x 4 x i32> [[A]], <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[VECTOR_SELECT]]
 //
 svuint32_t cond_u32_splat(svuint32_t a) {
@@ -188,10 +188,10 @@ svint64_t cond_i64_splat(svint64_t a) {
 
 // CHECK-LABEL: @_Z14cond_u64_splatu12__SVUint64_t(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[CMP:%.*]] = icmp ult <vscale x 2 x i64> [[A:%.*]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
+// CHECK-NEXT:    [[CMP:%.*]] = icmp ult <vscale x 2 x i64> [[A:%.*]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
 // CHECK-NEXT:    [[CONV:%.*]] = zext <vscale x 2 x i1> [[CMP]] to <vscale x 2 x i64>
 // CHECK-NEXT:    [[VECTOR_COND:%.*]] = icmp ne <vscale x 2 x i64> [[CONV]], zeroinitializer
-// CHECK-NEXT:    [[VECTOR_SELECT:%.*]] = select <vscale x 2 x i1> [[VECTOR_COND]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
+// CHECK-NEXT:    [[VECTOR_SELECT:%.*]] = select <vscale x 2 x i1> [[VECTOR_COND]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[VECTOR_SELECT]]
 //
 svuint64_t cond_u64_splat(svuint64_t a) {

diff  --git a/clang/test/CodeGenCXX/ext-int.cpp b/clang/test/CodeGenCXX/ext-int.cpp
index aeb2c9a7022bc..86c566f903e32 100644
--- a/clang/test/CodeGenCXX/ext-int.cpp
+++ b/clang/test/CodeGenCXX/ext-int.cpp
@@ -389,7 +389,7 @@ void ExplicitCasts() {
   uint16_t4 c;
   c = i;
   // CHECK: %[[CONV:.+]] = trunc i32 %{{.+}} to i16
-  // CHECK: %[[VEC:.+]] = insertelement <4 x i16> poison, i16 %[[CONV]], i32 0
+  // CHECK: %[[VEC:.+]] = insertelement <4 x i16> poison, i16 %[[CONV]], i64 0
   // CHECK: %[[Splat:.+]] = shufflevector <4 x i16> %[[VEC]], <4 x i16> poison, <4 x i32> zeroinitializer
 }
 

diff  --git a/clang/test/CodeGenCXX/ext-vector-type-conditional.cpp b/clang/test/CodeGenCXX/ext-vector-type-conditional.cpp
index 573b8ea2e74a7..139feab30cb2f 100644
--- a/clang/test/CodeGenCXX/ext-vector-type-conditional.cpp
+++ b/clang/test/CodeGenCXX/ext-vector-type-conditional.cpp
@@ -80,10 +80,10 @@ void TwoScalarOps() {
   four_shorts ? some_short : some_short;
   // CHECK: [[COND:%.+]] = load <4 x i16>
   // CHECK: [[LHS:%.+]] = load i16
-  // CHECK: [[LHS_SPLAT_INSERT:%.+]] = insertelement <4 x i16> poison, i16 [[LHS]], i32 0
+  // CHECK: [[LHS_SPLAT_INSERT:%.+]] = insertelement <4 x i16> poison, i16 [[LHS]], i64 0
   // CHECK: [[LHS_SPLAT:%.+]] = shufflevector <4 x i16> [[LHS_SPLAT_INSERT]], <4 x i16> poison, <4 x i32> zeroinitializer
   // CHECK: [[RHS:%.+]] = load i16
-  // CHECK: [[RHS_SPLAT_INSERT:%.+]] = insertelement <4 x i16> poison, i16 [[RHS]], i32 0
+  // CHECK: [[RHS_SPLAT_INSERT:%.+]] = insertelement <4 x i16> poison, i16 [[RHS]], i64 0
   // CHECK: [[RHS_SPLAT:%.+]] = shufflevector <4 x i16> [[RHS_SPLAT_INSERT]], <4 x i16> poison, <4 x i32> zeroinitializer
   // CHECK: [[NEG:%.+]] = icmp slt <4 x i16> [[COND]], zeroinitializer
   // CHECK: [[SEXT:%.+]] = sext <4 x i1> [[NEG]] to <4 x i16>
@@ -95,10 +95,10 @@ void TwoScalarOps() {
   four_shorts ? some_ushort : some_ushort;
   // CHECK: [[COND:%.+]] = load <4 x i16>
   // CHECK: [[LHS:%.+]] = load i16
-  // CHECK: [[LHS_SPLAT_INSERT:%.+]] = insertelement <4 x i16> poison, i16 [[LHS]], i32 0
+  // CHECK: [[LHS_SPLAT_INSERT:%.+]] = insertelement <4 x i16> poison, i16 [[LHS]], i64 0
   // CHECK: [[LHS_SPLAT:%.+]] = shufflevector <4 x i16> [[LHS_SPLAT_INSERT]], <4 x i16> poison, <4 x i32> zeroinitializer
   // CHECK: [[RHS:%.+]] = load i16
-  // CHECK: [[RHS_SPLAT_INSERT:%.+]] = insertelement <4 x i16> poison, i16 [[RHS]], i32 0
+  // CHECK: [[RHS_SPLAT_INSERT:%.+]] = insertelement <4 x i16> poison, i16 [[RHS]], i64 0
   // CHECK: [[RHS_SPLAT:%.+]] = shufflevector <4 x i16> [[RHS_SPLAT_INSERT]], <4 x i16> poison, <4 x i32> zeroinitializer
   // CHECK: [[NEG:%.+]] = icmp slt <4 x i16> [[COND]], zeroinitializer
   // CHECK: [[SEXT:%.+]] = sext <4 x i1> [[NEG]] to <4 x i16>
@@ -111,11 +111,11 @@ void TwoScalarOps() {
   // CHECK: [[COND:%.+]] = load <4 x i32>
   // CHECK: [[LHS:%.+]] = load i16
   // CHECK: [[LHS_ZEXT:%.+]] = zext i16 [[LHS]] to i32
-  // CHECK: [[LHS_SPLAT_INSERT:%.+]] = insertelement <4 x i32> poison, i32 [[LHS_ZEXT]], i32 0
+  // CHECK: [[LHS_SPLAT_INSERT:%.+]] = insertelement <4 x i32> poison, i32 [[LHS_ZEXT]], i64 0
   // CHECK: [[LHS_SPLAT:%.+]] = shufflevector <4 x i32> [[LHS_SPLAT_INSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
   // CHECK: [[RHS:%.+]] = load i16
   // CHECK: [[RHS_SEXT:%.+]] = sext i16 [[RHS]] to i32
-  // CHECK: [[RHS_SPLAT_INSERT:%.+]] = insertelement <4 x i32> poison, i32 [[RHS_SEXT]], i32 0
+  // CHECK: [[RHS_SPLAT_INSERT:%.+]] = insertelement <4 x i32> poison, i32 [[RHS_SEXT]], i64 0
   // CHECK: [[RHS_SPLAT:%.+]] = shufflevector <4 x i32> [[RHS_SPLAT_INSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
   // CHECK: [[NEG:%.+]] = icmp slt <4 x i32> [[COND]], zeroinitializer
   // CHECK: [[SEXT:%.+]] = sext <4 x i1> [[NEG]] to <4 x i32>
@@ -128,10 +128,10 @@ void TwoScalarOps() {
   // CHECK: [[COND:%.+]] = load <4 x i32>
   // CHECK: [[LHS:%.+]] = load i32
   // CHECK: [[LHS_CONV:%.+]] = sitofp i32 [[LHS]] to float
-  // CHECK: [[LHS_SPLAT_INSERT:%.+]] = insertelement <4 x float> poison, float [[LHS_CONV]], i32 0
+  // CHECK: [[LHS_SPLAT_INSERT:%.+]] = insertelement <4 x float> poison, float [[LHS_CONV]], i64 0
   // CHECK: [[LHS_SPLAT:%.+]] = shufflevector <4 x float> [[LHS_SPLAT_INSERT]], <4 x float> poison, <4 x i32> zeroinitializer
   // CHECK: [[RHS:%.+]] = load float
-  // CHECK: [[RHS_SPLAT_INSERT:%.+]] = insertelement <4 x float> poison, float [[RHS]], i32 0
+  // CHECK: [[RHS_SPLAT_INSERT:%.+]] = insertelement <4 x float> poison, float [[RHS]], i64 0
   // CHECK: [[RHS_SPLAT:%.+]] = shufflevector <4 x float> [[RHS_SPLAT_INSERT]], <4 x float> poison, <4 x i32> zeroinitializer
   // CHECK: [[NEG:%.+]] = icmp slt <4 x i32> [[COND]], zeroinitializer
   // CHECK: [[SEXT:%.+]] = sext <4 x i1> [[NEG]] to <4 x i32>
@@ -145,11 +145,11 @@ void TwoScalarOps() {
   four_ll ? some_double : some_ll;
   // CHECK: [[COND:%.+]] = load <4 x i64>
   // CHECK: [[LHS:%.+]] = load double
-  // CHECK: [[LHS_SPLAT_INSERT:%.+]] = insertelement <4 x double> poison, double [[LHS]], i32 0
+  // CHECK: [[LHS_SPLAT_INSERT:%.+]] = insertelement <4 x double> poison, double [[LHS]], i64 0
   // CHECK: [[LHS_SPLAT:%.+]] = shufflevector <4 x double> [[LHS_SPLAT_INSERT]], <4 x double> poison, <4 x i32> zeroinitializer
   // CHECK: [[RHS:%.+]] = load i64
   // CHECK: [[RHS_CONV:%.+]] = sitofp i64 [[RHS]] to double
-  // CHECK: [[RHS_SPLAT_INSERT:%.+]] = insertelement <4 x double> poison, double [[RHS_CONV]], i32 0
+  // CHECK: [[RHS_SPLAT_INSERT:%.+]] = insertelement <4 x double> poison, double [[RHS_CONV]], i64 0
   // CHECK: [[RHS_SPLAT:%.+]] = shufflevector <4 x double> [[RHS_SPLAT_INSERT]], <4 x double> poison, <4 x i32> zeroinitializer
   // CHECK: [[NEG:%.+]] = icmp slt <4 x i64> [[COND]], zeroinitializer
   // CHECK: [[SEXT:%.+]] = sext <4 x i1> [[NEG]] to <4 x i64>
@@ -163,11 +163,11 @@ void TwoScalarOps() {
   four_ints ? some_int : some_short;
   // CHECK: [[COND:%.+]] = load <4 x i32>
   // CHECK: [[LHS:%.+]] = load i32
-  // CHECK: [[LHS_SPLAT_INSERT:%.+]] = insertelement <4 x i32> poison, i32 [[LHS]], i32 0
+  // CHECK: [[LHS_SPLAT_INSERT:%.+]] = insertelement <4 x i32> poison, i32 [[LHS]], i64 0
   // CHECK: [[LHS_SPLAT:%.+]] = shufflevector <4 x i32> [[LHS_SPLAT_INSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
   // CHECK: [[RHS:%.+]] = load i16
   // CHECK: [[RHS_SEXT:%.+]] = sext i16 [[RHS]] to i32
-  // CHECK: [[RHS_SPLAT_INSERT:%.+]] = insertelement <4 x i32> poison, i32 [[RHS_SEXT]], i32 0
+  // CHECK: [[RHS_SPLAT_INSERT:%.+]] = insertelement <4 x i32> poison, i32 [[RHS_SEXT]], i64 0
   // CHECK: [[RHS_SPLAT:%.+]] = shufflevector <4 x i32> [[RHS_SPLAT_INSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
   // CHECK: [[NEG:%.+]] = icmp slt <4 x i32> [[COND]], zeroinitializer
   // CHECK: [[SEXT:%.+]] = sext <4 x i1> [[NEG]] to <4 x i32>
@@ -183,7 +183,7 @@ void OneScalarOp() {
   // CHECK: [[COND:%.+]] = load <4 x i32>
   // CHECK: [[LHS:%.+]] = load <4 x i32>
   // CHECK: [[RHS:%.+]] = load i32
-  // CHECK: [[RHS_SPLAT_INSERT:%.+]] = insertelement <4 x i32> poison, i32 [[RHS]], i32 0
+  // CHECK: [[RHS_SPLAT_INSERT:%.+]] = insertelement <4 x i32> poison, i32 [[RHS]], i64 0
   // CHECK: [[RHS_SPLAT:%.+]] = shufflevector <4 x i32> [[RHS_SPLAT_INSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
   // CHECK: [[NEG:%.+]] = icmp slt <4 x i32> [[COND]], zeroinitializer
   // CHECK: [[SEXT:%.+]] = sext <4 x i1> [[NEG]] to <4 x i32>
@@ -206,7 +206,7 @@ void OneScalarOp() {
   // CHECK: [[COND:%.+]] = load <4 x i32>
   // CHECK: [[LHS:%.+]] = load <4 x float>
   // CHECK: [[RHS:%.+]] = load float
-  // CHECK: [[RHS_SPLAT_INSERT:%.+]] = insertelement <4 x float> poison, float [[RHS]], i32 0
+  // CHECK: [[RHS_SPLAT_INSERT:%.+]] = insertelement <4 x float> poison, float [[RHS]], i64 0
   // CHECK: [[RHS_SPLAT:%.+]] = shufflevector <4 x float> [[RHS_SPLAT_INSERT]], <4 x float> poison, <4 x i32> zeroinitializer
   // CHECK: [[NEG:%.+]] = icmp slt <4 x i32> [[COND]], zeroinitializer
   // CHECK: [[SEXT:%.+]] = sext <4 x i1> [[NEG]] to <4 x i32>
@@ -243,7 +243,7 @@ void OneScalarOp() {
   // CHECK: [[LHS:%.+]] = load <4 x i64>
   // CHECK: [[RHS:%.+]] = load i32
   // CHECK: [[RHS_CONV:%.+]] = sext i32 [[RHS]] to i64
-  // CHECK: [[RHS_SPLAT_INSERT:%.+]] = insertelement <4 x i64> poison, i64 [[RHS_CONV]], i32 0
+  // CHECK: [[RHS_SPLAT_INSERT:%.+]] = insertelement <4 x i64> poison, i64 [[RHS_CONV]], i64 0
   // CHECK: [[RHS_SPLAT:%.+]] = shufflevector <4 x i64> [[RHS_SPLAT_INSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
   // CHECK: [[NEG:%.+]] = icmp slt <4 x i64> [[COND]], zeroinitializer
   // CHECK: [[SEXT:%.+]] = sext <4 x i1> [[NEG]] to <4 x i64>
@@ -256,7 +256,7 @@ void OneScalarOp() {
   // CHECK: [[COND:%.+]] = load <4 x i64>
   // CHECK: [[LHS:%.+]] = load <4 x i64>
   // CHECK: [[RHS:%.+]] = load i64
-  // CHECK: [[RHS_SPLAT_INSERT:%.+]] = insertelement <4 x i64> poison, i64 [[RHS]], i32 0
+  // CHECK: [[RHS_SPLAT_INSERT:%.+]] = insertelement <4 x i64> poison, i64 [[RHS]], i64 0
   // CHECK: [[RHS_SPLAT:%.+]] = shufflevector <4 x i64> [[RHS_SPLAT_INSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
   // CHECK: [[NEG:%.+]] = icmp slt <4 x i64> [[COND]], zeroinitializer
   // CHECK: [[SEXT:%.+]] = sext <4 x i1> [[NEG]] to <4 x i64>

diff  --git a/clang/test/CodeGenCXX/matrix-type-operators.cpp b/clang/test/CodeGenCXX/matrix-type-operators.cpp
index d5d7e2d30a7d5..2eb832f1aca6d 100644
--- a/clang/test/CodeGenCXX/matrix-type-operators.cpp
+++ b/clang/test/CodeGenCXX/matrix-type-operators.cpp
@@ -66,7 +66,7 @@ void test_DoubleWrapper1_Sub1(MyMatrix<double, 10, 9> &m) {
   // NOOPT:       [[MATRIX:%.*]] = load <90 x double>, ptr {{.*}}, align 8{{$}}
   // OPT:         [[MATRIX:%.*]] = load <90 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
   // CHECK-NEXT:  [[SCALAR:%.*]] = call noundef double @_ZN14DoubleWrapper1cvdEv(ptr {{[^,]*}} %w1)
-  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <90 x double> poison, double [[SCALAR]], i32 0
+  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <90 x double> poison, double [[SCALAR]], i64 0
   // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <90 x double> [[SCALAR_EMBED]], <90 x double> poison, <90 x i32> zeroinitializer
   // CHECK-NEXT:  [[RES:%.*]] = fsub <90 x double> [[MATRIX]], [[SCALAR_EMBED1]]
   // CHECK:       store <90 x double> [[RES]], ptr {{.*}}, align 8
@@ -81,7 +81,7 @@ void test_DoubleWrapper1_Sub2(MyMatrix<double, 10, 9> &m) {
   // CHECK:       [[SCALAR:%.*]] = call noundef double @_ZN14DoubleWrapper1cvdEv(ptr {{[^,]*}} %w1)
   // NOOPT:       [[MATRIX:%.*]] = load <90 x double>, ptr {{.*}}, align 8{{$}}
   // OPT:         [[MATRIX:%.*]] = load <90 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
-  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <90 x double> poison, double [[SCALAR]], i32 0
+  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <90 x double> poison, double [[SCALAR]], i64 0
   // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <90 x double> [[SCALAR_EMBED]], <90 x double> poison, <90 x i32> zeroinitializer
   // CHECK-NEXT:  [[RES:%.*]] = fsub <90 x double> [[SCALAR_EMBED1]], [[MATRIX]]
   // CHECK:       store <90 x double> [[RES]], ptr {{.*}}, align 8
@@ -103,7 +103,7 @@ void test_DoubleWrapper2_Add1(MyMatrix<double, 10, 9> &m) {
   // NOOPT:       [[MATRIX:%.*]] = load <90 x double>, ptr {{.+}}, align 8{{$}}
   // OPT:         [[MATRIX:%.*]] = load <90 x double>, ptr {{.+}}, align 8, !tbaa !{{[0-9]+}}{{$}}
   // CHECK:       [[SCALAR:%.*]] = call noundef double @_ZN14DoubleWrapper2cvdEv(ptr {{[^,]*}} %w2)
-  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <90 x double> poison, double [[SCALAR]], i32 0
+  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <90 x double> poison, double [[SCALAR]], i64 0
   // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <90 x double> [[SCALAR_EMBED]], <90 x double> poison, <90 x i32> zeroinitializer
   // CHECK-NEXT:  [[RES:%.*]] = fadd <90 x double> [[MATRIX]], [[SCALAR_EMBED1]]
   // CHECK:       store <90 x double> [[RES]], ptr {{.*}}, align 8
@@ -118,7 +118,7 @@ void test_DoubleWrapper2_Add2(MyMatrix<double, 10, 9> &m) {
   // CHECK:       [[SCALAR:%.*]] = call noundef double @_ZN14DoubleWrapper2cvdEv(ptr {{[^,]*}} %w2)
   // NOOPT:       [[MATRIX:%.*]] = load <90 x double>, ptr {{.*}}, align 8{{$}}
   // OPT:         [[MATRIX:%.*]] = load <90 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
-  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <90 x double> poison, double [[SCALAR]], i32 0
+  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <90 x double> poison, double [[SCALAR]], i64 0
   // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <90 x double> [[SCALAR_EMBED]], <90 x double> poison, <90 x i32> zeroinitializer
   // CHECK-NEXT:  [[RES:%.*]] = fadd <90 x double> [[SCALAR_EMBED1]], [[MATRIX]]
   // CHECK:       store <90 x double> [[RES]], ptr {{.*}}, align 8
@@ -141,7 +141,7 @@ void test_IntWrapper_Add(MyMatrix<double, 10, 9> &m) {
   // OPT:         [[MATRIX:%.*]] = load <90 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
   // CHECK-NEXT:  [[SCALAR:%.*]] = call noundef i32 @_ZN10IntWrappercviEv(ptr {{[^,]*}} %w3)
   // CHECK-NEXT:  [[SCALAR_FP:%.*]] = sitofp i32 %call to double
-  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <90 x double> poison, double [[SCALAR_FP]], i32 0
+  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <90 x double> poison, double [[SCALAR_FP]], i64 0
   // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <90 x double> [[SCALAR_EMBED]], <90 x double> poison, <90 x i32> zeroinitializer
   // CHECK-NEXT:  [[RES:%.*]] = fadd <90 x double> [[MATRIX]], [[SCALAR_EMBED1]]
   // CHECK:       store <90 x double> [[RES]], ptr {{.*}}, align 8
@@ -157,7 +157,7 @@ void test_IntWrapper_Sub(MyMatrix<double, 10, 9> &m) {
   // CHECK-NEXT:  [[SCALAR_FP:%.*]] = sitofp i32 %call to double
   // NOOPT:       [[MATRIX:%.*]] = load <90 x double>, ptr {{.*}}, align 8{{$}}
   // OPT:         [[MATRIX:%.*]] = load <90 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
-  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <90 x double> poison, double [[SCALAR_FP]], i32 0
+  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <90 x double> poison, double [[SCALAR_FP]], i64 0
   // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <90 x double> [[SCALAR_EMBED]], <90 x double> poison, <90 x i32> zeroinitializer
   // CHECK-NEXT:  [[RES:%.*]] = fsub <90 x double> [[SCALAR_EMBED1]], [[MATRIX]]
   // CHECK:       store <90 x double> [[RES]], ptr {{.*}}, align 8
@@ -200,7 +200,7 @@ void test_IntWrapper_Multiply(MyMatrix<double, 10, 9> &m, IntWrapper &w3) {
   // CHECK-NEXT:  [[SCALAR_FP:%.*]] = sitofp i32 %call to double
   // NOOPT:       [[MATRIX:%.*]] = load <90 x double>, ptr {{.*}}, align 8{{$}}
   // OPT:         [[MATRIX:%.*]] = load <90 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}}
-  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <90 x double> poison, double [[SCALAR_FP]], i32 0
+  // CHECK-NEXT:  [[SCALAR_EMBED:%.*]] = insertelement <90 x double> poison, double [[SCALAR_FP]], i64 0
   // CHECK-NEXT:  [[SCALAR_EMBED1:%.*]] = shufflevector <90 x double> [[SCALAR_EMBED]], <90 x double> poison, <90 x i32> zeroinitializer
   // CHECK-NEXT:  [[RES:%.*]] = fmul <90 x double> [[SCALAR_EMBED1]], [[MATRIX]]
   // CHECK:       store <90 x double> [[RES]], ptr {{.*}}, align 8

diff  --git a/clang/test/CodeGenCXX/vector-size-conditional.cpp b/clang/test/CodeGenCXX/vector-size-conditional.cpp
index 1a739bc56ed58..fcfaa76d8b8c4 100644
--- a/clang/test/CodeGenCXX/vector-size-conditional.cpp
+++ b/clang/test/CodeGenCXX/vector-size-conditional.cpp
@@ -62,10 +62,10 @@ void TwoScalarOps() {
   four_shorts ? some_short : some_short;
   // CHECK: %[[COND:.+]] = load <4 x i16>
   // CHECK: %[[LHS:.+]] = load i16
-  // CHECK: %[[LHS_SPLAT_INSERT:.+]] = insertelement <4 x i16> poison, i16 %[[LHS]], i32 0
+  // CHECK: %[[LHS_SPLAT_INSERT:.+]] = insertelement <4 x i16> poison, i16 %[[LHS]], i64 0
   // CHECK: %[[LHS_SPLAT:.+]] = shufflevector <4 x i16> %[[LHS_SPLAT_INSERT]], <4 x i16> poison, <4 x i32> zeroinitializer
   // CHECK: %[[RHS:.+]] = load i16
-  // CHECK: %[[RHS_SPLAT_INSERT:.+]] = insertelement <4 x i16> poison, i16 %[[RHS]], i32 0
+  // CHECK: %[[RHS_SPLAT_INSERT:.+]] = insertelement <4 x i16> poison, i16 %[[RHS]], i64 0
   // CHECK: %[[RHS_SPLAT:.+]] = shufflevector <4 x i16> %[[RHS_SPLAT_INSERT]], <4 x i16> poison, <4 x i32> zeroinitializer
   // CHECK: %[[NEZERO:.+]] = icmp ne <4 x i16> %[[COND]], zeroinitializer
   // CHECK: %[[SELECT:.+]] = select <4 x i1> %[[NEZERO]], <4 x i16> %[[LHS_SPLAT]], <4 x i16> %[[RHS_SPLAT]]
@@ -73,10 +73,10 @@ void TwoScalarOps() {
   four_shorts ? some_ushort : some_ushort;
   // CHECK: %[[COND:.+]] = load <4 x i16>
   // CHECK: %[[LHS:.+]] = load i16
-  // CHECK: %[[LHS_SPLAT_INSERT:.+]] = insertelement <4 x i16> poison, i16 %[[LHS]], i32 0
+  // CHECK: %[[LHS_SPLAT_INSERT:.+]] = insertelement <4 x i16> poison, i16 %[[LHS]], i64 0
   // CHECK: %[[LHS_SPLAT:.+]] = shufflevector <4 x i16> %[[LHS_SPLAT_INSERT]], <4 x i16> poison, <4 x i32> zeroinitializer
   // CHECK: %[[RHS:.+]] = load i16
-  // CHECK: %[[RHS_SPLAT_INSERT:.+]] = insertelement <4 x i16> poison, i16 %[[RHS]], i32 0
+  // CHECK: %[[RHS_SPLAT_INSERT:.+]] = insertelement <4 x i16> poison, i16 %[[RHS]], i64 0
   // CHECK: %[[RHS_SPLAT:.+]] = shufflevector <4 x i16> %[[RHS_SPLAT_INSERT]], <4 x i16> poison, <4 x i32> zeroinitializer
   // CHECK: %[[NEZERO:.+]] = icmp ne <4 x i16> %[[COND]], zeroinitializer
   // CHECK: %[[SELECT:.+]] = select <4 x i1> %[[NEZERO]], <4 x i16> %[[LHS_SPLAT]], <4 x i16> %[[RHS_SPLAT]]
@@ -85,11 +85,11 @@ void TwoScalarOps() {
   // CHECK: %[[COND:.+]] = load <4 x i32>
   // CHECK: %[[LHS:.+]] = load i16
   // CHECK: %[[LHS_ZEXT:.+]] = zext i16 %[[LHS]] to i32
-  // CHECK: %[[LHS_SPLAT_INSERT:.+]] = insertelement <4 x i32> poison, i32 %[[LHS_ZEXT]], i32 0
+  // CHECK: %[[LHS_SPLAT_INSERT:.+]] = insertelement <4 x i32> poison, i32 %[[LHS_ZEXT]], i64 0
   // CHECK: %[[LHS_SPLAT:.+]] = shufflevector <4 x i32> %[[LHS_SPLAT_INSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
   // CHECK: %[[RHS:.+]] = load i16
   // CHECK: %[[RHS_SEXT:.+]] = sext i16 %[[RHS]] to i32
-  // CHECK: %[[RHS_SPLAT_INSERT:.+]] = insertelement <4 x i32> poison, i32 %[[RHS_SEXT]], i32 0
+  // CHECK: %[[RHS_SPLAT_INSERT:.+]] = insertelement <4 x i32> poison, i32 %[[RHS_SEXT]], i64 0
   // CHECK: %[[RHS_SPLAT:.+]] = shufflevector <4 x i32> %[[RHS_SPLAT_INSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
   // CHECK: %[[NEZERO:.+]] = icmp ne <4 x i32> %[[COND]], zeroinitializer
   // CHECK: %[[SELECT:.+]] = select <4 x i1> %[[NEZERO]], <4 x i32> %[[LHS_SPLAT]], <4 x i32> %[[RHS_SPLAT]]
@@ -98,10 +98,10 @@ void TwoScalarOps() {
   // CHECK: %[[COND:.+]] = load <4 x i32>
   // CHECK: %[[LHS:.+]] = load i32
   // CHECK: %[[LHS_CONV:.+]] = sitofp i32 %[[LHS]] to float
-  // CHECK: %[[LHS_SPLAT_INSERT:.+]] = insertelement <4 x float> poison, float %[[LHS_CONV]], i32 0
+  // CHECK: %[[LHS_SPLAT_INSERT:.+]] = insertelement <4 x float> poison, float %[[LHS_CONV]], i64 0
   // CHECK: %[[LHS_SPLAT:.+]] = shufflevector <4 x float> %[[LHS_SPLAT_INSERT]], <4 x float> poison, <4 x i32> zeroinitializer
   // CHECK: %[[RHS:.+]] = load float
-  // CHECK: %[[RHS_SPLAT_INSERT:.+]] = insertelement <4 x float> poison, float %[[RHS]], i32 0
+  // CHECK: %[[RHS_SPLAT_INSERT:.+]] = insertelement <4 x float> poison, float %[[RHS]], i64 0
   // CHECK: %[[RHS_SPLAT:.+]] = shufflevector <4 x float> %[[RHS_SPLAT_INSERT]], <4 x float> poison, <4 x i32> zeroinitializer
   // CHECK: %[[NEZERO:.+]] = icmp ne <4 x i32> %[[COND]], zeroinitializer
   // CHECK: %[[SELECT:.+]] = select <4 x i1> %[[NEZERO]], <4 x float> %[[LHS_SPLAT]], <4 x float> %[[RHS_SPLAT]]
@@ -109,11 +109,11 @@ void TwoScalarOps() {
   four_ll ? some_double : some_ll;
   // CHECK: %[[COND:.+]] = load <4 x i64>
   // CHECK: %[[LHS:.+]] = load double
-  // CHECK: %[[LHS_SPLAT_INSERT:.+]] = insertelement <4 x double> poison, double %[[LHS]], i32 0
+  // CHECK: %[[LHS_SPLAT_INSERT:.+]] = insertelement <4 x double> poison, double %[[LHS]], i64 0
   // CHECK: %[[LHS_SPLAT:.+]] = shufflevector <4 x double> %[[LHS_SPLAT_INSERT]], <4 x double> poison, <4 x i32> zeroinitializer
   // CHECK: %[[RHS:.+]] = load i64
   // CHECK: %[[RHS_CONV:.+]] = sitofp i64 %[[RHS]] to double
-  // CHECK: %[[RHS_SPLAT_INSERT:.+]] = insertelement <4 x double> poison, double %[[RHS_CONV]], i32 0
+  // CHECK: %[[RHS_SPLAT_INSERT:.+]] = insertelement <4 x double> poison, double %[[RHS_CONV]], i64 0
   // CHECK: %[[RHS_SPLAT:.+]] = shufflevector <4 x double> %[[RHS_SPLAT_INSERT]], <4 x double> poison, <4 x i32> zeroinitializer
   // CHECK: %[[NEZERO:.+]] = icmp ne <4 x i64> %[[COND]], zeroinitializer
   // CHECK: %[[SELECT:.+]] = select <4 x i1> %[[NEZERO]], <4 x double> %[[LHS_SPLAT]], <4 x double> %[[RHS_SPLAT]]
@@ -121,11 +121,11 @@ void TwoScalarOps() {
   four_ints ? some_int : some_short;
   // CHECK: %[[COND:.+]] = load <4 x i32>
   // CHECK: %[[LHS:.+]] = load i32
-  // CHECK: %[[LHS_SPLAT_INSERT:.+]] = insertelement <4 x i32> poison, i32 %[[LHS]], i32 0
+  // CHECK: %[[LHS_SPLAT_INSERT:.+]] = insertelement <4 x i32> poison, i32 %[[LHS]], i64 0
   // CHECK: %[[LHS_SPLAT:.+]] = shufflevector <4 x i32> %[[LHS_SPLAT_INSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
   // CHECK: %[[RHS:.+]] = load i16
   // CHECK: %[[RHS_SEXT:.+]] = sext i16 %[[RHS]] to i32
-  // CHECK: %[[RHS_SPLAT_INSERT:.+]] = insertelement <4 x i32> poison, i32 %[[RHS_SEXT]], i32 0
+  // CHECK: %[[RHS_SPLAT_INSERT:.+]] = insertelement <4 x i32> poison, i32 %[[RHS_SEXT]], i64 0
   // CHECK: %[[RHS_SPLAT:.+]] = shufflevector <4 x i32> %[[RHS_SPLAT_INSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
   // CHECK: %[[NEZERO:.+]] = icmp ne <4 x i32> %[[COND]], zeroinitializer
   // CHECK: %[[SELECT:.+]] = select <4 x i1> %[[NEZERO]], <4 x i32> %[[LHS_SPLAT]], <4 x i32> %[[RHS_SPLAT]]
@@ -137,7 +137,7 @@ void OneScalarOp() {
   // CHECK: %[[COND:.+]] = load <4 x i32>
   // CHECK: %[[LHS:.+]] = load <4 x i32>
   // CHECK: %[[RHS:.+]] = load i32
-  // CHECK: %[[RHS_SPLAT_INSERT:.+]] = insertelement <4 x i32> poison, i32 %[[RHS]], i32 0
+  // CHECK: %[[RHS_SPLAT_INSERT:.+]] = insertelement <4 x i32> poison, i32 %[[RHS]], i64 0
   // CHECK: %[[RHS_SPLAT:.+]] = shufflevector <4 x i32> %[[RHS_SPLAT_INSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
   // CHECK: %[[NEZERO:.+]] = icmp ne <4 x i32> %[[COND]], zeroinitializer
   // CHECK: %[[SELECT:.+]] = select <4 x i1> %[[NEZERO]], <4 x i32> %[[LHS]], <4 x i32> %[[RHS_SPLAT]]
@@ -152,7 +152,7 @@ void OneScalarOp() {
   // CHECK: %[[COND:.+]] = load <4 x i32>
   // CHECK: %[[RHS:.+]] = load float
   // CHECK: %[[RHS_CONV:.+]] = fptosi float %[[RHS]] to i32
-  // CHECK: %[[RHS_SPLAT_INSERT:.+]] = insertelement <4 x i32> poison, i32 %[[RHS_CONV]], i32 0
+  // CHECK: %[[RHS_SPLAT_INSERT:.+]] = insertelement <4 x i32> poison, i32 %[[RHS_CONV]], i64 0
   // CHECK: %[[RHS_SPLAT:.+]] = shufflevector <4 x i32> %[[RHS_SPLAT_INSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
   // CHECK: %[[NEZERO:.+]] = icmp ne <4 x i32> %[[COND]], zeroinitializer
   // CHECK: %[[SELECT:.+]] = select <4 x i1> %[[NEZERO]], <4 x i32> %[[COND]], <4 x i32> %[[RHS_SPLAT]]
@@ -167,7 +167,7 @@ void OneScalarOp() {
   // CHECK: %[[COND:.+]] = load <4 x i32>
   // CHECK: %[[LHS:.+]] = load float
   // CHECK: %[[LHS_CONV:.+]] = fptosi float %[[LHS]] to i32
-  // CHECK: %[[LHS_SPLAT_INSERT:.+]] = insertelement <4 x i32> poison, i32 %[[LHS_CONV]], i32 0
+  // CHECK: %[[LHS_SPLAT_INSERT:.+]] = insertelement <4 x i32> poison, i32 %[[LHS_CONV]], i64 0
   // CHECK: %[[LHS_SPLAT:.+]] = shufflevector <4 x i32> %[[LHS_SPLAT_INSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
   // CHECK: %[[RHS:.+]] = load <4 x i32>
   // CHECK: %[[NEZERO:.+]] = icmp ne <4 x i32> %[[COND]], zeroinitializer
@@ -177,7 +177,7 @@ void OneScalarOp() {
   // CHECK: %[[COND:.+]] = load <4 x i32>
   // CHECK: %[[LHS:.+]] = load <4 x float>
   // CHECK: %[[RHS:.+]] = load float
-  // CHECK: %[[RHS_SPLAT_INSERT:.+]] = insertelement <4 x float> poison, float %[[RHS]], i32 0
+  // CHECK: %[[RHS_SPLAT_INSERT:.+]] = insertelement <4 x float> poison, float %[[RHS]], i64 0
   // CHECK: %[[RHS_SPLAT:.+]] = shufflevector <4 x float> %[[RHS_SPLAT_INSERT]], <4 x float> poison, <4 x i32> zeroinitializer
   // CHECK: %[[NEZERO:.+]] = icmp ne <4 x i32> %[[COND]], zeroinitializer
   // CHECK: %[[SELECT:.+]] = select <4 x i1> %[[NEZERO]], <4 x float> %[[LHS]], <4 x float> %[[RHS_SPLAT]]
@@ -205,7 +205,7 @@ void OneScalarOp() {
   // CHECK: %[[LHS:.+]] = load <4 x i64>
   // CHECK: %[[RHS:.+]] = load i32
   // CHECK: %[[RHS_CONV:.+]] = sext i32 %[[RHS]] to i64
-  // CHECK: %[[RHS_SPLAT_INSERT:.+]] = insertelement <4 x i64> poison, i64 %[[RHS_CONV]], i32 0
+  // CHECK: %[[RHS_SPLAT_INSERT:.+]] = insertelement <4 x i64> poison, i64 %[[RHS_CONV]], i64 0
   // CHECK: %[[RHS_SPLAT:.+]] = shufflevector <4 x i64> %[[RHS_SPLAT_INSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
   // CHECK: %[[NEZERO:.+]] = icmp ne <4 x i64> %[[COND]], zeroinitializer
   // CHECK: %[[SELECT:.+]] = select <4 x i1> %[[NEZERO]], <4 x i64> %[[LHS]], <4 x i64> %[[RHS_SPLAT]]
@@ -214,7 +214,7 @@ void OneScalarOp() {
   // CHECK: %[[COND:.+]] = load <4 x i64>
   // CHECK: %[[LHS:.+]] = load <4 x i64>
   // CHECK: %[[RHS:.+]] = load i64
-  // CHECK: %[[RHS_SPLAT_INSERT:.+]] = insertelement <4 x i64> poison, i64 %[[RHS]], i32 0
+  // CHECK: %[[RHS_SPLAT_INSERT:.+]] = insertelement <4 x i64> poison, i64 %[[RHS]], i64 0
   // CHECK: %[[RHS_SPLAT:.+]] = shufflevector <4 x i64> %[[RHS_SPLAT_INSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
   // CHECK: %[[NEZERO:.+]] = icmp ne <4 x i64> %[[COND]], zeroinitializer
   // CHECK: %[[SELECT:.+]] = select <4 x i1> %[[NEZERO]], <4 x i64> %[[LHS]], <4 x i64> %[[RHS_SPLAT]]
@@ -224,7 +224,7 @@ void OneScalarOp() {
   // CHECK: %[[LHS:.+]] = load <4 x i64>
   // CHECK: %[[RHS:.+]] = load double
   // CHECK: %[[RHS_CONV:.+]] = fptosi double %[[RHS]] to i64
-  // CHECK: %[[RHS_SPLAT_INSERT:.+]] = insertelement <4 x i64> poison, i64 %[[RHS_CONV]], i32 0
+  // CHECK: %[[RHS_SPLAT_INSERT:.+]] = insertelement <4 x i64> poison, i64 %[[RHS_CONV]], i64 0
   // CHECK: %[[RHS_SPLAT:.+]] = shufflevector <4 x i64> %[[RHS_SPLAT_INSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
   // CHECK: %[[NEZERO:.+]] = icmp ne <4 x i64> %[[COND]], zeroinitializer
   // CHECK: %[[SELECT:.+]] = select <4 x i1> %[[NEZERO]], <4 x i64> %[[LHS]], <4 x i64> %[[RHS_SPLAT]]

diff  --git a/clang/test/CodeGenCXX/vector-splat-conversion.cpp b/clang/test/CodeGenCXX/vector-splat-conversion.cpp
index b8dba9314d992..9046ade7289a0 100644
--- a/clang/test/CodeGenCXX/vector-splat-conversion.cpp
+++ b/clang/test/CodeGenCXX/vector-splat-conversion.cpp
@@ -56,7 +56,7 @@ gcc_int_2 FloatToIntConversion(gcc_int_2 Int2, float f) {
   // CHECK: %[[LOAD_INT:.+]] = load <2 x i32>
   // CHECK: %[[LOAD:.+]] = load float, ptr
   // CHECK: %[[CONV:.+]] = fptosi float %[[LOAD]] to i32
-  // CHECK: %[[INSERT:.+]] = insertelement <2 x i32> poison, i32 %[[CONV]], i32 0
+  // CHECK: %[[INSERT:.+]] = insertelement <2 x i32> poison, i32 %[[CONV]], i64 0
   // CHECK: %[[SPLAT:.+]] = shufflevector <2 x i32> %[[INSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
   // CHECK: add <2 x i32> %[[LOAD_INT]], %[[SPLAT]]
 }

diff  --git a/clang/test/CodeGenOpenCL/bool_cast.cl b/clang/test/CodeGenOpenCL/bool_cast.cl
index 6e6538f00b4f2..8ba8c8a017623 100644
--- a/clang/test/CodeGenOpenCL/bool_cast.cl
+++ b/clang/test/CodeGenOpenCL/bool_cast.cl
@@ -11,7 +11,7 @@ void kernel ker() {
 // CHECK: {{%.*}} = load i8, ptr %t, align 1
 // CHECK: {{%.*}} = trunc i8 {{%.*}} to i1
 // CHECK: {{%.*}} = sext i1 {{%.*}} to i32
-// CHECK: {{%.*}} = insertelement <4 x i32> poison, i32 {{%.*}}, i32 0
+// CHECK: {{%.*}} = insertelement <4 x i32> poison, i32 {{%.*}}, i64 0
 // CHECK: {{%.*}} = shufflevector <4 x i32> {{%.*}}, <4 x i32> poison, <4 x i32> zeroinitializer
 // CHECK: store <4 x i32> {{%.*}}, ptr %vec4, align 16
   int i = (int)t;

diff  --git a/llvm/lib/IR/Constants.cpp b/llvm/lib/IR/Constants.cpp
index f777b1be4d825..a53671183f77c 100644
--- a/llvm/lib/IR/Constants.cpp
+++ b/llvm/lib/IR/Constants.cpp
@@ -1415,11 +1415,11 @@ Constant *ConstantVector::getSplat(ElementCount EC, Constant *V) {
   else if (isa<UndefValue>(V))
     return UndefValue::get(VTy);
 
-  Type *I32Ty = Type::getInt32Ty(VTy->getContext());
+  Type *IdxTy = Type::getInt64Ty(VTy->getContext());
 
   // Move scalar into vector.
   Constant *PoisonV = PoisonValue::get(VTy);
-  V = ConstantExpr::getInsertElement(PoisonV, V, ConstantInt::get(I32Ty, 0));
+  V = ConstantExpr::getInsertElement(PoisonV, V, ConstantInt::get(IdxTy, 0));
   // Build shuffle mask to perform the splat.
   SmallVector<int, 8> Zeros(EC.getKnownMinValue(), 0);
   // Splat.

diff  --git a/llvm/lib/IR/IRBuilder.cpp b/llvm/lib/IR/IRBuilder.cpp
index c4923bca83a55..f871205843a76 100644
--- a/llvm/lib/IR/IRBuilder.cpp
+++ b/llvm/lib/IR/IRBuilder.cpp
@@ -1257,10 +1257,8 @@ Value *IRBuilderBase::CreateVectorSplat(ElementCount EC, Value *V,
   assert(EC.isNonZero() && "Cannot splat to an empty vector!");
 
   // First insert it into a poison vector so we can shuffle it.
-  Type *I32Ty = getInt32Ty();
   Value *Poison = PoisonValue::get(VectorType::get(V->getType(), EC));
-  V = CreateInsertElement(Poison, V, ConstantInt::get(I32Ty, 0),
-                          Name + ".splatinsert");
+  V = CreateInsertElement(Poison, V, getInt64(0), Name + ".splatinsert");
 
   // Shuffle the value across the desired number of elements.
   SmallVector<int, 16> Zeros;

diff  --git a/llvm/test/CodeGen/Generic/expand-vp-gather-scatter.ll b/llvm/test/CodeGen/Generic/expand-vp-gather-scatter.ll
index 514798af69d74..2e2dba50c8459 100644
--- a/llvm/test/CodeGen/Generic/expand-vp-gather-scatter.ll
+++ b/llvm/test/CodeGen/Generic/expand-vp-gather-scatter.ll
@@ -4,7 +4,7 @@
 ; Fixed vectors
 define <4 x i32> @vpgather_v4i32(<4 x ptr> %ptrs, <4 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: @vpgather_v4i32(
-; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[EVL:%.*]], i32 0
+; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[EVL:%.*]], i64 0
 ; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult <4 x i32> <i32 0, i32 1, i32 2, i32 3>, [[DOTSPLAT]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = and <4 x i1> [[TMP1]], [[M:%.*]]
@@ -17,7 +17,7 @@ define <4 x i32> @vpgather_v4i32(<4 x ptr> %ptrs, <4 x i1> %m, i32 zeroext %evl)
 
 define <2 x i64> @vpgather_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: @vpgather_v2i64(
-; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[EVL:%.*]], i32 0
+; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[EVL:%.*]], i64 0
 ; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult <2 x i32> <i32 0, i32 1>, [[DOTSPLAT]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i1> [[TMP1]], [[M:%.*]]
@@ -30,7 +30,7 @@ define <2 x i64> @vpgather_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl)
 
 define void @vpscatter_v4i32(<4 x i32> %val, <4 x ptr> %ptrs, <4 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: @vpscatter_v4i32(
-; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[EVL:%.*]], i32 0
+; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[EVL:%.*]], i64 0
 ; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult <4 x i32> <i32 0, i32 1, i32 2, i32 3>, [[DOTSPLAT]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = and <4 x i1> [[TMP1]], [[M:%.*]]
@@ -43,7 +43,7 @@ define void @vpscatter_v4i32(<4 x i32> %val, <4 x ptr> %ptrs, <4 x i1> %m, i32 z
 
 define void @vpscatter_v2i64(<2 x i64> %val, <2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: @vpscatter_v2i64(
-; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[EVL:%.*]], i32 0
+; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[EVL:%.*]], i64 0
 ; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult <2 x i32> <i32 0, i32 1>, [[DOTSPLAT]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i1> [[TMP1]], [[M:%.*]]

diff  --git a/llvm/test/CodeGen/Generic/expand-vp-load-store.ll b/llvm/test/CodeGen/Generic/expand-vp-load-store.ll
index 89e9a4b307675..8984d020f99ed 100644
--- a/llvm/test/CodeGen/Generic/expand-vp-load-store.ll
+++ b/llvm/test/CodeGen/Generic/expand-vp-load-store.ll
@@ -5,7 +5,7 @@
 ; Fixed vectors
 define <2 x i64> @vpload_v2i64(ptr %ptr, <2 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: @vpload_v2i64(
-; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[EVL:%.*]], i32 0
+; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[EVL:%.*]], i64 0
 ; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult <2 x i32> <i32 0, i32 1>, [[DOTSPLAT]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i1> [[TMP1]], [[M:%.*]]
@@ -27,7 +27,7 @@ define <2 x i64> @vpload_v2i64_vlmax(ptr %ptr, <2 x i1> %m) {
 
 define <2 x i64> @vpload_v2i64_allones_mask(ptr %ptr, i32 zeroext %evl) {
 ; CHECK-LABEL: @vpload_v2i64_allones_mask(
-; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[EVL:%.*]], i32 0
+; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[EVL:%.*]], i64 0
 ; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult <2 x i32> <i32 0, i32 1>, [[DOTSPLAT]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i1> [[TMP1]], <i1 true, i1 true>
@@ -49,7 +49,7 @@ define <2 x i64> @vpload_v2i64_allones_mask_vlmax(ptr %ptr) {
 
 define void @vpstore_v2i64(<2 x i64> %val, ptr %ptr, <2 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: @vpstore_v2i64(
-; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[EVL:%.*]], i32 0
+; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[EVL:%.*]], i64 0
 ; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult <2 x i32> <i32 0, i32 1>, [[DOTSPLAT]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i1> [[TMP1]], [[M:%.*]]
@@ -71,7 +71,7 @@ define void @vpstore_v2i64_vlmax(<2 x i64> %val, ptr %ptr, <2 x i1> %m) {
 
 define void @vpstore_v2i64_allones_mask(<2 x i64> %val, ptr %ptr, i32 zeroext %evl) {
 ; CHECK-LABEL: @vpstore_v2i64_allones_mask(
-; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[EVL:%.*]], i32 0
+; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[EVL:%.*]], i64 0
 ; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult <2 x i32> <i32 0, i32 1>, [[DOTSPLAT]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i1> [[TMP1]], <i1 true, i1 true>
@@ -121,13 +121,13 @@ define <vscale x 1 x i64> @vpload_nxv1i64_vscale(ptr %ptr, <vscale x 1 x i1> %m)
 define <vscale x 1 x i64> @vpload_nxv1i64_allones_mask(ptr %ptr, i32 zeroext %evl) {
 ; CHECK-LABEL: @vpload_nxv1i64_allones_mask(
 ; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i1> @llvm.get.active.lane.mask.nxv1i1.i32(i32 0, i32 [[EVL:%.*]])
-; CHECK-NEXT:    [[TMP2:%.*]] = and <vscale x 1 x i1> [[TMP1]], shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i32 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP2:%.*]] = and <vscale x 1 x i1> [[TMP1]], shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i64 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer)
 ; CHECK-NEXT:    [[VSCALE:%.*]] = call i32 @llvm.vscale.i32()
 ; CHECK-NEXT:    [[SCALABLE_SIZE:%.*]] = mul nuw i32 [[VSCALE]], 1
 ; CHECK-NEXT:    [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.masked.load.nxv1i64.p0(ptr [[PTR:%.*]], i32 1, <vscale x 1 x i1> [[TMP2]], <vscale x 1 x i64> poison)
 ; CHECK-NEXT:    ret <vscale x 1 x i64> [[TMP3]]
 ;
-  %load = call <vscale x 1 x i64> @llvm.vp.load.nxv1i64.p0(ptr %ptr, <vscale x 1 x i1> shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i32 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer), i32 %evl)
+  %load = call <vscale x 1 x i64> @llvm.vp.load.nxv1i64.p0(ptr %ptr, <vscale x 1 x i1> shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i64 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer), i32 %evl)
   ret <vscale x 1 x i64> %load
 }
 
@@ -140,7 +140,7 @@ define <vscale x 1 x i64> @vpload_nxv1i64_allones_mask_vscale(ptr %ptr) {
 ;
   %vscale = call i32 @llvm.vscale.i32()
   %vlmax = mul nuw i32 %vscale, 1
-  %load = call <vscale x 1 x i64> @llvm.vp.load.nxv1i64.p0(ptr %ptr, <vscale x 1 x i1> shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i32 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer), i32 %vlmax)
+  %load = call <vscale x 1 x i64> @llvm.vp.load.nxv1i64.p0(ptr %ptr, <vscale x 1 x i1> shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i64 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer), i32 %vlmax)
   ret <vscale x 1 x i64> %load
 }
 
@@ -173,13 +173,13 @@ define void @vpstore_nxv1i64_vscale(<vscale x 1 x i64> %val, ptr %ptr, <vscale x
 define void @vpstore_nxv1i64_allones_mask(<vscale x 1 x i64> %val, ptr %ptr, i32 zeroext %evl) {
 ; CHECK-LABEL: @vpstore_nxv1i64_allones_mask(
 ; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i1> @llvm.get.active.lane.mask.nxv1i1.i32(i32 0, i32 [[EVL:%.*]])
-; CHECK-NEXT:    [[TMP2:%.*]] = and <vscale x 1 x i1> [[TMP1]], shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i32 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP2:%.*]] = and <vscale x 1 x i1> [[TMP1]], shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i64 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer)
 ; CHECK-NEXT:    [[VSCALE:%.*]] = call i32 @llvm.vscale.i32()
 ; CHECK-NEXT:    [[SCALABLE_SIZE:%.*]] = mul nuw i32 [[VSCALE]], 1
 ; CHECK-NEXT:    call void @llvm.masked.store.nxv1i64.p0(<vscale x 1 x i64> [[VAL:%.*]], ptr [[PTR:%.*]], i32 1, <vscale x 1 x i1> [[TMP2]])
 ; CHECK-NEXT:    ret void
 ;
-  call void @llvm.vp.store.nxv1i64.p0(<vscale x 1 x i64> %val, ptr %ptr, <vscale x 1 x i1> shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i32 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer), i32 %evl)
+  call void @llvm.vp.store.nxv1i64.p0(<vscale x 1 x i64> %val, ptr %ptr, <vscale x 1 x i1> shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i64 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer), i32 %evl)
   ret void
 }
 
@@ -192,7 +192,7 @@ define void @vpstore_nxv1i64_allones_mask_vscale(<vscale x 1 x i64> %val, ptr %p
 ;
   %vscale = call i32 @llvm.vscale.i32()
   %vlmax = mul nuw i32 %vscale, 1
-  call void @llvm.vp.store.nxv1i64.p0(<vscale x 1 x i64> %val, ptr %ptr, <vscale x 1 x i1> shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i32 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer), i32 %vlmax)
+  call void @llvm.vp.store.nxv1i64.p0(<vscale x 1 x i64> %val, ptr %ptr, <vscale x 1 x i1> shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i64 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer), i32 %vlmax)
   ret void
 }
 

diff  --git a/llvm/test/CodeGen/Generic/expand-vp.ll b/llvm/test/CodeGen/Generic/expand-vp.ll
index e31b040032a7b..509f86a64d9ce 100644
--- a/llvm/test/CodeGen/Generic/expand-vp.ll
+++ b/llvm/test/CodeGen/Generic/expand-vp.ll
@@ -154,7 +154,7 @@ define void @test_vp_cmp_v8(<8 x i32> %i0, <8 x i32> %i1, <8 x float> %f0, <8 x
 ; ALL-CONVERT-NEXT:  %{{.*}} = add <8 x i32> %i0, %i1
 ; ALL-CONVERT-NEXT:  %{{.*}} = sub <8 x i32> %i0, %i1
 ; ALL-CONVERT-NEXT:  %{{.*}} = mul <8 x i32> %i0, %i1
-; ALL-CONVERT-NEXT:  [[NINS:%.+]] = insertelement <8 x i32> poison, i32 %n, i32 0
+; ALL-CONVERT-NEXT:  [[NINS:%.+]] = insertelement <8 x i32> poison, i32 %n, i64 0
 ; ALL-CONVERT-NEXT:  [[NSPLAT:%.+]] = shufflevector <8 x i32> [[NINS]], <8 x i32> poison, <8 x i32> zeroinitializer
 ; ALL-CONVERT-NEXT:  [[EVLM:%.+]] = icmp ult <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, [[NSPLAT]]
 ; ALL-CONVERT-NEXT:  [[NEWM:%.+]] = and <8 x i1> [[EVLM]], %m
@@ -177,7 +177,7 @@ define void @test_vp_cmp_v8(<8 x i32> %i0, <8 x i32> %i1, <8 x float> %f0, <8 x
 
 ; Check that reductions use the correct neutral element for masked-off elements
 ; ALL-CONVERT: define void @test_vp_reduce_int_v4(i32 %start, <4 x i32> %vi, <4 x i1> %m, i32 %n) {
-; ALL-CONVERT-NEXT:  [[NINS:%.+]] = insertelement <4 x i32> poison, i32 %n, i32 0
+; ALL-CONVERT-NEXT:  [[NINS:%.+]] = insertelement <4 x i32> poison, i32 %n, i64 0
 ; ALL-CONVERT-NEXT:  [[NSPLAT:%.+]] = shufflevector <4 x i32> [[NINS]], <4 x i32> poison, <4 x i32> zeroinitializer
 ; ALL-CONVERT-NEXT:  [[EVLM:%.+]] = icmp ult <4 x i32> <i32 0, i32 1, i32 2, i32 3>, [[NSPLAT]]
 ; ALL-CONVERT-NEXT:  [[NEWM:%.+]] = and <4 x i1> [[EVLM]], %m
@@ -212,7 +212,7 @@ define void @test_vp_cmp_v8(<8 x i32> %i0, <8 x i32> %i1, <8 x float> %f0, <8 x
 
 ; Check that reductions use the correct neutral element for masked-off elements
 ; ALL-CONVERT: define void @test_vp_reduce_fp_v4(float %f, <4 x float> %vf, <4 x i1> %m, i32 %n) {
-; ALL-CONVERT-NEXT:  [[NINS:%.+]] = insertelement <4 x i32> poison, i32 %n, i32 0
+; ALL-CONVERT-NEXT:  [[NINS:%.+]] = insertelement <4 x i32> poison, i32 %n, i64 0
 ; ALL-CONVERT-NEXT:  [[NSPLAT:%.+]] = shufflevector <4 x i32> [[NINS]], <4 x i32> poison, <4 x i32> zeroinitializer
 ; ALL-CONVERT-NEXT:  [[EVLM:%.+]] = icmp ult <4 x i32> <i32 0, i32 1, i32 2, i32 3>, [[NSPLAT]]
 ; ALL-CONVERT-NEXT:  [[NEWM:%.+]] = and <4 x i1> [[EVLM]], %m
@@ -334,7 +334,7 @@ define void @test_vp_cmp_v8(<8 x i32> %i0, <8 x i32> %i1, <8 x float> %f0, <8 x
 ; DISCARD_LEGAL-NEXT:   %r0 = call <8 x i32> @llvm.vp.add.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 8)
 ; DISCARD_LEGAL-NEXT:   %r1 = call <8 x i32> @llvm.vp.sub.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 8)
 ; DISCARD_LEGAL-NEXT:   %r2 = call <8 x i32> @llvm.vp.mul.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 8)
-; DISCARD_LEGAL-NEXT:   [[NSPLATINS:%.+]] = insertelement <8 x i32> poison, i32 %n, i32 0
+; DISCARD_LEGAL-NEXT:   [[NSPLATINS:%.+]] = insertelement <8 x i32> poison, i32 %n, i64 0
 ; DISCARD_LEGAL-NEXT:   [[NSPLAT:%.+]] = shufflevector <8 x i32> [[NSPLATINS]], <8 x i32> poison, <8 x i32> zeroinitializer
 ; DISCARD_LEGAL-NEXT:   [[EVLMASK:%.+]] = icmp ult <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, [[NSPLAT]]
 ; DISCARD_LEGAL-NEXT:   [[NEWMASK:%.+]] = and <8 x i1> [[EVLMASK]], %m
@@ -367,7 +367,7 @@ define void @test_vp_cmp_v8(<8 x i32> %i0, <8 x i32> %i1, <8 x float> %f0, <8 x
 ; DISCARD_LEGAL:      ret void
 
 ; DISCARD_LEGAL: define void @test_vp_reduce_int_v4(i32 %start, <4 x i32> %vi, <4 x i1> %m, i32 %n) {
-; DISCARD_LEGAL-NEXT:  [[NSPLATINS:%.+]] = insertelement <4 x i32> poison, i32 %n, i32 0
+; DISCARD_LEGAL-NEXT:  [[NSPLATINS:%.+]] = insertelement <4 x i32> poison, i32 %n, i64 0
 ; DISCARD_LEGAL-NEXT:  [[NSPLAT:%.+]] = shufflevector <4 x i32> [[NSPLATINS]], <4 x i32> poison, <4 x i32> zeroinitializer
 ; DISCARD_LEGAL-NEXT:  [[EVLMASK:%.+]] = icmp ult <4 x i32> <i32 0, i32 1, i32 2, i32 3>, [[NSPLAT]]
 ; DISCARD_LEGAL-NEXT:  [[NEWMASK:%.+]] = and <4 x i1> [[EVLMASK]], %m
@@ -383,7 +383,7 @@ define void @test_vp_cmp_v8(<8 x i32> %i0, <8 x i32> %i1, <8 x float> %f0, <8 x
 ; DISCARD_LEGAL:      ret void
 
 ; DISCARD_LEGAL: define void @test_vp_reduce_fp_v4(float %f, <4 x float> %vf, <4 x i1> %m, i32 %n) {
-; DISCARD_LEGAL-NEXT:  [[NSPLATINS:%.+]] = insertelement <4 x i32> poison, i32 %n, i32 0
+; DISCARD_LEGAL-NEXT:  [[NSPLATINS:%.+]] = insertelement <4 x i32> poison, i32 %n, i64 0
 ; DISCARD_LEGAL-NEXT:  [[NSPLAT:%.+]] = shufflevector <4 x i32> [[NSPLATINS]], <4 x i32> poison, <4 x i32> zeroinitializer
 ; DISCARD_LEGAL-NEXT:  [[EVLMASK:%.+]] = icmp ult <4 x i32> <i32 0, i32 1, i32 2, i32 3>, [[NSPLAT]]
 ; DISCARD_LEGAL-NEXT:  [[NEWMASK:%.+]] = and <4 x i1> [[EVLMASK]], %m
@@ -413,7 +413,7 @@ define void @test_vp_cmp_v8(<8 x i32> %i0, <8 x i32> %i1, <8 x float> %f0, <8 x
 ; (3) All VP intrinsics have an ineffective %evl parameter.
 ;
 ; CONVERT_LEGAL: define void @test_vp_int_v8(<8 x i32> %i0, <8 x i32> %i1, <8 x i32> %i2, <8 x i32> %f3, <8 x i1> %m, i32 %n) {
-; CONVERT_LEGAL-NEXT:  [[NINS:%.+]] = insertelement <8 x i32> poison, i32 %n, i32 0
+; CONVERT_LEGAL-NEXT:  [[NINS:%.+]] = insertelement <8 x i32> poison, i32 %n, i64 0
 ; CONVERT_LEGAL-NEXT:  [[NSPLAT:%.+]] = shufflevector <8 x i32> [[NINS]], <8 x i32> poison, <8 x i32> zeroinitializer
 ; CONVERT_LEGAL-NEXT:  [[EVLM:%.+]] = icmp ult <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, [[NSPLAT]]
 ; CONVERT_LEGAL-NEXT:  [[NEWM:%.+]] = and <8 x i1> [[EVLM]], %m
@@ -443,7 +443,7 @@ define void @test_vp_cmp_v8(<8 x i32> %i0, <8 x i32> %i1, <8 x float> %f0, <8 x
 ; CONVERT_LEGAL:        ret void
 
 ; CONVERT_LEGAL: define void @test_vp_reduce_int_v4(i32 %start, <4 x i32> %vi, <4 x i1> %m, i32 %n) {
-; CONVERT_LEGAL-NEXT:  [[NINS:%.+]] = insertelement <4 x i32> poison, i32 %n, i32 0
+; CONVERT_LEGAL-NEXT:  [[NINS:%.+]] = insertelement <4 x i32> poison, i32 %n, i64 0
 ; CONVERT_LEGAL-NEXT:  [[NSPLAT:%.+]] = shufflevector <4 x i32> [[NINS]], <4 x i32> poison, <4 x i32> zeroinitializer
 ; CONVERT_LEGAL-NEXT:  [[EVLM:%.+]] = icmp ult <4 x i32> <i32 0, i32 1, i32 2, i32 3>, [[NSPLAT]]
 ; CONVERT_LEGAL-NEXT:  [[NEWM:%.+]] = and <4 x i1> [[EVLM]], %m
@@ -459,7 +459,7 @@ define void @test_vp_cmp_v8(<8 x i32> %i0, <8 x i32> %i1, <8 x float> %f0, <8 x
 ; CONVERT_LEGAL: ret void
 
 ; CONVERT_LEGAL: define void @test_vp_reduce_fp_v4(float %f, <4 x float> %vf, <4 x i1> %m, i32 %n) {
-; CONVERT_LEGAL-NEXT:  [[NINS:%.+]] = insertelement <4 x i32> poison, i32 %n, i32 0
+; CONVERT_LEGAL-NEXT:  [[NINS:%.+]] = insertelement <4 x i32> poison, i32 %n, i64 0
 ; CONVERT_LEGAL-NEXT:  [[NSPLAT:%.+]] = shufflevector <4 x i32> [[NINS]], <4 x i32> poison, <4 x i32> zeroinitializer
 ; CONVERT_LEGAL-NEXT:  [[EVLM:%.+]] = icmp ult <4 x i32> <i32 0, i32 1, i32 2, i32 3>, [[NSPLAT]]
 ; CONVERT_LEGAL-NEXT:  [[NEWM:%.+]] = and <4 x i1> [[EVLM]], %m
@@ -476,7 +476,7 @@ define void @test_vp_cmp_v8(<8 x i32> %i0, <8 x i32> %i1, <8 x float> %f0, <8 x
 ; CONVERT_LEGAL: ret void
 
 ; CONVERT_LEGAL: define void @test_vp_cmp_v8(<8 x i32> %i0, <8 x i32> %i1, <8 x float> %f0, <8 x float> %f1, <8 x i1> %m, i32 %n) {
-; CONVERT_LEGAL-NEXT:  [[NINS:%.+]] = insertelement <8 x i32> poison, i32 %n, i32 0
+; CONVERT_LEGAL-NEXT:  [[NINS:%.+]] = insertelement <8 x i32> poison, i32 %n, i64 0
 ; CONVERT_LEGAL-NEXT:  [[NSPLAT:%.+]] = shufflevector <8 x i32> [[NINS]], <8 x i32> poison, <8 x i32> zeroinitializer
 ; CONVERT_LEGAL-NEXT:  [[EVLM:%.+]] = icmp ult <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, [[NSPLAT]]
 ; CONVERT_LEGAL-NEXT:  [[NEWM:%.+]] = and <8 x i1> [[EVLM]], %m

diff  --git a/llvm/test/CodeGen/Thumb2/mve-gather-optimisation-deep.ll b/llvm/test/CodeGen/Thumb2/mve-gather-optimisation-deep.ll
index fca25d86a7d36..581c499c29fb3 100644
--- a/llvm/test/CodeGen/Thumb2/mve-gather-optimisation-deep.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-gather-optimisation-deep.ll
@@ -9,7 +9,7 @@ define arm_aapcs_vfpcc void @push_out_add_sub_block(i32* noalias nocapture reado
 ; CHECK-NEXT:    [[PUSHEDOUTADD:%.*]] = add <4 x i32> <i32 0, i32 2, i32 4, i32 6>, <i32 6, i32 6, i32 6, i32 6>
 ; CHECK-NEXT:    [[SCALEDINDEX:%.*]] = shl <4 x i32> [[PUSHEDOUTADD]], <i32 2, i32 2, i32 2, i32 2>
 ; CHECK-NEXT:    [[TMP0:%.*]] = ptrtoint ptr [[DATA:%.*]] to i32
-; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0]], i32 0
+; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0]], i64 0
 ; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    [[STARTINDEX:%.*]] = add <4 x i32> [[SCALEDINDEX]], [[DOTSPLAT]]
 ; CHECK-NEXT:    [[PREINCREMENTSTARTINDEX:%.*]] = sub <4 x i32> [[STARTINDEX]], <i32 32, i32 32, i32 32, i32 32>
@@ -70,7 +70,7 @@ define arm_aapcs_vfpcc void @push_out_mul_sub_block(i32* noalias nocapture reado
 ; CHECK-NEXT:    [[PUSHEDOUTADD:%.*]] = add <4 x i32> [[PUSHEDOUTMUL]], <i32 6, i32 6, i32 6, i32 6>
 ; CHECK-NEXT:    [[SCALEDINDEX:%.*]] = shl <4 x i32> [[PUSHEDOUTADD]], <i32 2, i32 2, i32 2, i32 2>
 ; CHECK-NEXT:    [[TMP0:%.*]] = ptrtoint ptr [[DATA:%.*]] to i32
-; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0]], i32 0
+; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0]], i64 0
 ; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    [[STARTINDEX:%.*]] = add <4 x i32> [[SCALEDINDEX]], [[DOTSPLAT]]
 ; CHECK-NEXT:    [[PREINCREMENTSTARTINDEX:%.*]] = sub <4 x i32> [[STARTINDEX]], <i32 96, i32 96, i32 96, i32 96>
@@ -139,7 +139,7 @@ define arm_aapcs_vfpcc void @push_out_mul_sub_loop(i32* noalias nocapture readon
 ; CHECK-NEXT:    [[TMP0:%.*]] = mul <4 x i32> [[VEC_IND]], <i32 3, i32 3, i32 3, i32 3>
 ; CHECK-NEXT:    [[SCALEDINDEX:%.*]] = shl <4 x i32> [[TMP0]], <i32 2, i32 2, i32 2, i32 2>
 ; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr [[DATA:%.*]] to i32
-; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP1]], i32 0
+; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP1]], i64 0
 ; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    [[STARTINDEX:%.*]] = add <4 x i32> [[SCALEDINDEX]], [[DOTSPLAT]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vldr.gather.base.v4i32.v4i32(<4 x i32> [[STARTINDEX]], i32 24)

diff  --git a/llvm/test/Transforms/CodeGenPrepare/AArch64/gather-scatter-opt-inseltpoison.ll b/llvm/test/Transforms/CodeGenPrepare/AArch64/gather-scatter-opt-inseltpoison.ll
index 77dd9dd33a68a..0114d7f9f4091 100644
--- a/llvm/test/Transforms/CodeGenPrepare/AArch64/gather-scatter-opt-inseltpoison.ll
+++ b/llvm/test/Transforms/CodeGenPrepare/AArch64/gather-scatter-opt-inseltpoison.ll
@@ -73,7 +73,7 @@ define <vscale x 4 x i32> @test_global_array(<vscale x 4 x i64> %indxs, <vscale
 
 define <vscale x 4 x i32> @global_struct_splat(<vscale x 4 x i1> %mask) #0 {
 ; CHECK-LABEL: @global_struct_splat(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> shufflevector (<vscale x 4 x ptr> insertelement (<vscale x 4 x ptr> poison, ptr getelementptr inbounds ([[STRUCT_A:%.*]], ptr @c, i64 0, i32 1), i32 0), <vscale x 4 x ptr> poison, <vscale x 4 x i32> zeroinitializer), i32 4, <vscale x 4 x i1> [[MASK:%.*]], <vscale x 4 x i32> undef)
+; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> shufflevector (<vscale x 4 x ptr> insertelement (<vscale x 4 x ptr> poison, ptr getelementptr inbounds ([[STRUCT_A:%.*]], ptr @c, i64 0, i32 1), i64 0), <vscale x 4 x ptr> poison, <vscale x 4 x i32> zeroinitializer), i32 4, <vscale x 4 x i1> [[MASK:%.*]], <vscale x 4 x i32> undef)
 ; CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 ;
   %1 = insertelement <vscale x 4 x ptr> poison, ptr @c, i32 0

diff  --git a/llvm/test/Transforms/CodeGenPrepare/AArch64/gather-scatter-opt.ll b/llvm/test/Transforms/CodeGenPrepare/AArch64/gather-scatter-opt.ll
index 28d533541f448..e4c5b4ceee542 100644
--- a/llvm/test/Transforms/CodeGenPrepare/AArch64/gather-scatter-opt.ll
+++ b/llvm/test/Transforms/CodeGenPrepare/AArch64/gather-scatter-opt.ll
@@ -73,7 +73,7 @@ define <vscale x 4 x i32> @test_global_array(<vscale x 4 x i64> %indxs, <vscale
 
 define <vscale x 4 x i32> @global_struct_splat(<vscale x 4 x i1> %mask) #0 {
 ; CHECK-LABEL: @global_struct_splat(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> shufflevector (<vscale x 4 x ptr> insertelement (<vscale x 4 x ptr> poison, ptr getelementptr inbounds ([[STRUCT_A:%.*]], ptr @c, i64 0, i32 1), i32 0), <vscale x 4 x ptr> poison, <vscale x 4 x i32> zeroinitializer), i32 4, <vscale x 4 x i1> [[MASK:%.*]], <vscale x 4 x i32> undef)
+; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> shufflevector (<vscale x 4 x ptr> insertelement (<vscale x 4 x ptr> poison, ptr getelementptr inbounds ([[STRUCT_A:%.*]], ptr @c, i64 0, i32 1), i64 0), <vscale x 4 x ptr> poison, <vscale x 4 x i32> zeroinitializer), i32 4, <vscale x 4 x i1> [[MASK:%.*]], <vscale x 4 x i32> undef)
 ; CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 ;
   %1 = insertelement <vscale x 4 x ptr> undef, ptr @c, i32 0

diff  --git a/llvm/test/Transforms/ConstraintElimination/geps-ptrvector.ll b/llvm/test/Transforms/ConstraintElimination/geps-ptrvector.ll
index 93d940f2de3ba..051f764bb8bc2 100644
--- a/llvm/test/Transforms/ConstraintElimination/geps-ptrvector.ll
+++ b/llvm/test/Transforms/ConstraintElimination/geps-ptrvector.ll
@@ -40,7 +40,7 @@ define <vscale x 2 x i1> @test.scalable.vectorgep.ult.true(<vscale x 2 x ptr> %v
 ; CHECK-LABEL: @test.scalable.vectorgep.ult.true(
 ; CHECK-NEXT:    [[GEP_1:%.*]] = getelementptr inbounds i32, <vscale x 2 x ptr> [[VEC:%.*]], i64 1
 ; CHECK-NEXT:    [[T_1:%.*]] = icmp ult <vscale x 2 x ptr> [[VEC]], [[GEP_1]]
-; CHECK-NEXT:    ret <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i32 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)
+; CHECK-NEXT:    ret <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)
 ;
   %gep.1 = getelementptr inbounds i32, <vscale x 2 x ptr> %vec, i64 1
   %t.1 = icmp ult <vscale x 2 x ptr> %vec, %gep.1

diff  --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-abs-srshl.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-abs-srshl.ll
index 4a747a17af144..d6434ad2b4700 100644
--- a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-abs-srshl.ll
+++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-abs-srshl.ll
@@ -6,7 +6,7 @@ target triple = "aarch64-unknown-linux-gnu"
 define <vscale x 8 x i16> @srshl_abs_undef_merge(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg, <vscale x 8 x i1> %pg2) #0 {
 ; CHECK-LABEL: @srshl_abs_undef_merge(
 ; CHECK-NEXT:    [[ABS:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> [[PG:%.*]], <vscale x 8 x i16> [[A:%.*]])
-; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1> [[PG2:%.*]], <vscale x 8 x i16> [[ABS]], <vscale x 8 x i16> shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 2, i32 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer))
+; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1> [[PG2:%.*]], <vscale x 8 x i16> [[ABS]], <vscale x 8 x i16> shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 2, i64 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer))
 ; CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 ;
   %abs = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %a)
@@ -18,7 +18,7 @@ define <vscale x 8 x i16> @srshl_abs_undef_merge(<vscale x 8 x i16> %a, <vscale
 define <vscale x 8 x i16> @srshl_abs_zero_merge(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg, <vscale x 8 x i1> %pg2) #0 {
 ; CHECK-LABEL: @srshl_abs_zero_merge(
 ; CHECK-NEXT:    [[ABS:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i1> [[PG:%.*]], <vscale x 8 x i16> [[A:%.*]])
-; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1> [[PG2:%.*]], <vscale x 8 x i16> [[ABS]], <vscale x 8 x i16> shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 2, i32 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer))
+; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1> [[PG2:%.*]], <vscale x 8 x i16> [[ABS]], <vscale x 8 x i16> shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 2, i64 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer))
 ; CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 ;
   %abs = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %a)
@@ -29,8 +29,8 @@ define <vscale x 8 x i16> @srshl_abs_zero_merge(<vscale x 8 x i16> %a, <vscale x
 
 define <vscale x 8 x i16> @srshl_abs_positive_merge(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg, <vscale x 8 x i1> %pg2) #0 {
 ; CHECK-LABEL: @srshl_abs_positive_merge(
-; CHECK-NEXT:    [[ABS:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 2, i32 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer), <vscale x 8 x i1> [[PG:%.*]], <vscale x 8 x i16> [[A:%.*]])
-; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1> [[PG2:%.*]], <vscale x 8 x i16> [[ABS]], <vscale x 8 x i16> shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 2, i32 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer))
+; CHECK-NEXT:    [[ABS:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 2, i64 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer), <vscale x 8 x i1> [[PG:%.*]], <vscale x 8 x i16> [[A:%.*]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1> [[PG2:%.*]], <vscale x 8 x i16> [[ABS]], <vscale x 8 x i16> shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 2, i64 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer))
 ; CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 ;
   %absmerge = tail call <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16 2)
@@ -44,7 +44,7 @@ define <vscale x 8 x i16> @srshl_abs_all_active_pred(<vscale x 8 x i16> %a, <vsc
 ; CHECK-LABEL: @srshl_abs_all_active_pred(
 ; CHECK-NEXT:    [[PG:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
 ; CHECK-NEXT:    [[ABS:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> [[B:%.*]], <vscale x 8 x i1> [[PG]], <vscale x 8 x i16> [[A:%.*]])
-; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1> [[PG2:%.*]], <vscale x 8 x i16> [[ABS]], <vscale x 8 x i16> shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 2, i32 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer))
+; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1> [[PG2:%.*]], <vscale x 8 x i16> [[ABS]], <vscale x 8 x i16> shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 2, i64 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer))
 ; CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 ;
   %pg = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
@@ -57,7 +57,7 @@ define <vscale x 8 x i16> @srshl_abs_all_active_pred(<vscale x 8 x i16> %a, <vsc
 define <vscale x 8 x i16> @srshl_abs_same_pred(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i1> %pg) #0 {
 ; CHECK-LABEL: @srshl_abs_same_pred(
 ; CHECK-NEXT:    [[ABS:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> [[B:%.*]], <vscale x 8 x i1> [[PG:%.*]], <vscale x 8 x i16> [[A:%.*]])
-; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1> [[PG]], <vscale x 8 x i16> [[ABS]], <vscale x 8 x i16> shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 2, i32 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer))
+; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1> [[PG]], <vscale x 8 x i16> [[ABS]], <vscale x 8 x i16> shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 2, i64 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer))
 ; CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 ;
   %abs = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> %b, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %a)
@@ -69,7 +69,7 @@ define <vscale x 8 x i16> @srshl_abs_same_pred(<vscale x 8 x i16> %a, <vscale x
 define <vscale x 8 x i16> @srshl_sqabs(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg, <vscale x 8 x i1> %pg2) #0 {
 ; CHECK-LABEL: @srshl_sqabs(
 ; CHECK-NEXT:    [[ABS:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sqabs.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> [[PG:%.*]], <vscale x 8 x i16> [[A:%.*]])
-; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1> [[PG2:%.*]], <vscale x 8 x i16> [[ABS]], <vscale x 8 x i16> shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 2, i32 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer))
+; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1> [[PG2:%.*]], <vscale x 8 x i16> [[ABS]], <vscale x 8 x i16> shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 2, i64 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer))
 ; CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 ;
   %abs = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sqabs.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %a)
@@ -80,8 +80,8 @@ define <vscale x 8 x i16> @srshl_sqabs(<vscale x 8 x i16> %a, <vscale x 8 x i1>
 
 define <vscale x 8 x i16> @srshl_abs_negative_merge(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg, <vscale x 8 x i1> %pg2) #0 {
 ; CHECK-LABEL: @srshl_abs_negative_merge(
-; CHECK-NEXT:    [[ABS:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 -1, i32 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer), <vscale x 8 x i1> [[PG:%.*]], <vscale x 8 x i16> [[A:%.*]])
-; CHECK-NEXT:    [[SHR:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.srshl.nxv8i16(<vscale x 8 x i1> [[PG2:%.*]], <vscale x 8 x i16> [[ABS]], <vscale x 8 x i16> shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 2, i32 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer))
+; CHECK-NEXT:    [[ABS:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 -1, i64 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer), <vscale x 8 x i1> [[PG:%.*]], <vscale x 8 x i16> [[A:%.*]])
+; CHECK-NEXT:    [[SHR:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.srshl.nxv8i16(<vscale x 8 x i1> [[PG2:%.*]], <vscale x 8 x i16> [[ABS]], <vscale x 8 x i16> shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 2, i64 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer))
 ; CHECK-NEXT:    ret <vscale x 8 x i16> [[SHR]]
 ;
   %absmerge = tail call <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16 -1)
@@ -94,7 +94,7 @@ define <vscale x 8 x i16> @srshl_abs_negative_merge(<vscale x 8 x i16> %a, <vsca
 define <vscale x 8 x i16> @srshl_abs_nonconst_merge(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i1> %pg, <vscale x 8 x i1> %pg2) #0 {
 ; CHECK-LABEL: @srshl_abs_nonconst_merge(
 ; CHECK-NEXT:    [[ABS:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> [[B:%.*]], <vscale x 8 x i1> [[PG:%.*]], <vscale x 8 x i16> [[A:%.*]])
-; CHECK-NEXT:    [[SHR:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.srshl.nxv8i16(<vscale x 8 x i1> [[PG2:%.*]], <vscale x 8 x i16> [[ABS]], <vscale x 8 x i16> shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 2, i32 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer))
+; CHECK-NEXT:    [[SHR:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.srshl.nxv8i16(<vscale x 8 x i1> [[PG2:%.*]], <vscale x 8 x i16> [[ABS]], <vscale x 8 x i16> shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 2, i64 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer))
 ; CHECK-NEXT:    ret <vscale x 8 x i16> [[SHR]]
 ;
   %abs = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> %b, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %a)
@@ -107,7 +107,7 @@ define <vscale x 8 x i16> @srshl_abs_not_all_active_pred(<vscale x 8 x i16> %a,
 ; CHECK-LABEL: @srshl_abs_not_all_active_pred(
 ; CHECK-NEXT:    [[PG:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 8)
 ; CHECK-NEXT:    [[ABS:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> [[B:%.*]], <vscale x 8 x i1> [[PG]], <vscale x 8 x i16> [[A:%.*]])
-; CHECK-NEXT:    [[SHR:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.srshl.nxv8i16(<vscale x 8 x i1> [[PG2:%.*]], <vscale x 8 x i16> [[ABS]], <vscale x 8 x i16> shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 2, i32 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer))
+; CHECK-NEXT:    [[SHR:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.srshl.nxv8i16(<vscale x 8 x i1> [[PG2:%.*]], <vscale x 8 x i16> [[ABS]], <vscale x 8 x i16> shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 2, i64 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer))
 ; CHECK-NEXT:    ret <vscale x 8 x i16> [[SHR]]
 ;
   %pg = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 8)
@@ -120,7 +120,7 @@ define <vscale x 8 x i16> @srshl_abs_not_all_active_pred(<vscale x 8 x i16> %a,
 define <vscale x 8 x i16> @srshl_abs_
diff _pred(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i1> %pg, <vscale x 8 x i1> %pg2) #0 {
 ; CHECK-LABEL: @srshl_abs_
diff _pred(
 ; CHECK-NEXT:    [[ABS:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> [[B:%.*]], <vscale x 8 x i1> [[PG:%.*]], <vscale x 8 x i16> [[A:%.*]])
-; CHECK-NEXT:    [[SHR:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.srshl.nxv8i16(<vscale x 8 x i1> [[PG2:%.*]], <vscale x 8 x i16> [[ABS]], <vscale x 8 x i16> shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 2, i32 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer))
+; CHECK-NEXT:    [[SHR:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.srshl.nxv8i16(<vscale x 8 x i1> [[PG2:%.*]], <vscale x 8 x i16> [[ABS]], <vscale x 8 x i16> shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 2, i64 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer))
 ; CHECK-NEXT:    ret <vscale x 8 x i16> [[SHR]]
 ;
   %abs = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> %b, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %a)
@@ -132,7 +132,7 @@ define <vscale x 8 x i16> @srshl_abs_
diff _pred(<vscale x 8 x i16> %a, <vscale x
 define <vscale x 8 x i16> @srshl_abs_negative_shift(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg, <vscale x 8 x i1> %pg2) #0 {
 ; CHECK-LABEL: @srshl_abs_negative_shift(
 ; CHECK-NEXT:    [[ABS:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> [[PG:%.*]], <vscale x 8 x i16> [[A:%.*]])
-; CHECK-NEXT:    [[SHR:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.srshl.nxv8i16(<vscale x 8 x i1> [[PG2:%.*]], <vscale x 8 x i16> [[ABS]], <vscale x 8 x i16> shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 -2, i32 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer))
+; CHECK-NEXT:    [[SHR:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.srshl.nxv8i16(<vscale x 8 x i1> [[PG2:%.*]], <vscale x 8 x i16> [[ABS]], <vscale x 8 x i16> shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 -2, i64 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer))
 ; CHECK-NEXT:    ret <vscale x 8 x i16> [[SHR]]
 ;
   %abs = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %a)

diff  --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-fmul-idempotency.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-fmul-idempotency.ll
index ed3ca6700854b..85978ff450e09 100644
--- a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-fmul-idempotency.ll
+++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-fmul-idempotency.ll
@@ -32,7 +32,7 @@ define <vscale x 2 x double> @idempotent_fmul_f64(<vscale x 2 x i1> %pg, <vscale
 
 define <vscale x 2 x double> @idempotent_fmul_
diff erent_argument_order(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) #0 {
 ; CHECK-LABEL: @idempotent_fmul_
diff erent_argument_order(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.fmul.nxv2f64(<vscale x 2 x i1> [[PG:%.*]], <vscale x 2 x double> shufflevector (<vscale x 2 x double> insertelement (<vscale x 2 x double> poison, double 1.000000e+00, i32 0), <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer), <vscale x 2 x double> [[A:%.*]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.fmul.nxv2f64(<vscale x 2 x i1> [[PG:%.*]], <vscale x 2 x double> shufflevector (<vscale x 2 x double> insertelement (<vscale x 2 x double> poison, double 1.000000e+00, i64 0), <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer), <vscale x 2 x double> [[A:%.*]])
 ; CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 ;
   %1 = call <vscale x 2 x double> @llvm.aarch64.sve.dup.x.nxv2f64(double 1.0)
@@ -54,7 +54,7 @@ define <vscale x 8 x half> @idempotent_fmul_two_dups(<vscale x 8 x i1> %pg, <vsc
   ; Edge case -- make sure that the case where we're fmultiplying two dups
   ; together is sane.
 ; CHECK-LABEL: @idempotent_fmul_two_dups(
-; CHECK-NEXT:    ret <vscale x 8 x half> shufflevector (<vscale x 8 x half> insertelement (<vscale x 8 x half> poison, half 0xH3C00, i32 0), <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer)
+; CHECK-NEXT:    ret <vscale x 8 x half> shufflevector (<vscale x 8 x half> insertelement (<vscale x 8 x half> poison, half 0xH3C00, i64 0), <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer)
 ;
   %1 = call <vscale x 8 x half> @llvm.aarch64.sve.dup.x.nxv8f16(half 1.0)
   %2 = call <vscale x 8 x half> @llvm.aarch64.sve.dup.x.nxv8f16(half 1.0)
@@ -65,7 +65,7 @@ define <vscale x 8 x half> @idempotent_fmul_two_dups(<vscale x 8 x i1> %pg, <vsc
 ; Non-idempotent fmuls -- we don't expect these to be optimised out.
 define <vscale x 8 x half> @non_idempotent_fmul_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) #0 {
 ; CHECK-LABEL: @non_idempotent_fmul_f16(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.fmul.nxv8f16(<vscale x 8 x i1> [[PG:%.*]], <vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> shufflevector (<vscale x 8 x half> insertelement (<vscale x 8 x half> poison, half 0xH4000, i32 0), <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer))
+; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.fmul.nxv8f16(<vscale x 8 x i1> [[PG:%.*]], <vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> shufflevector (<vscale x 8 x half> insertelement (<vscale x 8 x half> poison, half 0xH4000, i64 0), <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer))
 ; CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 ;
   %1 = call <vscale x 8 x half> @llvm.aarch64.sve.dup.x.nxv8f16(half 2.0)
@@ -75,7 +75,7 @@ define <vscale x 8 x half> @non_idempotent_fmul_f16(<vscale x 8 x i1> %pg, <vsca
 
 define <vscale x 4 x float> @non_idempotent_fmul_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) #0 {
 ; CHECK-LABEL: @non_idempotent_fmul_f32(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.fmul.nxv4f32(<vscale x 4 x i1> [[PG:%.*]], <vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 2.000000e+00, i32 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer))
+; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.fmul.nxv4f32(<vscale x 4 x i1> [[PG:%.*]], <vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 2.000000e+00, i64 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer))
 ; CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 ;
   %1 = call <vscale x 4 x float> @llvm.aarch64.sve.dup.x.nxv4f32(float 2.0)
@@ -85,7 +85,7 @@ define <vscale x 4 x float> @non_idempotent_fmul_f32(<vscale x 4 x i1> %pg, <vsc
 
 define <vscale x 2 x double> @non_idempotent_fmul_f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) #0 {
 ; CHECK-LABEL: @non_idempotent_fmul_f64(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.fmul.nxv2f64(<vscale x 2 x i1> [[PG:%.*]], <vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> shufflevector (<vscale x 2 x double> insertelement (<vscale x 2 x double> poison, double 2.000000e+00, i32 0), <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer))
+; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.fmul.nxv2f64(<vscale x 2 x i1> [[PG:%.*]], <vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> shufflevector (<vscale x 2 x double> insertelement (<vscale x 2 x double> poison, double 2.000000e+00, i64 0), <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer))
 ; CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 ;
   %1 = call <vscale x 2 x double> @llvm.aarch64.sve.dup.x.nxv2f64(double 2.0)

diff  --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-mul-idempotency.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-mul-idempotency.ll
index a4cfb06d98aa8..027f671712bed 100644
--- a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-mul-idempotency.ll
+++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-mul-idempotency.ll
@@ -32,7 +32,7 @@ define <vscale x 2 x i64> @idempotent_mul_i64(<vscale x 2 x i1> %pg, <vscale x 2
 
 define <vscale x 2 x i64> @idempotent_mul_
diff erent_argument_order(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) #0 {
 ; CHECK-LABEL: @idempotent_mul_
diff erent_argument_order(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.mul.nxv2i64(<vscale x 2 x i1> [[PG:%.*]], <vscale x 2 x i64> shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer), <vscale x 2 x i64> [[A:%.*]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.mul.nxv2i64(<vscale x 2 x i1> [[PG:%.*]], <vscale x 2 x i64> shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer), <vscale x 2 x i64> [[A:%.*]])
 ; CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 ;
   %1 = call <vscale x 2 x i64> @llvm.aarch64.sve.dup.x.nxv2i64(i64 1)
@@ -54,7 +54,7 @@ define <vscale x 8 x i16> @idempotent_mul_two_dups(<vscale x 8 x i1> %pg, <vscal
   ; Edge case -- make sure that the case where we're multiplying two dups
   ; together is sane.
 ; CHECK-LABEL: @idempotent_mul_two_dups(
-; CHECK-NEXT:    ret <vscale x 8 x i16> shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 1, i32 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer)
+; CHECK-NEXT:    ret <vscale x 8 x i16> shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 1, i64 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer)
 ;
   %1 = call <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16 1)
   %2 = call <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16 1)
@@ -65,7 +65,7 @@ define <vscale x 8 x i16> @idempotent_mul_two_dups(<vscale x 8 x i1> %pg, <vscal
 ; Non-idempotent muls -- we don't expect these to be optimised out.
 define <vscale x 8 x i16> @non_idempotent_mul_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) #0 {
 ; CHECK-LABEL: @non_idempotent_mul_i16(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.mul.nxv8i16(<vscale x 8 x i1> [[PG:%.*]], <vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 2, i32 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer))
+; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.mul.nxv8i16(<vscale x 8 x i1> [[PG:%.*]], <vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 2, i64 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer))
 ; CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 ;
   %1 = call <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16 2)
@@ -75,7 +75,7 @@ define <vscale x 8 x i16> @non_idempotent_mul_i16(<vscale x 8 x i1> %pg, <vscale
 
 define <vscale x 4 x i32> @non_idempotent_mul_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) #0 {
 ; CHECK-LABEL: @non_idempotent_mul_i32(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.mul.nxv4i32(<vscale x 4 x i1> [[PG:%.*]], <vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 2, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer))
+; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.mul.nxv4i32(<vscale x 4 x i1> [[PG:%.*]], <vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 2, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer))
 ; CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 ;
   %1 = call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 2)
@@ -85,7 +85,7 @@ define <vscale x 4 x i32> @non_idempotent_mul_i32(<vscale x 4 x i1> %pg, <vscale
 
 define <vscale x 2 x i64> @non_idempotent_mul_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) #0 {
 ; CHECK-LABEL: @non_idempotent_mul_i64(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.mul.nxv2i64(<vscale x 2 x i1> [[PG:%.*]], <vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 2, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer))
+; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.mul.nxv2i64(<vscale x 2 x i1> [[PG:%.*]], <vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 2, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer))
 ; CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 ;
   %1 = call <vscale x 2 x i64> @llvm.aarch64.sve.dup.x.nxv2i64(i64 2)

diff  --git a/llvm/test/Transforms/InstCombine/div.ll b/llvm/test/Transforms/InstCombine/div.ll
index 9d06337354963..3d34e3c57bf58 100644
--- a/llvm/test/Transforms/InstCombine/div.ll
+++ b/llvm/test/Transforms/InstCombine/div.ll
@@ -1062,7 +1062,7 @@ define <2 x i8> @sdiv_by_negconst_v2i8(<2 x i8> %x) {
 
 define <vscale x 2 x i8> @sdiv_by_negconst_nxv2i8(<vscale x 2 x i8> %x) {
 ; CHECK-LABEL: @sdiv_by_negconst_nxv2i8(
-; CHECK-NEXT:    [[DIV_NEG:%.*]] = sdiv <vscale x 2 x i8> [[X:%.*]], shufflevector (<vscale x 2 x i8> insertelement (<vscale x 2 x i8> poison, i8 108, i32 0), <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer)
+; CHECK-NEXT:    [[DIV_NEG:%.*]] = sdiv <vscale x 2 x i8> [[X:%.*]], shufflevector (<vscale x 2 x i8> insertelement (<vscale x 2 x i8> poison, i8 108, i64 0), <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer)
 ; CHECK-NEXT:    ret <vscale x 2 x i8> [[DIV_NEG]]
 ;
   %div = sdiv <vscale x 2 x i8> %x, shufflevector (<vscale x 2 x i8> insertelement (<vscale x 2 x i8> poison, i8 -108, i32 0), <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer)

diff  --git a/llvm/test/Transforms/InstCombine/fdiv.ll b/llvm/test/Transforms/InstCombine/fdiv.ll
index dd49f8bca91da..c81a9ba6d4215 100644
--- a/llvm/test/Transforms/InstCombine/fdiv.ll
+++ b/llvm/test/Transforms/InstCombine/fdiv.ll
@@ -87,10 +87,10 @@ define <2 x float> @exact_inverse_splat(<2 x float> %x) {
 
 define <vscale x 2 x float> @exact_inverse_scalable_splat(<vscale x 2 x float> %x) {
 ; CHECK-LABEL: @exact_inverse_scalable_splat(
-; CHECK-NEXT:    [[DIV:%.*]] = fmul <vscale x 2 x float> [[X:%.*]], shufflevector (<vscale x 2 x float> insertelement (<vscale x 2 x float> poison, float 2.500000e-01, i32 0), <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer)
+; CHECK-NEXT:    [[DIV:%.*]] = fmul <vscale x 2 x float> [[X:%.*]], shufflevector (<vscale x 2 x float> insertelement (<vscale x 2 x float> poison, float 2.500000e-01, i64 0), <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer)
 ; CHECK-NEXT:    ret <vscale x 2 x float> [[DIV]]
 ;
-  %div = fdiv <vscale x 2 x float> %x, shufflevector (<vscale x 2 x float> insertelement (<vscale x 2 x float> undef, float 4.0, i32 0), <vscale x 2 x float> undef, <vscale x 2 x i32> zeroinitializer)
+  %div = fdiv <vscale x 2 x float> %x, shufflevector (<vscale x 2 x float> insertelement (<vscale x 2 x float> undef, float 4.0, i64 0), <vscale x 2 x float> undef, <vscale x 2 x i32> zeroinitializer)
   ret <vscale x 2 x float> %div
 }
 

diff  --git a/llvm/test/Transforms/InstCombine/fmul.ll b/llvm/test/Transforms/InstCombine/fmul.ll
index b84a5bd56a6d2..6d2a65a418af0 100644
--- a/llvm/test/Transforms/InstCombine/fmul.ll
+++ b/llvm/test/Transforms/InstCombine/fmul.ll
@@ -795,7 +795,7 @@ define <2 x float> @fmul_fadd_distribute_vec(<2 x float> %x) {
 define <vscale x 2 x float> @fmul_fadd_distribute_scalablevec(<vscale x 2 x float> %x) {
 ; CHECK-LABEL: @fmul_fadd_distribute_scalablevec(
 ; CHECK-NEXT:    [[TMP1:%.*]] = fmul reassoc <vscale x 2 x float> [[X:%.*]], shufflevector (<vscale x 2 x float> insertelement (<vscale x 2 x float> undef, float 6.000000e+03, i32 0), <vscale x 2 x float> undef, <vscale x 2 x i32> zeroinitializer)
-; CHECK-NEXT:    [[T3:%.*]] = fadd reassoc <vscale x 2 x float> [[TMP1]], shufflevector (<vscale x 2 x float> insertelement (<vscale x 2 x float> poison, float 1.200000e+07, i32 0), <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer)
+; CHECK-NEXT:    [[T3:%.*]] = fadd reassoc <vscale x 2 x float> [[TMP1]], shufflevector (<vscale x 2 x float> insertelement (<vscale x 2 x float> poison, float 1.200000e+07, i64 0), <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer)
 ; CHECK-NEXT:    ret <vscale x 2 x float> [[T3]]
 ;
   %t1 = fadd <vscale x 2 x float> shufflevector (<vscale x 2 x float> insertelement (<vscale x 2 x float> undef, float 2.0e+3, i32 0), <vscale x 2 x float> undef, <vscale x 2 x i32> zeroinitializer), %x

diff  --git a/llvm/test/Transforms/InstCombine/select.ll b/llvm/test/Transforms/InstCombine/select.ll
index 2aca9fc527831..0b29984913b4a 100644
--- a/llvm/test/Transforms/InstCombine/select.ll
+++ b/llvm/test/Transforms/InstCombine/select.ll
@@ -99,7 +99,7 @@ define <2 x i1> @test9vec(<2 x i1> %C, <2 x i1> %X) {
 
 define <vscale x 2 x i1> @test9vvec(<vscale x 2 x i1> %C, <vscale x 2 x i1> %X) {
 ; CHECK-LABEL: @test9vvec(
-; CHECK-NEXT:    [[NOT_C:%.*]] = xor <vscale x 2 x i1> [[C:%.*]], shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i32 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)
+; CHECK-NEXT:    [[NOT_C:%.*]] = xor <vscale x 2 x i1> [[C:%.*]], shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)
 ; CHECK-NEXT:    [[R:%.*]] = select <vscale x 2 x i1> [[NOT_C]], <vscale x 2 x i1> [[X:%.*]], <vscale x 2 x i1> zeroinitializer
 ; CHECK-NEXT:    ret <vscale x 2 x i1> [[R]]
 ;
@@ -3388,7 +3388,7 @@ define <vscale x 2 x i32> @scalable_sign_bits(<vscale x 2 x i8> %x) {
 define <vscale x 2 x i1> @scalable_non_zero(<vscale x 2 x i32> %x) {
 ; CHECK-LABEL: @scalable_non_zero(
 ; CHECK-NEXT:    [[A:%.*]] = or <vscale x 2 x i32> [[X:%.*]], shufflevector (<vscale x 2 x i32> insertelement (<vscale x 2 x i32> poison, i32 1, i32 0), <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer)
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ule <vscale x 2 x i32> [[A]], shufflevector (<vscale x 2 x i32> insertelement (<vscale x 2 x i32> poison, i32 56, i32 0), <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer)
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ule <vscale x 2 x i32> [[A]], shufflevector (<vscale x 2 x i32> insertelement (<vscale x 2 x i32> poison, i32 56, i64 0), <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer)
 ; CHECK-NEXT:    ret <vscale x 2 x i1> [[CMP]]
 ;
   %a = or <vscale x 2 x i32> %x, shufflevector (<vscale x 2 x i32> insertelement (<vscale x 2 x i32> poison, i32 1, i32 0), <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer)

diff  --git a/llvm/test/Transforms/InstCombine/sub.ll b/llvm/test/Transforms/InstCombine/sub.ll
index 69fe99fe8521a..e871b109127ce 100644
--- a/llvm/test/Transforms/InstCombine/sub.ll
+++ b/llvm/test/Transforms/InstCombine/sub.ll
@@ -841,7 +841,7 @@ define <2 x i32> @test44vec(<2 x i32> %x) {
 
 define <vscale x 2 x i32> @test44scalablevec(<vscale x 2 x i32> %x) {
 ; CHECK-LABEL: @test44scalablevec(
-; CHECK-NEXT:    [[SUB:%.*]] = add nsw <vscale x 2 x i32> [[X:%.*]], shufflevector (<vscale x 2 x i32> insertelement (<vscale x 2 x i32> poison, i32 -32768, i32 0), <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer)
+; CHECK-NEXT:    [[SUB:%.*]] = add nsw <vscale x 2 x i32> [[X:%.*]], shufflevector (<vscale x 2 x i32> insertelement (<vscale x 2 x i32> poison, i32 -32768, i64 0), <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer)
 ; CHECK-NEXT:    ret <vscale x 2 x i32> [[SUB]]
 ;
   %sub = sub nsw <vscale x 2 x i32> %x, shufflevector (<vscale x 2 x i32> insertelement (<vscale x 2 x i32> undef, i32 32768, i32 0), <vscale x 2 x i32> undef, <vscale x 2 x i32> zeroinitializer)
@@ -861,7 +861,7 @@ define <2 x i16> @test44vecminval(<2 x i16> %x) {
 ; uses m_ImmConstant which matches Constant but (explicitly) not ConstantExpr.
 define <vscale x 2 x i16> @test44scalablevecminval(<vscale x 2 x i16> %x) {
 ; CHECK-LABEL: @test44scalablevecminval(
-; CHECK-NEXT:    [[SUB:%.*]] = add <vscale x 2 x i16> [[X:%.*]], shufflevector (<vscale x 2 x i16> insertelement (<vscale x 2 x i16> poison, i16 -32768, i32 0), <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer)
+; CHECK-NEXT:    [[SUB:%.*]] = add <vscale x 2 x i16> [[X:%.*]], shufflevector (<vscale x 2 x i16> insertelement (<vscale x 2 x i16> poison, i16 -32768, i64 0), <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer)
 ; CHECK-NEXT:    ret <vscale x 2 x i16> [[SUB]]
 ;
   %sub = sub nsw <vscale x 2 x i16> %x, shufflevector (<vscale x 2 x i16> insertelement (<vscale x 2 x i16> undef, i16 -32768, i32 0), <vscale x 2 x i16> undef, <vscale x 2 x i32> zeroinitializer)

diff  --git a/llvm/test/Transforms/InstCombine/zext-ctlz-trunc-to-ctlz-add.ll b/llvm/test/Transforms/InstCombine/zext-ctlz-trunc-to-ctlz-add.ll
index f63db744e4551..de47b22edb7cd 100644
--- a/llvm/test/Transforms/InstCombine/zext-ctlz-trunc-to-ctlz-add.ll
+++ b/llvm/test/Transforms/InstCombine/zext-ctlz-trunc-to-ctlz-add.ll
@@ -42,7 +42,7 @@ define <2 x i8> @trunc_ctlz_zext_v2i8_v2i33(<2 x i8> %x) {
 define <vscale x 2 x i16> @trunc_ctlz_zext_nxv2i16_nxv2i64(<vscale x 2 x i16> %x) {
 ; CHECK-LABEL: @trunc_ctlz_zext_nxv2i16_nxv2i64(
 ; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i16> @llvm.ctlz.nxv2i16(<vscale x 2 x i16> [[X:%.*]], i1 false)
-; CHECK-NEXT:    [[ZZ:%.*]] = add nuw nsw <vscale x 2 x i16> [[TMP1]], shufflevector (<vscale x 2 x i16> insertelement (<vscale x 2 x i16> poison, i16 48, i32 0), <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer)
+; CHECK-NEXT:    [[ZZ:%.*]] = add nuw nsw <vscale x 2 x i16> [[TMP1]], shufflevector (<vscale x 2 x i16> insertelement (<vscale x 2 x i16> poison, i16 48, i64 0), <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer)
 ; CHECK-NEXT:    ret <vscale x 2 x i16> [[ZZ]]
 ;
   %z = zext <vscale x 2 x i16> %x to <vscale x 2 x i64>
@@ -74,7 +74,7 @@ define <vscale x 2 x i16> @trunc_ctlz_zext_nxv2i16_nxv2i63_multiple_uses(<vscale
 ; CHECK-LABEL: @trunc_ctlz_zext_nxv2i16_nxv2i63_multiple_uses(
 ; CHECK-NEXT:    [[Z:%.*]] = zext <vscale x 2 x i16> [[X:%.*]] to <vscale x 2 x i63>
 ; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i16> @llvm.ctlz.nxv2i16(<vscale x 2 x i16> [[X]], i1 true)
-; CHECK-NEXT:    [[ZZ:%.*]] = add nuw nsw <vscale x 2 x i16> [[TMP1]], shufflevector (<vscale x 2 x i16> insertelement (<vscale x 2 x i16> poison, i16 47, i32 0), <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer)
+; CHECK-NEXT:    [[ZZ:%.*]] = add nuw nsw <vscale x 2 x i16> [[TMP1]], shufflevector (<vscale x 2 x i16> insertelement (<vscale x 2 x i16> poison, i16 47, i64 0), <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer)
 ; CHECK-NEXT:    call void @use1(<vscale x 2 x i63> [[Z]])
 ; CHECK-NEXT:    ret <vscale x 2 x i16> [[ZZ]]
 ;

diff  --git a/llvm/test/Transforms/InstSimplify/ConstProp/vscale-inseltpoison.ll b/llvm/test/Transforms/InstSimplify/ConstProp/vscale-inseltpoison.ll
index d4d47da7c74dd..b5bdbfcf24d82 100644
--- a/llvm/test/Transforms/InstSimplify/ConstProp/vscale-inseltpoison.ll
+++ b/llvm/test/Transforms/InstSimplify/ConstProp/vscale-inseltpoison.ll
@@ -43,7 +43,7 @@ define <vscale x 4 x i32> @sub() {
 
 define <vscale x 4 x i32> @sub_splat() {
 ; CHECK-LABEL: @sub_splat(
-; CHECK-NEXT:    ret <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 -16, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT:    ret <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 -16, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
 ;
   %r = sub <vscale x 4 x i32> zeroinitializer, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> undef, i32 16, i32 0), <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer)
   ret <vscale x 4 x i32> %r
@@ -262,7 +262,7 @@ define <vscale x 4 x i1> @icmp_undef() {
 
 define <vscale x 4 x i1> @icmp_zero() {
 ; CHECK-LABEL: @icmp_zero(
-; CHECK-NEXT:    ret <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i32 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT:    ret <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)
 ;
   %r = icmp eq <vscale x 4 x i32> zeroinitializer, zeroinitializer
   ret <vscale x 4 x i1> %r
@@ -270,7 +270,7 @@ define <vscale x 4 x i1> @icmp_zero() {
 
 define <vscale x 4 x i1> @fcmp_true() {
 ; CHECK-LABEL: @fcmp_true(
-; CHECK-NEXT:    ret <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i32 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT:    ret <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)
 ;
   %r = fcmp true <vscale x 4 x float> undef, undef
   ret <vscale x 4 x i1> %r
@@ -294,7 +294,7 @@ define <vscale x 4 x i1> @fcmp_undef() {
 
 define <vscale x 4 x i1> @fcmp_not_equality() {
 ; CHECK-LABEL: @fcmp_not_equality(
-; CHECK-NEXT:    ret <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i32 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT:    ret <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)
 ;
   %r = icmp ule <vscale x 4 x i32> undef, zeroinitializer
   ret <vscale x 4 x i1> %r

diff  --git a/llvm/test/Transforms/InstSimplify/ConstProp/vscale.ll b/llvm/test/Transforms/InstSimplify/ConstProp/vscale.ll
index 4c41867cce751..0d597939cd13d 100644
--- a/llvm/test/Transforms/InstSimplify/ConstProp/vscale.ll
+++ b/llvm/test/Transforms/InstSimplify/ConstProp/vscale.ll
@@ -43,7 +43,7 @@ define <vscale x 4 x i32> @sub() {
 
 define <vscale x 4 x i32> @sub_splat() {
 ; CHECK-LABEL: @sub_splat(
-; CHECK-NEXT:    ret <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 -16, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT:    ret <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 -16, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
 ;
   %r = sub <vscale x 4 x i32> zeroinitializer, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> undef, i32 16, i32 0), <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer)
   ret <vscale x 4 x i32> %r
@@ -262,7 +262,7 @@ define <vscale x 4 x i1> @icmp_undef() {
 
 define <vscale x 4 x i1> @icmp_zero() {
 ; CHECK-LABEL: @icmp_zero(
-; CHECK-NEXT:    ret <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i32 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT:    ret <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)
 ;
   %r = icmp eq <vscale x 4 x i32> zeroinitializer, zeroinitializer
   ret <vscale x 4 x i1> %r
@@ -270,7 +270,7 @@ define <vscale x 4 x i1> @icmp_zero() {
 
 define <vscale x 4 x i1> @fcmp_true() {
 ; CHECK-LABEL: @fcmp_true(
-; CHECK-NEXT:    ret <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i32 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT:    ret <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)
 ;
   %r = fcmp true <vscale x 4 x float> undef, undef
   ret <vscale x 4 x i1> %r
@@ -294,7 +294,7 @@ define <vscale x 4 x i1> @fcmp_undef() {
 
 define <vscale x 4 x i1> @fcmp_not_equality() {
 ; CHECK-LABEL: @fcmp_not_equality(
-; CHECK-NEXT:    ret <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i32 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT:    ret <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)
 ;
   %r = icmp ule <vscale x 4 x i32> undef, zeroinitializer
   ret <vscale x 4 x i1> %r

diff  --git a/llvm/test/Transforms/InstSimplify/cmp-vec-fast-path.ll b/llvm/test/Transforms/InstSimplify/cmp-vec-fast-path.ll
index 7986dbfa1e392..a07e6fae5d4f4 100644
--- a/llvm/test/Transforms/InstSimplify/cmp-vec-fast-path.ll
+++ b/llvm/test/Transforms/InstSimplify/cmp-vec-fast-path.ll
@@ -13,7 +13,7 @@ define <2 x i1> @i32cmp_eq_fixed_zero() {
 
 define <vscale x 2 x i1> @i32cmp_eq_scalable_zero() {
 ; CHECK-LABEL: @i32cmp_eq_scalable_zero(
-; CHECK-NEXT:    ret <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i32 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)
+; CHECK-NEXT:    ret <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)
 ;
   %res = icmp eq <vscale x 2 x i32> zeroinitializer, zeroinitializer
   ret <vscale x 2 x i1> %res
@@ -29,7 +29,7 @@ define <2 x i1> @i32cmp_eq_fixed_one() {
 
 define <vscale x 2 x i1> @i32cmp_eq_scalable_one() {
 ; CHECK-LABEL: @i32cmp_eq_scalable_one(
-; CHECK-NEXT:    ret <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i32 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)
+; CHECK-NEXT:    ret <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)
 ;
   %res = icmp eq <vscale x 2 x i32> shufflevector (<vscale x 2 x i32> insertelement (<vscale x 2 x i32> undef, i32 1, i32 0), <vscale x 2 x i32> undef, <vscale x 2 x i32> zeroinitializer), shufflevector (<vscale x 2 x i32> insertelement (<vscale x 2 x i32> undef, i32 1, i32 0), <vscale x 2 x i32> undef, <vscale x 2 x i32> zeroinitializer)
   ret <vscale x 2 x i1> %res
@@ -109,7 +109,7 @@ define <2 x i1> @i32cmp_uge_fixed_zero() {
 
 define <vscale x 2 x i1> @i32cmp_uge_scalable_zero() {
 ; CHECK-LABEL: @i32cmp_uge_scalable_zero(
-; CHECK-NEXT:    ret <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i32 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)
+; CHECK-NEXT:    ret <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)
 ;
   %res = icmp uge <vscale x 2 x i32> zeroinitializer, zeroinitializer
   ret <vscale x 2 x i1> %res
@@ -125,7 +125,7 @@ define <2 x i1> @i32cmp_uge_fixed_one() {
 
 define <vscale x 2 x i1> @i32cmp_uge_scalable_one() {
 ; CHECK-LABEL: @i32cmp_uge_scalable_one(
-; CHECK-NEXT:    ret <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i32 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)
+; CHECK-NEXT:    ret <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)
 ;
   %res = icmp uge <vscale x 2 x i32> shufflevector (<vscale x 2 x i32> insertelement (<vscale x 2 x i32> undef, i32 1, i32 0), <vscale x 2 x i32> undef, <vscale x 2 x i32> zeroinitializer), shufflevector (<vscale x 2 x i32> insertelement (<vscale x 2 x i32> undef, i32 1, i32 0), <vscale x 2 x i32> undef, <vscale x 2 x i32> zeroinitializer)
   ret <vscale x 2 x i1> %res
@@ -173,7 +173,7 @@ define <2 x i1> @i32cmp_ule_fixed_zero() {
 
 define <vscale x 2 x i1> @i32cmp_ule_scalable_zero() {
 ; CHECK-LABEL: @i32cmp_ule_scalable_zero(
-; CHECK-NEXT:    ret <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i32 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)
+; CHECK-NEXT:    ret <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)
 ;
   %res = icmp ule <vscale x 2 x i32> zeroinitializer, zeroinitializer
   ret <vscale x 2 x i1> %res
@@ -189,7 +189,7 @@ define <2 x i1> @i32cmp_ule_fixed_one() {
 
 define <vscale x 2 x i1> @i32cmp_ule_scalable_one() {
 ; CHECK-LABEL: @i32cmp_ule_scalable_one(
-; CHECK-NEXT:    ret <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i32 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)
+; CHECK-NEXT:    ret <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)
 ;
   %res = icmp ule <vscale x 2 x i32> shufflevector (<vscale x 2 x i32> insertelement (<vscale x 2 x i32> undef, i32 1, i32 0), <vscale x 2 x i32> undef, <vscale x 2 x i32> zeroinitializer), shufflevector (<vscale x 2 x i32> insertelement (<vscale x 2 x i32> undef, i32 1, i32 0), <vscale x 2 x i32> undef, <vscale x 2 x i32> zeroinitializer)
   ret <vscale x 2 x i1> %res
@@ -237,7 +237,7 @@ define <2 x i1> @i32cmp_sge_fixed_zero() {
 
 define <vscale x 2 x i1> @i32cmp_sge_scalable_zero() {
 ; CHECK-LABEL: @i32cmp_sge_scalable_zero(
-; CHECK-NEXT:    ret <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i32 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)
+; CHECK-NEXT:    ret <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)
 ;
   %res = icmp sge <vscale x 2 x i32> zeroinitializer, zeroinitializer
   ret <vscale x 2 x i1> %res
@@ -253,7 +253,7 @@ define <2 x i1> @i32cmp_sge_fixed_one() {
 
 define <vscale x 2 x i1> @i32cmp_sge_scalable_one() {
 ; CHECK-LABEL: @i32cmp_sge_scalable_one(
-; CHECK-NEXT:    ret <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i32 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)
+; CHECK-NEXT:    ret <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)
 ;
   %res = icmp sge <vscale x 2 x i32> shufflevector (<vscale x 2 x i32> insertelement (<vscale x 2 x i32> undef, i32 1, i32 0), <vscale x 2 x i32> undef, <vscale x 2 x i32> zeroinitializer), shufflevector (<vscale x 2 x i32> insertelement (<vscale x 2 x i32> undef, i32 1, i32 0), <vscale x 2 x i32> undef, <vscale x 2 x i32> zeroinitializer)
   ret <vscale x 2 x i1> %res
@@ -301,7 +301,7 @@ define <2 x i1> @i32cmp_sle_fixed_zero() {
 
 define <vscale x 2 x i1> @i32cmp_sle_scalable_zero() {
 ; CHECK-LABEL: @i32cmp_sle_scalable_zero(
-; CHECK-NEXT:    ret <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i32 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)
+; CHECK-NEXT:    ret <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)
 ;
   %res = icmp sle <vscale x 2 x i32> zeroinitializer, zeroinitializer
   ret <vscale x 2 x i1> %res
@@ -317,7 +317,7 @@ define <2 x i1> @i32cmp_sle_fixed_one() {
 
 define <vscale x 2 x i1> @i32cmp_sle_scalable_one() {
 ; CHECK-LABEL: @i32cmp_sle_scalable_one(
-; CHECK-NEXT:    ret <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i32 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)
+; CHECK-NEXT:    ret <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)
 ;
   %res = icmp sle <vscale x 2 x i32> shufflevector (<vscale x 2 x i32> insertelement (<vscale x 2 x i32> undef, i32 1, i32 0), <vscale x 2 x i32> undef, <vscale x 2 x i32> zeroinitializer), shufflevector (<vscale x 2 x i32> insertelement (<vscale x 2 x i32> undef, i32 1, i32 0), <vscale x 2 x i32> undef, <vscale x 2 x i32> zeroinitializer)
   ret <vscale x 2 x i1> %res
@@ -365,7 +365,7 @@ define <2 x i1> @floatcmp_oeq_fixed_zero() {
 
 define <vscale x 2 x i1> @floatcmp_oeq_scalable_zero() {
 ; CHECK-LABEL: @floatcmp_oeq_scalable_zero(
-; CHECK-NEXT:    ret <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i32 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)
+; CHECK-NEXT:    ret <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)
 ;
   %res = fcmp oeq <vscale x 2 x float> zeroinitializer, zeroinitializer
   ret <vscale x 2 x i1> %res
@@ -381,7 +381,7 @@ define <2 x i1> @floatcmp_oeq_fixed_one() {
 
 define <vscale x 2 x i1> @floatcmp_oeq_scalable_one() {
 ; CHECK-LABEL: @floatcmp_oeq_scalable_one(
-; CHECK-NEXT:    ret <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i32 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)
+; CHECK-NEXT:    ret <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)
 ;
   %res = fcmp oeq <vscale x 2 x float> shufflevector (<vscale x 2 x float> insertelement (<vscale x 2 x float> undef, float 1.0, i32 0), <vscale x 2 x float> undef, <vscale x 2 x i32> zeroinitializer), shufflevector (<vscale x 2 x float> insertelement (<vscale x 2 x float> undef, float 1.0, i32 0), <vscale x 2 x float> undef, <vscale x 2 x i32> zeroinitializer)
   ret <vscale x 2 x i1> %res
@@ -429,7 +429,7 @@ define <2 x i1> @floatcmp_oge_fixed_zero() {
 
 define <vscale x 2 x i1> @floatcmp_oge_scalable_zero() {
 ; CHECK-LABEL: @floatcmp_oge_scalable_zero(
-; CHECK-NEXT:    ret <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i32 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)
+; CHECK-NEXT:    ret <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)
 ;
   %res = fcmp oge <vscale x 2 x float> zeroinitializer, zeroinitializer
   ret <vscale x 2 x i1> %res
@@ -445,7 +445,7 @@ define <2 x i1> @floatcmp_oge_fixed_one() {
 
 define <vscale x 2 x i1> @floatcmp_oge_scalable_one() {
 ; CHECK-LABEL: @floatcmp_oge_scalable_one(
-; CHECK-NEXT:    ret <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i32 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)
+; CHECK-NEXT:    ret <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)
 ;
   %res = fcmp oge <vscale x 2 x float> shufflevector (<vscale x 2 x float> insertelement (<vscale x 2 x float> undef, float 1.0, i32 0), <vscale x 2 x float> undef, <vscale x 2 x i32> zeroinitializer), shufflevector (<vscale x 2 x float> insertelement (<vscale x 2 x float> undef, float 1.0, i32 0), <vscale x 2 x float> undef, <vscale x 2 x i32> zeroinitializer)
   ret <vscale x 2 x i1> %res
@@ -493,7 +493,7 @@ define <2 x i1> @floatcmp_ole_fixed_zero() {
 
 define <vscale x 2 x i1> @floatcmp_ole_scalable_zero() {
 ; CHECK-LABEL: @floatcmp_ole_scalable_zero(
-; CHECK-NEXT:    ret <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i32 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)
+; CHECK-NEXT:    ret <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)
 ;
   %res = fcmp ole <vscale x 2 x float> zeroinitializer, zeroinitializer
   ret <vscale x 2 x i1> %res
@@ -509,7 +509,7 @@ define <2 x i1> @floatcmp_ole_fixed_one() {
 
 define <vscale x 2 x i1> @floatcmp_ole_scalable_one() {
 ; CHECK-LABEL: @floatcmp_ole_scalable_one(
-; CHECK-NEXT:    ret <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i32 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)
+; CHECK-NEXT:    ret <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)
 ;
   %res = fcmp ole <vscale x 2 x float> shufflevector (<vscale x 2 x float> insertelement (<vscale x 2 x float> undef, float 1.0, i32 0), <vscale x 2 x float> undef, <vscale x 2 x i32> zeroinitializer), shufflevector (<vscale x 2 x float> insertelement (<vscale x 2 x float> undef, float 1.0, i32 0), <vscale x 2 x float> undef, <vscale x 2 x i32> zeroinitializer)
   ret <vscale x 2 x i1> %res
@@ -557,7 +557,7 @@ define <2 x i1> @floatcmp_ord_fixed_zero() {
 
 define <vscale x 2 x i1> @floatcmp_ord_scalable_zero() {
 ; CHECK-LABEL: @floatcmp_ord_scalable_zero(
-; CHECK-NEXT:    ret <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i32 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)
+; CHECK-NEXT:    ret <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)
 ;
   %res = fcmp ord <vscale x 2 x float> zeroinitializer, zeroinitializer
   ret <vscale x 2 x i1> %res
@@ -573,7 +573,7 @@ define <2 x i1> @floatcmp_ord_fixed_one() {
 
 define <vscale x 2 x i1> @floatcmp_ord_scalable_one() {
 ; CHECK-LABEL: @floatcmp_ord_scalable_one(
-; CHECK-NEXT:    ret <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i32 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)
+; CHECK-NEXT:    ret <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)
 ;
   %res = fcmp ord <vscale x 2 x float> shufflevector (<vscale x 2 x float> insertelement (<vscale x 2 x float> undef, float 1.0, i32 0), <vscale x 2 x float> undef, <vscale x 2 x i32> zeroinitializer), shufflevector (<vscale x 2 x float> insertelement (<vscale x 2 x float> undef, float 1.0, i32 0), <vscale x 2 x float> undef, <vscale x 2 x i32> zeroinitializer)
   ret <vscale x 2 x i1> %res
@@ -589,7 +589,7 @@ define <2 x i1> @floatcmp_ueq_fixed_zero() {
 
 define <vscale x 2 x i1> @floatcmp_ueq_scalable_zero() {
 ; CHECK-LABEL: @floatcmp_ueq_scalable_zero(
-; CHECK-NEXT:    ret <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i32 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)
+; CHECK-NEXT:    ret <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)
 ;
   %res = fcmp ueq <vscale x 2 x float> zeroinitializer, zeroinitializer
   ret <vscale x 2 x i1> %res
@@ -605,7 +605,7 @@ define <2 x i1> @floatcmp_ueq_fixed_one() {
 
 define <vscale x 2 x i1> @floatcmp_ueq_scalable_one() {
 ; CHECK-LABEL: @floatcmp_ueq_scalable_one(
-; CHECK-NEXT:    ret <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i32 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)
+; CHECK-NEXT:    ret <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)
 ;
   %res = fcmp ueq <vscale x 2 x float> shufflevector (<vscale x 2 x float> insertelement (<vscale x 2 x float> undef, float 1.0, i32 0), <vscale x 2 x float> undef, <vscale x 2 x i32> zeroinitializer), shufflevector (<vscale x 2 x float> insertelement (<vscale x 2 x float> undef, float 1.0, i32 0), <vscale x 2 x float> undef, <vscale x 2 x i32> zeroinitializer)
   ret <vscale x 2 x i1> %res
@@ -653,7 +653,7 @@ define <2 x i1> @floatcmp_uge_fixed_zero() {
 
 define <vscale x 2 x i1> @floatcmp_uge_scalable_zero() {
 ; CHECK-LABEL: @floatcmp_uge_scalable_zero(
-; CHECK-NEXT:    ret <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i32 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)
+; CHECK-NEXT:    ret <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)
 ;
   %res = fcmp uge <vscale x 2 x float> zeroinitializer, zeroinitializer
   ret <vscale x 2 x i1> %res
@@ -669,7 +669,7 @@ define <2 x i1> @floatcmp_uge_fixed_one() {
 
 define <vscale x 2 x i1> @floatcmp_uge_scalable_one() {
 ; CHECK-LABEL: @floatcmp_uge_scalable_one(
-; CHECK-NEXT:    ret <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i32 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)
+; CHECK-NEXT:    ret <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)
 ;
   %res = fcmp uge <vscale x 2 x float> shufflevector (<vscale x 2 x float> insertelement (<vscale x 2 x float> undef, float 1.0, i32 0), <vscale x 2 x float> undef, <vscale x 2 x i32> zeroinitializer), shufflevector (<vscale x 2 x float> insertelement (<vscale x 2 x float> undef, float 1.0, i32 0), <vscale x 2 x float> undef, <vscale x 2 x i32> zeroinitializer)
   ret <vscale x 2 x i1> %res
@@ -717,7 +717,7 @@ define <2 x i1> @floatcmp_ule_fixed_zero() {
 
 define <vscale x 2 x i1> @floatcmp_ule_scalable_zero() {
 ; CHECK-LABEL: @floatcmp_ule_scalable_zero(
-; CHECK-NEXT:    ret <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i32 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)
+; CHECK-NEXT:    ret <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)
 ;
   %res = fcmp ule <vscale x 2 x float> zeroinitializer, zeroinitializer
   ret <vscale x 2 x i1> %res
@@ -733,7 +733,7 @@ define <2 x i1> @floatcmp_ule_fixed_one() {
 
 define <vscale x 2 x i1> @floatcmp_ule_scalable_one() {
 ; CHECK-LABEL: @floatcmp_ule_scalable_one(
-; CHECK-NEXT:    ret <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i32 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)
+; CHECK-NEXT:    ret <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)
 ;
   %res = fcmp ule <vscale x 2 x float> shufflevector (<vscale x 2 x float> insertelement (<vscale x 2 x float> undef, float 1.0, i32 0), <vscale x 2 x float> undef, <vscale x 2 x i32> zeroinitializer), shufflevector (<vscale x 2 x float> insertelement (<vscale x 2 x float> undef, float 1.0, i32 0), <vscale x 2 x float> undef, <vscale x 2 x i32> zeroinitializer)
   ret <vscale x 2 x i1> %res
@@ -813,7 +813,7 @@ define <2 x i1> @floatcmp_true_fixed_zero() {
 
 define <vscale x 2 x i1> @floatcmp_true_scalable_zero() {
 ; CHECK-LABEL: @floatcmp_true_scalable_zero(
-; CHECK-NEXT:    ret <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i32 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)
+; CHECK-NEXT:    ret <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)
 ;
   %res = fcmp true <vscale x 2 x float> zeroinitializer, zeroinitializer
   ret <vscale x 2 x i1> %res
@@ -829,7 +829,7 @@ define <2 x i1> @floatcmp_true_fixed_one() {
 
 define <vscale x 2 x i1> @floatcmp_true_scalable_one() {
 ; CHECK-LABEL: @floatcmp_true_scalable_one(
-; CHECK-NEXT:    ret <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i32 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)
+; CHECK-NEXT:    ret <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)
 ;
   %res = fcmp true <vscale x 2 x float> shufflevector (<vscale x 2 x float> insertelement (<vscale x 2 x float> undef, float 1.0, i32 0), <vscale x 2 x float> undef, <vscale x 2 x i32> zeroinitializer), shufflevector (<vscale x 2 x float> insertelement (<vscale x 2 x float> undef, float 1.0, i32 0), <vscale x 2 x float> undef, <vscale x 2 x i32> zeroinitializer)
   ret <vscale x 2 x i1> %res

diff  --git a/llvm/test/Transforms/InstSimplify/gep.ll b/llvm/test/Transforms/InstSimplify/gep.ll
index 6f7db7b856345..529fc5ed960a0 100644
--- a/llvm/test/Transforms/InstSimplify/gep.ll
+++ b/llvm/test/Transforms/InstSimplify/gep.ll
@@ -249,7 +249,7 @@ define <4 x ptr> @vector_idx_mix_scalar_vector() {
 
 define <vscale x 4 x ptr> @scalable_idx_scalar() {
 ; CHECK-LABEL: @scalable_idx_scalar(
-; CHECK-NEXT:    ret <vscale x 4 x ptr> getelementptr (i32, <vscale x 4 x ptr> zeroinitializer, <vscale x 4 x i64> shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i32 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer))
+; CHECK-NEXT:    ret <vscale x 4 x ptr> getelementptr (i32, <vscale x 4 x ptr> zeroinitializer, <vscale x 4 x i64> shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer))
 ;
   %gep = getelementptr i32, <vscale x 4 x ptr> zeroinitializer, i64 1
   ret <vscale x 4 x ptr> %gep

diff  --git a/llvm/test/Transforms/InstSimplify/vscale-inseltpoison.ll b/llvm/test/Transforms/InstSimplify/vscale-inseltpoison.ll
index 00efca9f2de65..7d4a7c2e6c3a7 100644
--- a/llvm/test/Transforms/InstSimplify/vscale-inseltpoison.ll
+++ b/llvm/test/Transforms/InstSimplify/vscale-inseltpoison.ll
@@ -132,7 +132,7 @@ define i32 @insert_extract_element_same_vec_idx_4() {
 
 define <vscale x 2 x i1> @cmp_le_smax_always_true(<vscale x 2 x i64> %x) {
 ; CHECK-LABEL: @cmp_le_smax_always_true(
-; CHECK-NEXT:    ret <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i32 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)
+; CHECK-NEXT:    ret <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)
 ;
   %cmp = icmp sle <vscale x 2 x i64> %x, shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 9223372036854775807, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
   ret <vscale x 2 x i1> %cmp

diff  --git a/llvm/test/Transforms/InstSimplify/vscale.ll b/llvm/test/Transforms/InstSimplify/vscale.ll
index 95848a775adcf..47d281ea2d46d 100644
--- a/llvm/test/Transforms/InstSimplify/vscale.ll
+++ b/llvm/test/Transforms/InstSimplify/vscale.ll
@@ -144,7 +144,7 @@ entry:
 
 define <vscale x 2 x i1> @cmp_le_smax_always_true(<vscale x 2 x i64> %x) {
 ; CHECK-LABEL: @cmp_le_smax_always_true(
-; CHECK-NEXT:    ret <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i32 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)
+; CHECK-NEXT:    ret <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)
 ;
   %cmp = icmp sle <vscale x 2 x i64> %x, shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 9223372036854775807, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
   ret <vscale x 2 x i1> %cmp

diff  --git a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll
index f35bbf9ea5e15..1215962db1ba4 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll
@@ -153,7 +153,7 @@ define void @test_widen_induction(ptr %A, i64 %N) {
 ; CHECK-NEXT:    [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
 ; CHECK-NEXT:    [[N_MOD_VF3:%.*]] = urem i64 [[N]], 2
 ; CHECK-NEXT:    [[N_VEC4:%.*]] = sub i64 [[N]], [[N_MOD_VF3]]
-; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[BC_RESUME_VAL]], i32 0
+; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[BC_RESUME_VAL]], i64 0
 ; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <2 x i64> [[DOTSPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[INDUCTION:%.*]] = add <2 x i64> [[DOTSPLAT]], <i64 0, i64 1>
 ; CHECK-NEXT:    br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
@@ -211,7 +211,7 @@ define void @test_widen_induction_variable_start(ptr %A, i64 %N, i64 %start) {
 ; CHECK:       vector.ph:
 ; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 4
 ; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
-; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[START]], i32 0
+; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[START]], i64 0
 ; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <2 x i64> [[DOTSPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[INDUCTION:%.*]] = add <2 x i64> [[DOTSPLAT]], <i64 0, i64 1>
 ; CHECK-NEXT:    [[IND_END:%.*]] = add i64 [[START]], [[N_VEC]]
@@ -247,7 +247,7 @@ define void @test_widen_induction_variable_start(ptr %A, i64 %N, i64 %start) {
 ; CHECK-NEXT:    [[N_MOD_VF3:%.*]] = urem i64 [[TMP0]], 2
 ; CHECK-NEXT:    [[N_VEC4:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF3]]
 ; CHECK-NEXT:    [[IND_END5:%.*]] = add i64 [[START]], [[N_VEC4]]
-; CHECK-NEXT:    [[DOTSPLATINSERT10:%.*]] = insertelement <2 x i64> poison, i64 [[BC_RESUME_VAL]], i32 0
+; CHECK-NEXT:    [[DOTSPLATINSERT10:%.*]] = insertelement <2 x i64> poison, i64 [[BC_RESUME_VAL]], i64 0
 ; CHECK-NEXT:    [[DOTSPLAT11:%.*]] = shufflevector <2 x i64> [[DOTSPLATINSERT10]], <2 x i64> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[INDUCTION12:%.*]] = add <2 x i64> [[DOTSPLAT11]], <i64 0, i64 1>
 ; CHECK-NEXT:    br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
@@ -336,7 +336,7 @@ define void @test_widen_induction_step_2(ptr %A, i64 %N, i32 %step) {
 ; CHECK-NEXT:    [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[IND_END5]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
 ; CHECK-NEXT:    [[N_MOD_VF3:%.*]] = urem i64 [[N]], 2
 ; CHECK-NEXT:    [[IND_END:%.*]] = sub i64 [[N]], [[N_MOD_VF3]]
-; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[BC_RESUME_VAL]], i32 0
+; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[BC_RESUME_VAL]], i64 0
 ; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <2 x i64> [[DOTSPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[INDUCTION:%.*]] = add <2 x i64> [[DOTSPLAT]], <i64 0, i64 1>
 ; CHECK-NEXT:    br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
@@ -427,7 +427,7 @@ define void @test_widen_extended_induction(ptr %dst) {
 ; CHECK:       vec.epilog.ph:
 ; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i8 [ 16, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
 ; CHECK-NEXT:    [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i32 [ 10000, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
-; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <2 x i8> poison, i8 [[BC_RESUME_VAL]], i32 0
+; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <2 x i8> poison, i8 [[BC_RESUME_VAL]], i64 0
 ; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <2 x i8> [[DOTSPLATINSERT]], <2 x i8> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[INDUCTION:%.*]] = add <2 x i8> [[DOTSPLAT]], <i8 0, i8 1>
 ; CHECK-NEXT:    br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
@@ -512,7 +512,7 @@ define void @test_widen_truncated_induction(ptr %A) {
 ; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 10000, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
 ; CHECK-NEXT:    [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ 10000, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
 ; CHECK-NEXT:    [[TMP7:%.*]] = trunc i64 [[BC_RESUME_VAL]] to i8
-; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <2 x i8> poison, i8 [[TMP7]], i32 0
+; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <2 x i8> poison, i8 [[TMP7]], i64 0
 ; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <2 x i8> [[DOTSPLATINSERT]], <2 x i8> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[INDUCTION:%.*]] = add <2 x i8> [[DOTSPLAT]], <i8 0, i8 1>
 ; CHECK-NEXT:    br label [[VEC_EPILOG_VECTOR_BODY:%.*]]

diff  --git a/llvm/test/Transforms/LoopVectorize/AArch64/extractvalue-no-scalarization-required.ll b/llvm/test/Transforms/LoopVectorize/AArch64/extractvalue-no-scalarization-required.ll
index 76c301001e1f1..6f3728d046d3e 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/extractvalue-no-scalarization-required.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/extractvalue-no-scalarization-required.ll
@@ -21,10 +21,10 @@
 ; FORCED-NEXT:    %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
 ; FORCED-NEXT:    %0 = add i32 %index, 0
 ; FORCED-NEXT:    %1 = extractvalue { i64, i64 } %sv, 0
-; FORCED-NEXT:    %broadcast.splatinsert = insertelement <2 x i64> poison, i64 %1, i32 0
+; FORCED-NEXT:    %broadcast.splatinsert = insertelement <2 x i64> poison, i64 %1, i64 0
 ; FORCED-NEXT:    %broadcast.splat = shufflevector <2 x i64> %broadcast.splatinsert, <2 x i64> poison, <2 x i32> zeroinitializer
 ; FORCED-NEXT:    %2 = extractvalue { i64, i64 } %sv, 1
-; FORCED-NEXT:    %broadcast.splatinsert1 = insertelement <2 x i64> poison, i64 %2, i32 0
+; FORCED-NEXT:    %broadcast.splatinsert1 = insertelement <2 x i64> poison, i64 %2, i64 0
 ; FORCED-NEXT:    %broadcast.splat2 = shufflevector <2 x i64> %broadcast.splatinsert1, <2 x i64> poison, <2 x i32> zeroinitializer
 ; FORCED-NEXT:    %3 = getelementptr i64, i64* %dst, i32 %0
 ; FORCED-NEXT:    %4 = add <2 x i64> %broadcast.splat, %broadcast.splat2
@@ -71,10 +71,10 @@ declare float @powf(float, float) readnone nounwind
 ; FORCED-NEXT:    %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
 ; FORCED-NEXT:    %0 = add i32 %index, 0
 ; FORCED-NEXT:    %1 = extractvalue { float, float } %sv, 0
-; FORCED-NEXT:    %broadcast.splatinsert = insertelement <2 x float> poison, float %1, i32 0
+; FORCED-NEXT:    %broadcast.splatinsert = insertelement <2 x float> poison, float %1, i64 0
 ; FORCED-NEXT:    %broadcast.splat = shufflevector <2 x float> %broadcast.splatinsert, <2 x float> poison, <2 x i32> zeroinitializer
 ; FORCED-NEXT:    %2 = extractvalue { float, float } %sv, 1
-; FORCED-NEXT:    %broadcast.splatinsert1 = insertelement <2 x float> poison, float %2, i32 0
+; FORCED-NEXT:    %broadcast.splatinsert1 = insertelement <2 x float> poison, float %2, i64 0
 ; FORCED-NEXT:    %broadcast.splat2 = shufflevector <2 x float> %broadcast.splatinsert1, <2 x float> poison, <2 x i32> zeroinitializer
 ; FORCED-NEXT:    %3 = getelementptr float, float* %dst, i32 %0
 ; FORCED-NEXT:    %4 = call <2 x float> @llvm.pow.v2f32(<2 x float> %broadcast.splat, <2 x float> %broadcast.splat2)

diff  --git a/llvm/test/Transforms/LoopVectorize/AArch64/first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/AArch64/first-order-recurrence.ll
index 38cab8a025779..2be525a2abc0d 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/first-order-recurrence.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/first-order-recurrence.ll
@@ -70,7 +70,7 @@ define void @PR34711(ptr %a, ptr %b, ptr %c, i64 %n) #0 {
 ; CHECK-VF4UF1-LABEL: @PR34711
 ; CHECK-VF4UF1: vector.body
 ; CHECK-VF4UF1: %[[VEC_RECUR:.*]] = phi <vscale x 4 x i16> [ %vector.recur.init, %vector.ph ], [ %[[MGATHER:.*]], %vector.body ]
-; CHECK-VF4UF1: %[[MGATHER]] = call <vscale x 4 x i16> @llvm.masked.gather.nxv4i16.nxv4p0(<vscale x 4 x ptr> {{.*}}, i32 2, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i32 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i16> poison)
+; CHECK-VF4UF1: %[[MGATHER]] = call <vscale x 4 x i16> @llvm.masked.gather.nxv4i16.nxv4p0(<vscale x 4 x ptr> {{.*}}, i32 2, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i16> poison)
 ; CHECK-VF4UF1-NEXT: %[[SPLICE:.*]] = call <vscale x 4 x i16> @llvm.experimental.vector.splice.nxv4i16(<vscale x 4 x i16> %[[VEC_RECUR]], <vscale x 4 x i16> %[[MGATHER]], i32 -1)
 ; CHECK-VF4UF1-NEXT: %[[SXT1:.*]] = sext <vscale x 4 x i16> %[[SPLICE]] to <vscale x 4 x i32>
 ; CHECK-VF4UF1-NEXT: %[[SXT2:.*]] = sext <vscale x 4 x i16> %[[MGATHER]] to <vscale x 4 x i32>

diff  --git a/llvm/test/Transforms/LoopVectorize/AArch64/gather-do-not-vectorize-addressing.ll b/llvm/test/Transforms/LoopVectorize/AArch64/gather-do-not-vectorize-addressing.ll
index 74923d85849fc..98d55ae15c077 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/gather-do-not-vectorize-addressing.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/gather-do-not-vectorize-addressing.ll
@@ -57,14 +57,14 @@ define dso_local double @test(ptr nocapture noundef readonly %data, ptr nocaptur
 ; SVE-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; SVE:       vector.body:
 ; SVE-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; SVE-NEXT:    [[VEC_PHI:%.*]] = phi <vscale x 2 x double> [ insertelement (<vscale x 2 x double> shufflevector (<vscale x 2 x double> insertelement (<vscale x 2 x double> poison, double -0.000000e+00, i32 0), <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer), double 0.000000e+00, i32 0), [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
+; SVE-NEXT:    [[VEC_PHI:%.*]] = phi <vscale x 2 x double> [ insertelement (<vscale x 2 x double> shufflevector (<vscale x 2 x double> insertelement (<vscale x 2 x double> poison, double -0.000000e+00, i64 0), <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer), double 0.000000e+00, i32 0), [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
 ; SVE-NEXT:    [[TMP4:%.*]] = add i64 [[INDEX]], 0
 ; SVE-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[OFFSET:%.*]], i64 [[TMP4]]
 ; SVE-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0
 ; SVE-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 2 x i32>, ptr [[TMP6]], align 4
 ; SVE-NEXT:    [[TMP7:%.*]] = sext <vscale x 2 x i32> [[WIDE_LOAD]] to <vscale x 2 x i64>
 ; SVE-NEXT:    [[TMP8:%.*]] = getelementptr inbounds double, ptr [[DATA:%.*]], <vscale x 2 x i64> [[TMP7]]
-; SVE-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 2 x double> @llvm.masked.gather.nxv2f64.nxv2p0(<vscale x 2 x ptr> [[TMP8]], i32 8, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i32 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), <vscale x 2 x double> poison)
+; SVE-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 2 x double> @llvm.masked.gather.nxv2f64.nxv2p0(<vscale x 2 x ptr> [[TMP8]], i32 8, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), <vscale x 2 x double> poison)
 ; SVE-NEXT:    [[TMP9]] = fadd <vscale x 2 x double> [[VEC_PHI]], [[WIDE_MASKED_GATHER]]
 ; SVE-NEXT:    [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
 ; SVE-NEXT:    [[TMP11:%.*]] = mul i64 [[TMP10]], 2

diff  --git a/llvm/test/Transforms/LoopVectorize/AArch64/interleaved-store-of-first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/AArch64/interleaved-store-of-first-order-recurrence.ll
index 78781215bdd96..c77d0bd4e8cf1 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/interleaved-store-of-first-order-recurrence.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/interleaved-store-of-first-order-recurrence.ll
@@ -9,7 +9,7 @@ define void @interleaved_store_first_order_recurrence(ptr noalias %src, ptr %dst
 ; CHECK-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 99>, %vector.ph ], [ [[BROADCAST_SPLAT:%.*]], %vector.body ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[SRC:%.*]], align 4
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP1]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP1]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[BROADCAST_SPLAT]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
 ; CHECK-NEXT:    [[TMP3:%.*]] = mul nuw nsw i64 [[TMP0]], 3

diff  --git a/llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_test1_no_explicit_vect_width.ll b/llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_test1_no_explicit_vect_width.ll
index 80410a44e9e67..2b6933654ac1a 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_test1_no_explicit_vect_width.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_test1_no_explicit_vect_width.ll
@@ -18,7 +18,7 @@
 
 ; CHECK-LABEL: @foo_i32(
 ; CHECK-LABEL: vector.ph:
-; CHECK: %[[SplatVal:.*]] = insertelement <4 x i32> poison, i32 %n, i32 0
+; CHECK: %[[SplatVal:.*]] = insertelement <4 x i32> poison, i32 %n, i64 0
 ; CHECK: %[[Splat:.*]] = shufflevector <4 x i32> %[[SplatVal]], <4 x i32> poison, <4 x i32> zeroinitializer
 
 ; CHECK-LABEL: vector.body:
@@ -85,7 +85,7 @@ for.end10:                                        ; preds = %for.inc8
 
 ; CHECK-LABEL: @foo_i64(
 ; CHECK-LABEL: vector.ph:
-; CHECK: %[[SplatVal:.*]] = insertelement <2 x i64> poison, i64 %n, i32 0
+; CHECK: %[[SplatVal:.*]] = insertelement <2 x i64> poison, i64 %n, i64 0
 ; CHECK: %[[Splat:.*]] = shufflevector <2 x i64> %[[SplatVal]], <2 x i64> poison, <2 x i32> zeroinitializer
 
 ; CHECK-LABEL: vector.body:

diff  --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-avoid-scalarization.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-avoid-scalarization.ll
index 6693b9b10a654..e718bda0ad3b1 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-avoid-scalarization.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-avoid-scalarization.ll
@@ -24,16 +24,16 @@ define void @test_no_scalarization(ptr %a, i32 %idx, i32 %n) #0 {
 ; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[TMP1]], [[TMP5]]
 ; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[TMP1]], [[N_MOD_VF]]
 ; CHECK-NEXT:    [[IND_END:%.*]] = add i32 [[IDX]], [[N_VEC]]
-; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i32> poison, i32 [[IDX]], i32 0
+; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i32> poison, i32 [[IDX]], i64 0
 ; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i32> [[DOTSPLATINSERT]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP6:%.*]] = call <vscale x 2 x i32> @llvm.experimental.stepvector.nxv2i32()
 ; CHECK-NEXT:    [[TMP7:%.*]] = add <vscale x 2 x i32> [[TMP6]], zeroinitializer
-; CHECK-NEXT:    [[TMP8:%.*]] = mul <vscale x 2 x i32> [[TMP7]], shufflevector (<vscale x 2 x i32> insertelement (<vscale x 2 x i32> poison, i32 1, i32 0), <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP8:%.*]] = mul <vscale x 2 x i32> [[TMP7]], shufflevector (<vscale x 2 x i32> insertelement (<vscale x 2 x i32> poison, i32 1, i64 0), <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer)
 ; CHECK-NEXT:    [[INDUCTION:%.*]] = add <vscale x 2 x i32> [[DOTSPLAT]], [[TMP8]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = call i32 @llvm.vscale.i32()
 ; CHECK-NEXT:    [[TMP10:%.*]] = mul i32 [[TMP9]], 2
 ; CHECK-NEXT:    [[TMP11:%.*]] = mul i32 1, [[TMP10]]
-; CHECK-NEXT:    [[DOTSPLATINSERT1:%.*]] = insertelement <vscale x 2 x i32> poison, i32 [[TMP11]], i32 0
+; CHECK-NEXT:    [[DOTSPLATINSERT1:%.*]] = insertelement <vscale x 2 x i32> poison, i32 [[TMP11]], i64 0
 ; CHECK-NEXT:    [[DOTSPLAT2:%.*]] = shufflevector <vscale x 2 x i32> [[DOTSPLATINSERT1]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:

diff  --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-call.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-call.ll
index 37d13374d1198..69c6f84aac53d 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-call.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-call.ll
@@ -33,7 +33,7 @@ for.end:                                 ; preds = %for.body, %entry
 define void @vec_scalar(i64 %N, ptr nocapture %a) {
 ; CHECK-LABEL: @vec_scalar
 ; CHECK: vector.body:
-; CHECK: call <vscale x 2 x double> @foo_vec(<vscale x 2 x double> shufflevector (<vscale x 2 x double> insertelement (<vscale x 2 x double> poison, double 1.000000e+01, i32 0), <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer))
+; CHECK: call <vscale x 2 x double> @foo_vec(<vscale x 2 x double> shufflevector (<vscale x 2 x double> insertelement (<vscale x 2 x double> poison, double 1.000000e+01, i64 0), <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer))
 entry:
   %cmp7 = icmp sgt i64 %N, 0
   br i1 %cmp7, label %for.body, label %for.end

diff  --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reduction-inloop-cond.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reduction-inloop-cond.ll
index 52af3bfdd6034..52feff81914a3 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reduction-inloop-cond.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reduction-inloop-cond.ll
@@ -22,7 +22,7 @@ define float @cond_fadd(ptr noalias nocapture readonly %a, ptr noalias nocapture
 ; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds float, ptr [[COND:%.*]], i64 [[TMP4]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds float, ptr [[TMP5]], i32 0
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[TMP6]], align 4
-; CHECK-NEXT:    [[TMP7:%.*]] = fcmp une <vscale x 4 x float> [[WIDE_LOAD]], shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 2.000000e+00, i32 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP7:%.*]] = fcmp une <vscale x 4 x float> [[WIDE_LOAD]], shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 2.000000e+00, i64 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer)
 ; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr float, ptr [[A:%.*]], i64 [[TMP4]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr float, ptr [[TMP8]], i32 0
 ; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr [[TMP9]], i32 4, <vscale x 4 x i1> [[TMP7]], <vscale x 4 x float> poison)
@@ -109,11 +109,11 @@ define float @cond_cmp_sel(ptr noalias %a, ptr noalias %cond, i64 %N) {
 ; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds float, ptr [[COND:%.*]], i64 [[TMP4]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds float, ptr [[TMP5]], i32 0
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[TMP6]], align 4
-; CHECK-NEXT:    [[TMP7:%.*]] = fcmp une <vscale x 4 x float> [[WIDE_LOAD]], shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 3.000000e+00, i32 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP7:%.*]] = fcmp une <vscale x 4 x float> [[WIDE_LOAD]], shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 3.000000e+00, i64 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer)
 ; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr float, ptr [[A:%.*]], i64 [[TMP4]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr float, ptr [[TMP8]], i32 0
 ; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr [[TMP9]], i32 4, <vscale x 4 x i1> [[TMP7]], <vscale x 4 x float> poison)
-; CHECK-NEXT:    [[TMP10:%.*]] = select fast <vscale x 4 x i1> [[TMP7]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]], <vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 0x7FF0000000000000, i32 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP10:%.*]] = select fast <vscale x 4 x i1> [[TMP7]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]], <vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 0x7FF0000000000000, i64 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer)
 ; CHECK-NEXT:    [[TMP11:%.*]] = call fast float @llvm.vector.reduce.fmin.nxv4f32(<vscale x 4 x float> [[TMP10]])
 ; CHECK-NEXT:    [[RDX_MINMAX_CMP:%.*]] = fcmp fast olt float [[TMP11]], [[VEC_PHI]]
 ; CHECK-NEXT:    [[RDX_MINMAX_SELECT]] = select fast i1 [[RDX_MINMAX_CMP]], float [[TMP11]], float [[VEC_PHI]]

diff  --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll
index 56bc85121cfa6..167e1dbc2f538 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll
@@ -26,7 +26,7 @@ define float @fadd_strict(float* noalias nocapture readonly %a, i64 %n) #0 {
 ; CHECK-ORDERED-TF: %[[ACTIVE_LANE_MASK:.*]] = phi <vscale x 8 x i1>
 ; CHECK-ORDERED-TF: %[[VEC_PHI:.*]] = phi float [ 0.000000e+00, %vector.ph ], [ %[[RDX:.*]], %vector.body ]
 ; CHECK-ORDERED-TF: %[[LOAD:.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0nxv8f32(<vscale x 8 x float>*
-; CHECK-ORDERED-TF: %[[SEL:.*]] = select <vscale x 8 x i1> %[[ACTIVE_LANE_MASK]], <vscale x 8 x float> %[[LOAD]], <vscale x 8 x float> shufflevector (<vscale x 8 x float> insertelement (<vscale x 8 x float> poison, float -0.000000e+00, i32 0), <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer)
+; CHECK-ORDERED-TF: %[[SEL:.*]] = select <vscale x 8 x i1> %[[ACTIVE_LANE_MASK]], <vscale x 8 x float> %[[LOAD]], <vscale x 8 x float> shufflevector (<vscale x 8 x float> insertelement (<vscale x 8 x float> poison, float -0.000000e+00, i64 0), <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer)
 ; CHECK-ORDERED-TF: %[[RDX]] = call float @llvm.vector.reduce.fadd.nxv8f32(float %[[VEC_PHI]], <vscale x 8 x float> %[[SEL]])
 ; CHECK-ORDERED-TF: for.end
 ; CHECK-ORDERED-TF: %[[PHI:.*]] = phi float [ %[[SCALAR:.*]], %for.body ], [ %[[RDX]], %middle.block ]
@@ -34,7 +34,7 @@ define float @fadd_strict(float* noalias nocapture readonly %a, i64 %n) #0 {
 
 ; CHECK-UNORDERED-LABEL: @fadd_strict
 ; CHECK-UNORDERED: vector.body
-; CHECK-UNORDERED: %[[VEC_PHI:.*]] = phi <vscale x 8 x float> [ insertelement (<vscale x 8 x float> shufflevector (<vscale x 8 x float> insertelement (<vscale x 8 x float> poison, float -0.000000e+00, i32 0), <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer), float 0.000000e+00, i32 0), %vector.ph ], [ %[[FADD_VEC:.*]], %vector.body ]
+; CHECK-UNORDERED: %[[VEC_PHI:.*]] = phi <vscale x 8 x float> [ insertelement (<vscale x 8 x float> shufflevector (<vscale x 8 x float> insertelement (<vscale x 8 x float> poison, float -0.000000e+00, i64 0), <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer), float 0.000000e+00, i32 0), %vector.ph ], [ %[[FADD_VEC:.*]], %vector.body ]
 ; CHECK-UNORDERED: %[[LOAD_VEC:.*]] = load <vscale x 8 x float>, <vscale x 8 x float>*
 ; CHECK-UNORDERED: %[[FADD_VEC]] = fadd <vscale x 8 x float> %[[LOAD_VEC]], %[[VEC_PHI]]
 ; CHECK-UNORDERED-NOT: call float @llvm.vector.reduce.fadd
@@ -96,13 +96,13 @@ define float @fadd_strict_unroll(float* noalias nocapture readonly %a, i64 %n) #
 ; CHECK-ORDERED-TF: %[[LOAD2:.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0nxv8f32(<vscale x 8 x float>*
 ; CHECK-ORDERED-TF: %[[LOAD3:.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0nxv8f32(<vscale x 8 x float>*
 ; CHECK-ORDERED-TF: %[[LOAD4:.*]] = call <vscale x 8 x float> @llvm.masked.load.nxv8f32.p0nxv8f32(<vscale x 8 x float>*
-; CHECK-ORDERED-TF: %[[SEL1:.*]] = select <vscale x 8 x i1> %[[ACTIVE_LANE_MASK1]], <vscale x 8 x float> %[[LOAD1]], <vscale x 8 x float> shufflevector (<vscale x 8 x float> insertelement (<vscale x 8 x float> poison, float -0.000000e+00, i32 0), <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer)
+; CHECK-ORDERED-TF: %[[SEL1:.*]] = select <vscale x 8 x i1> %[[ACTIVE_LANE_MASK1]], <vscale x 8 x float> %[[LOAD1]], <vscale x 8 x float> shufflevector (<vscale x 8 x float> insertelement (<vscale x 8 x float> poison, float -0.000000e+00, i64 0), <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer)
 ; CHECK-ORDERED-TF: %[[RDX1:.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float %[[VEC_PHI1]], <vscale x 8 x float> %[[SEL1]])
-; CHECK-ORDERED-TF: %[[SEL2:.*]] = select <vscale x 8 x i1> %[[ACTIVE_LANE_MASK2]], <vscale x 8 x float> %[[LOAD2]], <vscale x 8 x float> shufflevector (<vscale x 8 x float> insertelement (<vscale x 8 x float> poison, float -0.000000e+00, i32 0), <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer)
+; CHECK-ORDERED-TF: %[[SEL2:.*]] = select <vscale x 8 x i1> %[[ACTIVE_LANE_MASK2]], <vscale x 8 x float> %[[LOAD2]], <vscale x 8 x float> shufflevector (<vscale x 8 x float> insertelement (<vscale x 8 x float> poison, float -0.000000e+00, i64 0), <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer)
 ; CHECK-ORDERED-TF: %[[RDX2:.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float %[[RDX1]], <vscale x 8 x float> %[[SEL2]])
-; CHECK-ORDERED-TF: %[[SEL3:.*]] = select <vscale x 8 x i1> %[[ACTIVE_LANE_MASK3]], <vscale x 8 x float> %[[LOAD3]], <vscale x 8 x float> shufflevector (<vscale x 8 x float> insertelement (<vscale x 8 x float> poison, float -0.000000e+00, i32 0), <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer)
+; CHECK-ORDERED-TF: %[[SEL3:.*]] = select <vscale x 8 x i1> %[[ACTIVE_LANE_MASK3]], <vscale x 8 x float> %[[LOAD3]], <vscale x 8 x float> shufflevector (<vscale x 8 x float> insertelement (<vscale x 8 x float> poison, float -0.000000e+00, i64 0), <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer)
 ; CHECK-ORDERED-TF: %[[RDX3:.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float %[[RDX2]], <vscale x 8 x float> %[[SEL3]])
-; CHECK-ORDERED-TF: %[[SEL4:.*]] = select <vscale x 8 x i1> %[[ACTIVE_LANE_MASK4]], <vscale x 8 x float> %[[LOAD4]], <vscale x 8 x float> shufflevector (<vscale x 8 x float> insertelement (<vscale x 8 x float> poison, float -0.000000e+00, i32 0), <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer)
+; CHECK-ORDERED-TF: %[[SEL4:.*]] = select <vscale x 8 x i1> %[[ACTIVE_LANE_MASK4]], <vscale x 8 x float> %[[LOAD4]], <vscale x 8 x float> shufflevector (<vscale x 8 x float> insertelement (<vscale x 8 x float> poison, float -0.000000e+00, i64 0), <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer)
 ; CHECK-ORDERED-TF: %[[RDX4]] = call float @llvm.vector.reduce.fadd.nxv8f32(float %[[RDX3]], <vscale x 8 x float> %[[SEL4]])
 ; CHECK-ORDERED-TF: for.end
 ; CHECK-ORDERED-TF: %[[PHI:.*]] = phi float [ %[[SCALAR:.*]], %for.body ], [ %[[RDX4]], %middle.block ]
@@ -110,10 +110,10 @@ define float @fadd_strict_unroll(float* noalias nocapture readonly %a, i64 %n) #
 
 ; CHECK-UNORDERED-LABEL: @fadd_strict_unroll
 ; CHECK-UNORDERED: vector.body
-; CHECK-UNORDERED: %[[VEC_PHI1:.*]] = phi <vscale x 8 x float> [ insertelement (<vscale x 8 x float> shufflevector (<vscale x 8 x float> insertelement (<vscale x 8 x float> poison, float -0.000000e+00, i32 0), <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer), float 0.000000e+00, i32 0), %vector.ph ], [ %[[VEC_FADD1:.*]], %vector.body ]
-; CHECK-UNORDERED: %[[VEC_PHI2:.*]] = phi <vscale x 8 x float> [ shufflevector (<vscale x 8 x float> insertelement (<vscale x 8 x float> poison, float -0.000000e+00, i32 0), <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer), %vector.ph ], [ %[[VEC_FADD2:.*]], %vector.body ]
-; CHECK-UNORDERED: %[[VEC_PHI3:.*]] = phi <vscale x 8 x float> [ shufflevector (<vscale x 8 x float> insertelement (<vscale x 8 x float> poison, float -0.000000e+00, i32 0), <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer), %vector.ph ], [ %[[VEC_FADD3:.*]], %vector.body ]
-; CHECK-UNORDERED: %[[VEC_PHI4:.*]] = phi <vscale x 8 x float> [ shufflevector (<vscale x 8 x float> insertelement (<vscale x 8 x float> poison, float -0.000000e+00, i32 0), <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer), %vector.ph ], [ %[[VEC_FADD4:.*]], %vector.body ]
+; CHECK-UNORDERED: %[[VEC_PHI1:.*]] = phi <vscale x 8 x float> [ insertelement (<vscale x 8 x float> shufflevector (<vscale x 8 x float> insertelement (<vscale x 8 x float> poison, float -0.000000e+00, i64 0), <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer), float 0.000000e+00, i32 0), %vector.ph ], [ %[[VEC_FADD1:.*]], %vector.body ]
+; CHECK-UNORDERED: %[[VEC_PHI2:.*]] = phi <vscale x 8 x float> [ shufflevector (<vscale x 8 x float> insertelement (<vscale x 8 x float> poison, float -0.000000e+00, i64 0), <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer), %vector.ph ], [ %[[VEC_FADD2:.*]], %vector.body ]
+; CHECK-UNORDERED: %[[VEC_PHI3:.*]] = phi <vscale x 8 x float> [ shufflevector (<vscale x 8 x float> insertelement (<vscale x 8 x float> poison, float -0.000000e+00, i64 0), <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer), %vector.ph ], [ %[[VEC_FADD3:.*]], %vector.body ]
+; CHECK-UNORDERED: %[[VEC_PHI4:.*]] = phi <vscale x 8 x float> [ shufflevector (<vscale x 8 x float> insertelement (<vscale x 8 x float> poison, float -0.000000e+00, i64 0), <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer), %vector.ph ], [ %[[VEC_FADD4:.*]], %vector.body ]
 ; CHECK-UNORDERED: %[[VEC_LOAD1:.*]] = load <vscale x 8 x float>, <vscale x 8 x float>*
 ; CHECK-UNORDERED: %[[VEC_LOAD2:.*]] = load <vscale x 8 x float>, <vscale x 8 x float>*
 ; CHECK-UNORDERED: %[[VEC_LOAD3:.*]] = load <vscale x 8 x float>, <vscale x 8 x float>*
@@ -164,17 +164,17 @@ define void @fadd_strict_interleave(float* noalias nocapture readonly %a, float*
 ; CHECK-ORDERED: vector.ph
 ; CHECK-ORDERED: %[[STEPVEC1:.*]] = call <vscale x 4 x i64> @llvm.experimental.stepvector.nxv4i64()
 ; CHECK-ORDERED: %[[STEPVEC_ADD1:.*]] = add <vscale x 4 x i64> %[[STEPVEC1]], zeroinitializer
-; CHECK-ORDERED: %[[STEPVEC_MUL:.*]] = mul <vscale x 4 x i64> %[[STEPVEC_ADD1]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 2, i32 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-ORDERED: %[[STEPVEC_MUL:.*]] = mul <vscale x 4 x i64> %[[STEPVEC_ADD1]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 2, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
 ; CHECK-ORDERED: %[[INDUCTION:.*]] = add <vscale x 4 x i64> zeroinitializer, %[[STEPVEC_MUL]]
 ; CHECK-ORDERED: vector.body
 ; CHECK-ORDERED: %[[VEC_PHI2:.*]] = phi float [ %[[LOAD2]], %vector.ph ], [ %[[RDX2:.*]], %vector.body ]
 ; CHECK-ORDERED: %[[VEC_PHI1:.*]] = phi float [ %[[LOAD1]], %vector.ph ], [ %[[RDX1:.*]], %vector.body ]
 ; CHECK-ORDERED: %[[VEC_IND:.*]] = phi <vscale x 4 x i64> [ %[[INDUCTION]], %vector.ph ], [ {{.*}}, %vector.body ]
 ; CHECK-ORDERED: %[[GEP1:.*]] = getelementptr inbounds float, float* %b, <vscale x 4 x i64> %[[VEC_IND]]
-; CHECK-ORDERED: %[[MGATHER1:.*]] = call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0f32(<vscale x 4 x float*> %[[GEP1]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i32 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x float> poison)
-; CHECK-ORDERED: %[[OR:.*]] = or <vscale x 4 x i64> %[[VEC_IND]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i32 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-ORDERED: %[[MGATHER1:.*]] = call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0f32(<vscale x 4 x float*> %[[GEP1]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x float> poison)
+; CHECK-ORDERED: %[[OR:.*]] = or <vscale x 4 x i64> %[[VEC_IND]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
 ; CHECK-ORDERED: %[[GEP2:.*]] = getelementptr inbounds float, float* %b, <vscale x 4 x i64> %[[OR]]
-; CHECK-ORDERED: %[[MGATHER2:.*]] = call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0f32(<vscale x 4 x float*> %[[GEP2]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i32 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x float> poison)
+; CHECK-ORDERED: %[[MGATHER2:.*]] = call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0f32(<vscale x 4 x float*> %[[GEP2]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x float> poison)
 ; CHECK-ORDERED: %[[RDX2]] = call float @llvm.vector.reduce.fadd.nxv4f32(float %[[VEC_PHI2]], <vscale x 4 x float> %[[MGATHER2]])
 ; CHECK-ORDERED: %[[RDX1]] = call float @llvm.vector.reduce.fadd.nxv4f32(float %[[VEC_PHI1]], <vscale x 4 x float> %[[MGATHER1]])
 
@@ -186,7 +186,7 @@ define void @fadd_strict_interleave(float* noalias nocapture readonly %a, float*
 ; CHECK-ORDERED-TF: vector.ph
 ; CHECK-ORDERED-TF: %[[STEPVEC1:.*]] = call <vscale x 4 x i64> @llvm.experimental.stepvector.nxv4i64()
 ; CHECK-ORDERED-TF: %[[STEPVEC_ADD1:.*]] = add <vscale x 4 x i64> %[[STEPVEC1]], zeroinitializer
-; CHECK-ORDERED-TF: %[[STEPVEC_MUL:.*]] = mul <vscale x 4 x i64> %[[STEPVEC_ADD1]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 2, i32 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-ORDERED-TF: %[[STEPVEC_MUL:.*]] = mul <vscale x 4 x i64> %[[STEPVEC_ADD1]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 2, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
 ; CHECK-ORDERED-TF: %[[INDUCTION:.*]] = add <vscale x 4 x i64> zeroinitializer, %[[STEPVEC_MUL]]
 ; CHECK-ORDERED-TF: vector.body
 ; CHECK-ORDERED-TF: %[[ACTIVE_LANE_MASK:.*]] = phi <vscale x 4 x i1>
@@ -195,12 +195,12 @@ define void @fadd_strict_interleave(float* noalias nocapture readonly %a, float*
 ; CHECK-ORDERED-TF: %[[VEC_IND:.*]] = phi <vscale x 4 x i64> [ %[[INDUCTION]], %vector.ph ], [ {{.*}}, %vector.body ]
 ; CHECK-ORDERED-TF: %[[GEP1:.*]] = getelementptr inbounds float, float* %b, <vscale x 4 x i64> %[[VEC_IND]]
 ; CHECK-ORDERED-TF: %[[MGATHER1:.*]] = call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0f32(<vscale x 4 x float*> %[[GEP1]], i32 4, <vscale x 4 x i1> %[[ACTIVE_LANE_MASK]], <vscale x 4 x float> poison)
-; CHECK-ORDERED-TF: %[[OR:.*]] = or <vscale x 4 x i64> %[[VEC_IND]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i32 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-ORDERED-TF: %[[OR:.*]] = or <vscale x 4 x i64> %[[VEC_IND]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
 ; CHECK-ORDERED-TF: %[[GEP2:.*]] = getelementptr inbounds float, float* %b, <vscale x 4 x i64> %[[OR]]
 ; CHECK-ORDERED-TF: %[[MGATHER2:.*]] = call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0f32(<vscale x 4 x float*> %[[GEP2]], i32 4, <vscale x 4 x i1> %[[ACTIVE_LANE_MASK]], <vscale x 4 x float> poison)
-; CHECK-ORDERED-TF: %[[SEL2:.*]] = select <vscale x 4 x i1> %[[ACTIVE_LANE_MASK]], <vscale x 4 x float> %[[MGATHER2]], <vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float -0.000000e+00, i32 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-ORDERED-TF: %[[SEL2:.*]] = select <vscale x 4 x i1> %[[ACTIVE_LANE_MASK]], <vscale x 4 x float> %[[MGATHER2]], <vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float -0.000000e+00, i64 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer)
 ; CHECK-ORDERED-TF: %[[RDX2]] = call float @llvm.vector.reduce.fadd.nxv4f32(float %[[VEC_PHI2]], <vscale x 4 x float> %[[SEL2]])
-; CHECK-ORDERED-TF: %[[SEL1:.*]] = select <vscale x 4 x i1> %[[ACTIVE_LANE_MASK]], <vscale x 4 x float> %[[MGATHER1]], <vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float -0.000000e+00, i32 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-ORDERED-TF: %[[SEL1:.*]] = select <vscale x 4 x i1> %[[ACTIVE_LANE_MASK]], <vscale x 4 x float> %[[MGATHER1]], <vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float -0.000000e+00, i64 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer)
 ; CHECK-ORDERED-TF: %[[RDX1]] = call float @llvm.vector.reduce.fadd.nxv4f32(float %[[VEC_PHI1]], <vscale x 4 x float> %[[SEL1]])
 
 ; CHECK-UNORDERED-LABEL: @fadd_strict_interleave
@@ -209,21 +209,21 @@ define void @fadd_strict_interleave(float* noalias nocapture readonly %a, float*
 ; CHECK-UNORDERED: %[[LOAD1:.*]] = load float, float* %a
 ; CHECK-UNORDERED: %[[LOAD2:.*]] = load float, float* %[[ARRAYIDX]]
 ; CHECK-UNORDERED: vector.ph
-; CHECK-UNORDERED: %[[INS_ELT2:.*]] = insertelement <vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float -0.000000e+00, i32 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer), float %[[LOAD2]], i32 0
-; CHECK-UNORDERED: %[[INS_ELT1:.*]] = insertelement <vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float -0.000000e+00, i32 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer), float %[[LOAD1]], i32 0
+; CHECK-UNORDERED: %[[INS_ELT2:.*]] = insertelement <vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float -0.000000e+00, i64 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer), float %[[LOAD2]], i32 0
+; CHECK-UNORDERED: %[[INS_ELT1:.*]] = insertelement <vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float -0.000000e+00, i64 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer), float %[[LOAD1]], i32 0
 ; CHECK-UNORDERED: %[[STEPVEC1:.*]] = call <vscale x 4 x i64> @llvm.experimental.stepvector.nxv4i64()
 ; CHECK-UNORDERED: %[[STEPVEC_ADD1:.*]] = add <vscale x 4 x i64> %[[STEPVEC1]], zeroinitializer
-; CHECK-UNORDERED: %[[STEPVEC_MUL:.*]] = mul <vscale x 4 x i64> %[[STEPVEC_ADD1]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 2, i32 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-UNORDERED: %[[STEPVEC_MUL:.*]] = mul <vscale x 4 x i64> %[[STEPVEC_ADD1]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 2, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
 ; CHECK-UNORDERED: %[[INDUCTION:.*]] = add <vscale x 4 x i64> zeroinitializer, %[[STEPVEC_MUL]]
 ; CHECK-UNORDERED: vector.body
 ; CHECK-UNORDERED: %[[VEC_PHI2:.*]] = phi <vscale x 4 x float> [ %[[INS_ELT2]], %vector.ph ], [ %[[VEC_FADD2:.*]], %vector.body ]
 ; CHECK-UNORDERED: %[[VEC_PHI1:.*]] = phi <vscale x 4 x float> [ %[[INS_ELT1]], %vector.ph ], [ %[[VEC_FADD1:.*]], %vector.body ]
 ; CHECK-UNORDERED: %[[GEP1:.*]] = getelementptr inbounds float, float* %b, <vscale x 4 x i64>
-; CHECK-UNORDERED: %[[MGATHER1:.*]] = call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0f32(<vscale x 4 x float*> %[[GEP1]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i32 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x float> poison)
+; CHECK-UNORDERED: %[[MGATHER1:.*]] = call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0f32(<vscale x 4 x float*> %[[GEP1]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x float> poison)
 ; CHECK-UNORDERED:  %[[VEC_FADD1]] = fadd <vscale x 4 x float> %[[MGATHER1]], %[[VEC_PHI1]]
-; CHECK-UNORDERED: %[[OR:.*]] = or <vscale x 4 x i64> {{.*}}, shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i32 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-UNORDERED: %[[OR:.*]] = or <vscale x 4 x i64> {{.*}}, shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
 ; CHECK-UNORDERED: %[[GEP2:.*]] = getelementptr inbounds float, float* %b, <vscale x 4 x i64> %[[OR]]
-; CHECK-UNORDERED: %[[MGATHER2:.*]] = call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0f32(<vscale x 4 x float*> %[[GEP2]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i32 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x float> poison)
+; CHECK-UNORDERED: %[[MGATHER2:.*]] = call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0f32(<vscale x 4 x float*> %[[GEP2]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x float> poison)
 ; CHECK-UNORDERED: %[[VEC_FADD2]] = fadd <vscale x 4 x float> %[[MGATHER2]], %[[VEC_PHI2]]
 ; CHECK-UNORDERED-NOT: call float @llvm.vector.reduce.fadd
 ; CHECK-UNORDERED: middle.block
@@ -292,7 +292,7 @@ define float @fadd_of_sum(float* noalias nocapture readonly %a, float* noalias n
 ; CHECK-ORDERED-TF: %[[LOAD1:.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0nxv4f32(<vscale x 4 x float>*
 ; CHECK-ORDERED-TF: %[[LOAD2:.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0nxv4f32(<vscale x 4 x float>*
 ; CHECK-ORDERED-TF: %[[ADD:.*]] = fadd <vscale x 4 x float> %[[LOAD1]], %[[LOAD2]]
-; CHECK-ORDERED-TF: %[[SEL:.*]] = select <vscale x 4 x i1> %[[ACTIVE_LANE_MASK]], <vscale x 4 x float> %[[ADD]], <vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float -0.000000e+00, i32 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-ORDERED-TF: %[[SEL:.*]] = select <vscale x 4 x i1> %[[ACTIVE_LANE_MASK]], <vscale x 4 x float> %[[ADD]], <vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float -0.000000e+00, i64 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer)
 ; CHECK-ORDERED-TF: %[[RDX]] = call float @llvm.vector.reduce.fadd.nxv4f32(float %[[VEC_PHI1]], <vscale x 4 x float> %[[SEL]])
 ; CHECK-ORDERED-TF: for.end.loopexit
 ; CHECK-ORDERED-TF: %[[EXIT_PHI:.*]] = phi float [ {{.*}}, %for.body ], [ %[[RDX]], %middle.block ]
@@ -302,7 +302,7 @@ define float @fadd_of_sum(float* noalias nocapture readonly %a, float* noalias n
 
 ; CHECK-UNORDERED-LABEL: @fadd_of_sum
 ; CHECK-UNORDERED: vector.body
-; CHECK-UNORDERED: %[[VEC_PHI:.*]] = phi <vscale x 4 x float> [ insertelement (<vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float -0.000000e+00, i32 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer), float 0.000000e+00, i32 0), %vector.ph ], [ %[[VEC_FADD2:.*]], %vector.body ]
+; CHECK-UNORDERED: %[[VEC_PHI:.*]] = phi <vscale x 4 x float> [ insertelement (<vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float -0.000000e+00, i64 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer), float 0.000000e+00, i32 0), %vector.ph ], [ %[[VEC_FADD2:.*]], %vector.body ]
 ; CHECK-UNORDERED: %[[VEC_LOAD1:.*]] = load <vscale x 4 x float>, <vscale x 4 x float>*
 ; CHECK-UNORDERED: %[[VEC_LOAD2:.*]] = load <vscale x 4 x float>, <vscale x 4 x float>*
 ; CHECK-UNORDERED: %[[VEC_FADD1:.*]] = fadd <vscale x 4 x float> %[[VEC_LOAD1]], %[[VEC_LOAD2]]
@@ -355,8 +355,8 @@ define float @fadd_conditional(float* noalias nocapture readonly %a, float* noal
 ; CHECK-ORDERED: %[[LOAD:.*]] = load <vscale x 4 x float>, <vscale x 4 x float>*
 ; CHECK-ORDERED: %[[FCMP:.*]] = fcmp une <vscale x 4 x float> %[[LOAD]], zeroinitializer
 ; CHECK-ORDERED: %[[MASKED_LOAD:.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0nxv4f32(<vscale x 4 x float>* {{.*}}, i32 4, <vscale x 4 x i1> %[[FCMP]], <vscale x 4 x float> poison)
-; CHECK-ORDERED: %[[XOR:.*]] = xor <vscale x 4 x i1> %[[FCMP]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i32 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)
-; CHECK-ORDERED: %[[SELECT:.*]] = select <vscale x 4 x i1> %[[XOR]], <vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 3.000000e+00, i32 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x float> %[[MASKED_LOAD]]
+; CHECK-ORDERED: %[[XOR:.*]] = xor <vscale x 4 x i1> %[[FCMP]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-ORDERED: %[[SELECT:.*]] = select <vscale x 4 x i1> %[[XOR]], <vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 3.000000e+00, i64 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x float> %[[MASKED_LOAD]]
 ; CHECK-ORDERED: %[[RDX]] = call float @llvm.vector.reduce.fadd.nxv4f32(float %[[VEC_PHI]], <vscale x 4 x float> %[[SELECT]])
 ; CHECK-ORDERED: scalar.ph
 ; CHECK-ORDERED: %[[MERGE_RDX:.*]] = phi float [ 1.000000e+00, %entry ], [ %[[RDX]], %middle.block ]
@@ -379,11 +379,11 @@ define float @fadd_conditional(float* noalias nocapture readonly %a, float* noal
 ; CHECK-ORDERED-TF: %[[FCMP:.*]] = fcmp une <vscale x 4 x float> %[[LOAD]], zeroinitializer
 ; CHECK-ORDERED-TF: %[[SELECT0:.*]] = select <vscale x 4 x i1> %[[ACTIVE_LANE_MASK]], <vscale x 4 x i1> %[[FCMP]], <vscale x 4 x i1> zeroinitializer
 ; CHECK-ORDERED-TF: %[[MASKED_LOAD:.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0nxv4f32(<vscale x 4 x float>* {{.*}}, i32 4, <vscale x 4 x i1> %[[SELECT0]], <vscale x 4 x float> poison)
-; CHECK-ORDERED-TF: %[[XOR:.*]] = xor <vscale x 4 x i1> %[[FCMP]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i32 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-ORDERED-TF: %[[XOR:.*]] = xor <vscale x 4 x i1> %[[FCMP]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)
 ; CHECK-ORDERED-TF: %[[SELECT1:.*]] = select <vscale x 4 x i1> %[[ACTIVE_LANE_MASK]], <vscale x 4 x i1> %[[XOR]], <vscale x 4 x i1> zeroinitializer
-; CHECK-ORDERED-TF: %[[SELECT2:.*]] = select <vscale x 4 x i1> %[[SELECT1]], <vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 3.000000e+00, i32 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x float> %[[MASKED_LOAD]]
+; CHECK-ORDERED-TF: %[[SELECT2:.*]] = select <vscale x 4 x i1> %[[SELECT1]], <vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 3.000000e+00, i64 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x float> %[[MASKED_LOAD]]
 ; CHECK-ORDERED-TF: %[[OR:.*]] = or <vscale x 4 x i1> %[[SELECT0]], %[[SELECT1]]
-; CHECK-ORDERED-TF: %[[SELECT3:.*]] = select <vscale x 4 x i1> %[[OR]], <vscale x 4 x float> %[[SELECT2]], <vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float -0.000000e+00, i32 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-ORDERED-TF: %[[SELECT3:.*]] = select <vscale x 4 x i1> %[[OR]], <vscale x 4 x float> %[[SELECT2]], <vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float -0.000000e+00, i64 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer)
 ; CHECK-ORDERED-TF: %[[RDX]] = call float @llvm.vector.reduce.fadd.nxv4f32(float %[[VEC_PHI]], <vscale x 4 x float> %[[SELECT3]])
 ; CHECK-ORDERED-TF: scalar.ph
 ; CHECK-ORDERED-TF: %[[MERGE_RDX:.*]] = phi float [ 1.000000e+00, %entry ], [ %[[RDX]], %middle.block ]
@@ -400,12 +400,12 @@ define float @fadd_conditional(float* noalias nocapture readonly %a, float* noal
 
 ; CHECK-UNORDERED-LABEL: @fadd_conditional
 ; CHECK-UNORDERED: vector.body
-; CHECK-UNORDERED: %[[VEC_PHI:.*]] = phi <vscale x 4 x float> [ insertelement (<vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float -0.000000e+00, i32 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer), float 1.000000e+00, i32 0), %vector.ph ], [ %[[VEC_FADD:.*]], %vector.body ]
+; CHECK-UNORDERED: %[[VEC_PHI:.*]] = phi <vscale x 4 x float> [ insertelement (<vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float -0.000000e+00, i64 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer), float 1.000000e+00, i32 0), %vector.ph ], [ %[[VEC_FADD:.*]], %vector.body ]
 ; CHECK-UNORDERED: %[[LOAD1:.*]] = load <vscale x 4 x float>, <vscale x 4 x float>*
 ; CHECK-UNORDERED: %[[FCMP:.*]] = fcmp une <vscale x 4 x float> %[[LOAD1]], zeroinitializer
 ; CHECK-UNORDERED: %[[MASKED_LOAD:.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0nxv4f32(<vscale x 4 x float>* {{.*}}, i32 4, <vscale x 4 x i1> %[[FCMP]], <vscale x 4 x float> poison)
-; CHECK-UNORDERED: %[[XOR:.*]] = xor <vscale x 4 x i1> %[[FCMP]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i32 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)
-; CHECK-UNORDERED: %[[SELECT:.*]] = select <vscale x 4 x i1> %[[XOR]], <vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 3.000000e+00, i32 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x float> %[[MASKED_LOAD]]
+; CHECK-UNORDERED: %[[XOR:.*]] = xor <vscale x 4 x i1> %[[FCMP]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-UNORDERED: %[[SELECT:.*]] = select <vscale x 4 x i1> %[[XOR]], <vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 3.000000e+00, i64 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x float> %[[MASKED_LOAD]]
 ; CHECK-UNORDERED: %[[VEC_FADD]] = fadd <vscale x 4 x float> %[[VEC_PHI]], %[[SELECT]]
 ; CHECK-UNORDERED-NOT: call float @llvm.vector.reduce.fadd
 ; CHECK-UNORDERED: middle.block
@@ -459,7 +459,7 @@ define float @fadd_multiple(float* noalias nocapture %a, float* noalias nocaptur
 
 ; CHECK-UNORDERED-LABEL: @fadd_multiple
 ; CHECK-UNORDERED: vector.body
-; CHECK-UNORDERED: %[[PHI:.*]] = phi <vscale x 8 x float> [ insertelement (<vscale x 8 x float> shufflevector (<vscale x 8 x float> insertelement (<vscale x 8 x float> poison, float -0.000000e+00, i32 0), <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer), float -0.000000e+00, i32 0), %vector.ph ], [ %[[VEC_FADD2:.*]], %vector.body ]
+; CHECK-UNORDERED: %[[PHI:.*]] = phi <vscale x 8 x float> [ insertelement (<vscale x 8 x float> shufflevector (<vscale x 8 x float> insertelement (<vscale x 8 x float> poison, float -0.000000e+00, i64 0), <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer), float -0.000000e+00, i32 0), %vector.ph ], [ %[[VEC_FADD2:.*]], %vector.body ]
 ; CHECK-UNORDERED: %[[VEC_LOAD1:.*]] = load <vscale x 8 x float>, <vscale x 8 x float>
 ; CHECK-UNORDERED: %[[VEC_FADD1:.*]] = fadd <vscale x 8 x float> %[[PHI]], %[[VEC_LOAD1]]
 ; CHECK-UNORDERED: %[[VEC_LOAD2:.*]] = load <vscale x 8 x float>, <vscale x 8 x float>
@@ -544,13 +544,13 @@ define float @fmuladd_strict(float* %a, float* %b, i64 %n) #0 {
 ; CHECK-ORDERED-TF: [[FMUL1:%.*]] = fmul <vscale x 8 x float> [[WIDE_LOAD1]], [[WIDE_LOAD5]]
 ; CHECK-ORDERED-TF: [[FMUL2:%.*]] = fmul <vscale x 8 x float> [[WIDE_LOAD2]], [[WIDE_LOAD6]]
 ; CHECK-ORDERED-TF: [[FMUL3:%.*]] = fmul <vscale x 8 x float> [[WIDE_LOAD3]], [[WIDE_LOAD7]]
-; CHECK-ORDERED-TF: [[SEL:%.*]] = select <vscale x 8 x i1> [[ACTIVE_LANE_MASK]], <vscale x 8 x float> [[FMUL]], <vscale x 8 x float> shufflevector (<vscale x 8 x float> insertelement (<vscale x 8 x float> poison, float -0.000000e+00, i32 0), <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer)
+; CHECK-ORDERED-TF: [[SEL:%.*]] = select <vscale x 8 x i1> [[ACTIVE_LANE_MASK]], <vscale x 8 x float> [[FMUL]], <vscale x 8 x float> shufflevector (<vscale x 8 x float> insertelement (<vscale x 8 x float> poison, float -0.000000e+00, i64 0), <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer)
 ; CHECK-ORDERED-TF: [[RDX:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[VEC_PHI]], <vscale x 8 x float> [[SEL]])
-; CHECK-ORDERED-TF: [[SEL1:%.*]] = select <vscale x 8 x i1> [[ACTIVE_LANE_MASK1]], <vscale x 8 x float> [[FMUL1]], <vscale x 8 x float> shufflevector (<vscale x 8 x float> insertelement (<vscale x 8 x float> poison, float -0.000000e+00, i32 0), <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer)
+; CHECK-ORDERED-TF: [[SEL1:%.*]] = select <vscale x 8 x i1> [[ACTIVE_LANE_MASK1]], <vscale x 8 x float> [[FMUL1]], <vscale x 8 x float> shufflevector (<vscale x 8 x float> insertelement (<vscale x 8 x float> poison, float -0.000000e+00, i64 0), <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer)
 ; CHECK-ORDERED-TF: [[RDX1:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[RDX]], <vscale x 8 x float> [[SEL1]])
-; CHECK-ORDERED-TF: [[SEL2:%.*]] = select <vscale x 8 x i1> [[ACTIVE_LANE_MASK2]], <vscale x 8 x float> [[FMUL2]], <vscale x 8 x float> shufflevector (<vscale x 8 x float> insertelement (<vscale x 8 x float> poison, float -0.000000e+00, i32 0), <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer)
+; CHECK-ORDERED-TF: [[SEL2:%.*]] = select <vscale x 8 x i1> [[ACTIVE_LANE_MASK2]], <vscale x 8 x float> [[FMUL2]], <vscale x 8 x float> shufflevector (<vscale x 8 x float> insertelement (<vscale x 8 x float> poison, float -0.000000e+00, i64 0), <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer)
 ; CHECK-ORDERED-TF: [[RDX2:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[RDX1]], <vscale x 8 x float> [[SEL2]])
-; CHECK-ORDERED-TF: [[SEL3:%.*]] = select <vscale x 8 x i1> [[ACTIVE_LANE_MASK3]], <vscale x 8 x float> [[FMUL3]], <vscale x 8 x float> shufflevector (<vscale x 8 x float> insertelement (<vscale x 8 x float> poison, float -0.000000e+00, i32 0), <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer)
+; CHECK-ORDERED-TF: [[SEL3:%.*]] = select <vscale x 8 x i1> [[ACTIVE_LANE_MASK3]], <vscale x 8 x float> [[FMUL3]], <vscale x 8 x float> shufflevector (<vscale x 8 x float> insertelement (<vscale x 8 x float> poison, float -0.000000e+00, i64 0), <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer)
 ; CHECK-ORDERED-TF: [[RDX3]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[RDX2]], <vscale x 8 x float> [[SEL3]])
 ; CHECK-ORDERED-TF: for.end
 ; CHECK-ORDERED-TF: [[RES:%.*]] = phi float [ [[SCALAR:%.*]], %for.body ], [ [[RDX3]], %middle.block ]
@@ -558,10 +558,10 @@ define float @fmuladd_strict(float* %a, float* %b, i64 %n) #0 {
 
 ; CHECK-UNORDERED-LABEL: @fmuladd_strict
 ; CHECK-UNORDERED: vector.body
-; CHECK-UNORDERED:    [[VEC_PHI:%.*]] = phi <vscale x 8 x float> [ insertelement (<vscale x 8 x float> shufflevector (<vscale x 8 x float> insertelement (<vscale x 8 x float> poison, float -0.000000e+00, i32 0), <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer), float 0.000000e+00, i32 0), %vector.ph ], [ [[FMULADD:%.*]], %vector.body ]
-; CHECK-UNORDERED:    [[VEC_PHI1:%.*]] = phi <vscale x 8 x float> [ shufflevector (<vscale x 8 x float> insertelement (<vscale x 8 x float> poison, float -0.000000e+00, i32 0), <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer), %vector.ph ], [ [[FMULADD1:%.*]], %vector.body ]
-; CHECK-UNORDERED:    [[VEC_PHI2:%.*]] = phi <vscale x 8 x float> [ shufflevector (<vscale x 8 x float> insertelement (<vscale x 8 x float> poison, float -0.000000e+00, i32 0), <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer), %vector.ph ], [ [[FMULADD2:%.*]], %vector.body ]
-; CHECK-UNORDERED:    [[VEC_PHI3:%.*]] = phi <vscale x 8 x float> [ shufflevector (<vscale x 8 x float> insertelement (<vscale x 8 x float> poison, float -0.000000e+00, i32 0), <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer), %vector.ph ], [ [[FMULADD3:%.*]], %vector.body ]
+; CHECK-UNORDERED:    [[VEC_PHI:%.*]] = phi <vscale x 8 x float> [ insertelement (<vscale x 8 x float> shufflevector (<vscale x 8 x float> insertelement (<vscale x 8 x float> poison, float -0.000000e+00, i64 0), <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer), float 0.000000e+00, i32 0), %vector.ph ], [ [[FMULADD:%.*]], %vector.body ]
+; CHECK-UNORDERED:    [[VEC_PHI1:%.*]] = phi <vscale x 8 x float> [ shufflevector (<vscale x 8 x float> insertelement (<vscale x 8 x float> poison, float -0.000000e+00, i64 0), <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer), %vector.ph ], [ [[FMULADD1:%.*]], %vector.body ]
+; CHECK-UNORDERED:    [[VEC_PHI2:%.*]] = phi <vscale x 8 x float> [ shufflevector (<vscale x 8 x float> insertelement (<vscale x 8 x float> poison, float -0.000000e+00, i64 0), <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer), %vector.ph ], [ [[FMULADD2:%.*]], %vector.body ]
+; CHECK-UNORDERED:    [[VEC_PHI3:%.*]] = phi <vscale x 8 x float> [ shufflevector (<vscale x 8 x float> insertelement (<vscale x 8 x float> poison, float -0.000000e+00, i64 0), <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer), %vector.ph ], [ [[FMULADD3:%.*]], %vector.body ]
 ; CHECK-UNORDERED: [[WIDE_LOAD:%.*]] = load <vscale x 8 x float>, <vscale x 8 x float>*
 ; CHECK-UNORDERED: [[WIDE_LOAD1:%.*]] = load <vscale x 8 x float>, <vscale x 8 x float>*
 ; CHECK-UNORDERED: [[WIDE_LOAD2:%.*]] = load <vscale x 8 x float>, <vscale x 8 x float>*
@@ -655,13 +655,13 @@ define float @fmuladd_strict_fmf(float* %a, float* %b, i64 %n) #0 {
 ; CHECK-ORDERED-TF: [[FMUL1:%.*]] = fmul nnan <vscale x 8 x float> [[WIDE_LOAD1]], [[WIDE_LOAD5]]
 ; CHECK-ORDERED-TF: [[FMUL2:%.*]] = fmul nnan <vscale x 8 x float> [[WIDE_LOAD2]], [[WIDE_LOAD6]]
 ; CHECK-ORDERED-TF: [[FMUL3:%.*]] = fmul nnan <vscale x 8 x float> [[WIDE_LOAD3]], [[WIDE_LOAD7]]
-; CHECK-ORDERED-TF: [[SEL:%.*]] = select nnan <vscale x 8 x i1> [[ACTIVE_LANE_MASK]], <vscale x 8 x float> [[FMUL]], <vscale x 8 x float> shufflevector (<vscale x 8 x float> insertelement (<vscale x 8 x float> poison, float -0.000000e+00, i32 0), <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer)
+; CHECK-ORDERED-TF: [[SEL:%.*]] = select nnan <vscale x 8 x i1> [[ACTIVE_LANE_MASK]], <vscale x 8 x float> [[FMUL]], <vscale x 8 x float> shufflevector (<vscale x 8 x float> insertelement (<vscale x 8 x float> poison, float -0.000000e+00, i64 0), <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer)
 ; CHECK-ORDERED-TF: [[RDX:%.*]] = call nnan float @llvm.vector.reduce.fadd.nxv8f32(float [[VEC_PHI]], <vscale x 8 x float> [[SEL]])
-; CHECK-ORDERED-TF: [[SEL1:%.*]] = select nnan <vscale x 8 x i1> [[ACTIVE_LANE_MASK1]], <vscale x 8 x float> [[FMUL1]], <vscale x 8 x float> shufflevector (<vscale x 8 x float> insertelement (<vscale x 8 x float> poison, float -0.000000e+00, i32 0), <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer)
+; CHECK-ORDERED-TF: [[SEL1:%.*]] = select nnan <vscale x 8 x i1> [[ACTIVE_LANE_MASK1]], <vscale x 8 x float> [[FMUL1]], <vscale x 8 x float> shufflevector (<vscale x 8 x float> insertelement (<vscale x 8 x float> poison, float -0.000000e+00, i64 0), <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer)
 ; CHECK-ORDERED-TF: [[RDX1:%.*]] = call nnan float @llvm.vector.reduce.fadd.nxv8f32(float [[RDX]], <vscale x 8 x float> [[SEL1]])
-; CHECK-ORDERED-TF: [[SEL2:%.*]] = select nnan <vscale x 8 x i1> [[ACTIVE_LANE_MASK2]], <vscale x 8 x float> [[FMUL2]], <vscale x 8 x float> shufflevector (<vscale x 8 x float> insertelement (<vscale x 8 x float> poison, float -0.000000e+00, i32 0), <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer)
+; CHECK-ORDERED-TF: [[SEL2:%.*]] = select nnan <vscale x 8 x i1> [[ACTIVE_LANE_MASK2]], <vscale x 8 x float> [[FMUL2]], <vscale x 8 x float> shufflevector (<vscale x 8 x float> insertelement (<vscale x 8 x float> poison, float -0.000000e+00, i64 0), <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer)
 ; CHECK-ORDERED-TF: [[RDX2:%.*]] = call nnan float @llvm.vector.reduce.fadd.nxv8f32(float [[RDX1]], <vscale x 8 x float> [[SEL2]])
-; CHECK-ORDERED-TF: [[SEL3:%.*]] = select nnan <vscale x 8 x i1> [[ACTIVE_LANE_MASK3]], <vscale x 8 x float> [[FMUL3]], <vscale x 8 x float> shufflevector (<vscale x 8 x float> insertelement (<vscale x 8 x float> poison, float -0.000000e+00, i32 0), <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer)
+; CHECK-ORDERED-TF: [[SEL3:%.*]] = select nnan <vscale x 8 x i1> [[ACTIVE_LANE_MASK3]], <vscale x 8 x float> [[FMUL3]], <vscale x 8 x float> shufflevector (<vscale x 8 x float> insertelement (<vscale x 8 x float> poison, float -0.000000e+00, i64 0), <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer)
 ; CHECK-ORDERED-TF: [[RDX3]] = call nnan float @llvm.vector.reduce.fadd.nxv8f32(float [[RDX2]], <vscale x 8 x float> [[SEL3]])
 ; CHECK-ORDERED-TF: for.end
 ; CHECK-ORDERED-TF: [[RES:%.*]] = phi float [ [[SCALAR:%.*]], %for.body ], [ [[RDX3]], %middle.block ]
@@ -669,10 +669,10 @@ define float @fmuladd_strict_fmf(float* %a, float* %b, i64 %n) #0 {
 
 ; CHECK-UNORDERED-LABEL: @fmuladd_strict_fmf
 ; CHECK-UNORDERED: vector.body
-; CHECK-UNORDERED:    [[VEC_PHI:%.*]] = phi <vscale x 8 x float> [ insertelement (<vscale x 8 x float> shufflevector (<vscale x 8 x float> insertelement (<vscale x 8 x float> poison, float -0.000000e+00, i32 0), <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer), float 0.000000e+00, i32 0), %vector.ph ], [ [[FMULADD:%.*]], %vector.body ]
-; CHECK-UNORDERED:    [[VEC_PHI1:%.*]] = phi <vscale x 8 x float> [ shufflevector (<vscale x 8 x float> insertelement (<vscale x 8 x float> poison, float -0.000000e+00, i32 0), <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer), %vector.ph ], [ [[FMULADD1:%.*]], %vector.body ]
-; CHECK-UNORDERED:    [[VEC_PHI2:%.*]] = phi <vscale x 8 x float> [ shufflevector (<vscale x 8 x float> insertelement (<vscale x 8 x float> poison, float -0.000000e+00, i32 0), <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer), %vector.ph ], [ [[FMULADD2:%.*]], %vector.body ]
-; CHECK-UNORDERED:    [[VEC_PHI3:%.*]] = phi <vscale x 8 x float> [ shufflevector (<vscale x 8 x float> insertelement (<vscale x 8 x float> poison, float -0.000000e+00, i32 0), <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer), %vector.ph ], [ [[FMULADD3:%.*]], %vector.body ]
+; CHECK-UNORDERED:    [[VEC_PHI:%.*]] = phi <vscale x 8 x float> [ insertelement (<vscale x 8 x float> shufflevector (<vscale x 8 x float> insertelement (<vscale x 8 x float> poison, float -0.000000e+00, i64 0), <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer), float 0.000000e+00, i32 0), %vector.ph ], [ [[FMULADD:%.*]], %vector.body ]
+; CHECK-UNORDERED:    [[VEC_PHI1:%.*]] = phi <vscale x 8 x float> [ shufflevector (<vscale x 8 x float> insertelement (<vscale x 8 x float> poison, float -0.000000e+00, i64 0), <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer), %vector.ph ], [ [[FMULADD1:%.*]], %vector.body ]
+; CHECK-UNORDERED:    [[VEC_PHI2:%.*]] = phi <vscale x 8 x float> [ shufflevector (<vscale x 8 x float> insertelement (<vscale x 8 x float> poison, float -0.000000e+00, i64 0), <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer), %vector.ph ], [ [[FMULADD2:%.*]], %vector.body ]
+; CHECK-UNORDERED:    [[VEC_PHI3:%.*]] = phi <vscale x 8 x float> [ shufflevector (<vscale x 8 x float> insertelement (<vscale x 8 x float> poison, float -0.000000e+00, i64 0), <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer), %vector.ph ], [ [[FMULADD3:%.*]], %vector.body ]
 ; CHECK-UNORDERED: [[WIDE_LOAD:%.*]] = load <vscale x 8 x float>, <vscale x 8 x float>*
 ; CHECK-UNORDERED: [[WIDE_LOAD1:%.*]] = load <vscale x 8 x float>, <vscale x 8 x float>*
 ; CHECK-UNORDERED: [[WIDE_LOAD2:%.*]] = load <vscale x 8 x float>, <vscale x 8 x float>*

diff  --git a/llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd.ll b/llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd.ll
index 88a814948f61f..3ce24073d521c 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd.ll
@@ -483,7 +483,7 @@ define float @fadd_predicated(float* noalias nocapture %a, i64 %n) {
 ; CHECK-ORDERED-LABEL: @fadd_predicated
 ; CHECK-ORDERED: vector.ph
 ; CHECK-ORDERED: %[[TRIP_MINUS_ONE:.*]] = sub i64 %n, 1
-; CHECK-ORDERED: %[[BROADCAST_INS:.*]] = insertelement <2 x i64> poison, i64 %[[TRIP_MINUS_ONE]], i32 0
+; CHECK-ORDERED: %[[BROADCAST_INS:.*]] = insertelement <2 x i64> poison, i64 %[[TRIP_MINUS_ONE]], i64 0
 ; CHECK-ORDERED: %[[SPLAT:.*]] = shufflevector <2 x i64> %[[BROADCAST_INS]], <2 x i64> poison, <2 x i32> zeroinitializer
 ; CHECK-ORDERED: vector.body
 ; CHECK-ORDERED: %[[RDX_PHI:.*]] =  phi float [ 0.000000e+00, %vector.ph ], [ %[[RDX:.*]], %pred.load.continue2 ]
@@ -498,7 +498,7 @@ define float @fadd_predicated(float* noalias nocapture %a, i64 %n) {
 ; CHECK-UNORDERED-LABEL: @fadd_predicated
 ; CHECK-UNORDERED: vector.ph
 ; CHECK-UNORDERED: %[[TRIP_MINUS_ONE:.*]] = sub i64 %n, 1
-; CHECK-UNORDERED: %[[BROADCAST_INS:.*]] = insertelement <2 x i64> poison, i64 %[[TRIP_MINUS_ONE]], i32 0
+; CHECK-UNORDERED: %[[BROADCAST_INS:.*]] = insertelement <2 x i64> poison, i64 %[[TRIP_MINUS_ONE]], i64 0
 ; CHECK-UNORDERED: %[[SPLAT:.*]] = shufflevector <2 x i64> %[[BROADCAST_INS]], <2 x i64> poison, <2 x i32> zeroinitializer
 ; CHECK-UNORDERED: vector.body
 ; CHECK-UNORDERED: %[[RDX_PHI:.*]] =  phi <2 x float> [ <float 0.000000e+00, float -0.000000e+00>, %vector.ph ], [ %[[FADD:.*]], %pred.load.continue2 ]

diff  --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-basic-vec.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-basic-vec.ll
index 8ee0338c1468a..b3cec2afb06b6 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-basic-vec.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-basic-vec.ll
@@ -11,7 +11,7 @@ define void @cmpsel_i32(ptr noalias nocapture %a, ptr noalias nocapture readonly
 ; CHECK:       vector.body:
 ; CHECK:         [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr {{.*}}, align 4
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <vscale x 4 x i32> [[WIDE_LOAD]], zeroinitializer
-; CHECK-NEXT:    [[TMP2:%.*]] = select <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 2, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 10, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP2:%.*]] = select <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 2, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 10, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
 ; CHECK:         store <vscale x 4 x i32> [[TMP2]], ptr {{.*}}, align 4
 ;
 entry:
@@ -41,8 +41,8 @@ define void @cmpsel_f32(ptr noalias nocapture %a, ptr noalias nocapture readonly
 ; CHECK-NEXT:  entry:
 ; CHECK:       vector.body:
 ; CHECK:         [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr {{.*}}, align 4
-; CHECK-NEXT:    [[TMP1:%.*]] = fcmp ogt <vscale x 4 x float> [[WIDE_LOAD]], shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 3.000000e+00, i32 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer)
-; CHECK-NEXT:    [[TMP2:%.*]] = select <vscale x 4 x i1> [[TMP1]], <vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 1.000000e+01, i32 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 2.000000e+00, i32 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP1:%.*]] = fcmp ogt <vscale x 4 x float> [[WIDE_LOAD]], shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 3.000000e+00, i64 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP2:%.*]] = select <vscale x 4 x i1> [[TMP1]], <vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 1.000000e+01, i64 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 2.000000e+00, i64 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer)
 ; CHECK:         store <vscale x 4 x float> [[TMP2]], ptr {{.*}}, align 4
 
 entry:

diff  --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-cond-inv-loads.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-cond-inv-loads.ll
index a5ebd005df7c5..7266be4611158 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-cond-inv-loads.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-cond-inv-loads.ll
@@ -101,7 +101,7 @@ define void @cond_inv_load_f64f64f64(ptr noalias nocapture %a, ptr noalias nocap
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds double, ptr [[COND:%.*]], i64 [[INDEX]]
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 4 x double>, ptr [[TMP4]], align 8
-; CHECK-NEXT:    [[TMP6:%.*]] = fcmp ogt <vscale x 4 x double> [[WIDE_LOAD]], shufflevector (<vscale x 4 x double> insertelement (<vscale x 4 x double> poison, double 4.000000e-01, i32 0), <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP6:%.*]] = fcmp ogt <vscale x 4 x double> [[WIDE_LOAD]], shufflevector (<vscale x 4 x double> insertelement (<vscale x 4 x double> poison, double 4.000000e-01, i64 0), <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer)
 ; CHECK-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x double> @llvm.masked.gather.nxv4f64.nxv4p0(<vscale x 4 x ptr> [[BROADCAST_SPLAT]], i32 8, <vscale x 4 x i1> [[TMP6]], <vscale x 4 x double> poison)
 ; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr double, ptr [[A:%.*]], i64 [[INDEX]]
 ; CHECK-NEXT:    call void @llvm.masked.store.nxv4f64.p0(<vscale x 4 x double> [[WIDE_MASKED_GATHER]], ptr [[TMP7]], i32 8, <vscale x 4 x i1> [[TMP6]])

diff  --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll
index d9b837f9f232f..71d5c37d72583 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll
@@ -47,11 +47,11 @@ define void @main_vf_vscale_x_16(ptr %A) #0 {
 ; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[TMP6]]
 ; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP11]]
 ; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP12]], i32 0
-; CHECK-NEXT:    store <vscale x 16 x i8> shufflevector (<vscale x 16 x i8> insertelement (<vscale x 16 x i8> poison, i8 1, i32 0), <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer), ptr [[TMP14]], align 1
+; CHECK-NEXT:    store <vscale x 16 x i8> shufflevector (<vscale x 16 x i8> insertelement (<vscale x 16 x i8> poison, i8 1, i64 0), <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer), ptr [[TMP14]], align 1
 ; CHECK-NEXT:    [[TMP15:%.*]] = call i32 @llvm.vscale.i32()
 ; CHECK-NEXT:    [[TMP16:%.*]] = mul i32 [[TMP15]], 16
 ; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP12]], i32 [[TMP16]]
-; CHECK-NEXT:    store <vscale x 16 x i8> shufflevector (<vscale x 16 x i8> insertelement (<vscale x 16 x i8> poison, i8 1, i32 0), <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer), ptr [[TMP17]], align 1
+; CHECK-NEXT:    store <vscale x 16 x i8> shufflevector (<vscale x 16 x i8> insertelement (<vscale x 16 x i8> poison, i8 1, i64 0), <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer), ptr [[TMP17]], align 1
 ; CHECK-NEXT:    [[TMP18:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT:    [[TMP19:%.*]] = mul i64 [[TMP18]], 32
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP19]]
@@ -78,7 +78,7 @@ define void @main_vf_vscale_x_16(ptr %A) #0 {
 ; CHECK-NEXT:    [[TMP25:%.*]] = add i64 [[OFFSET_IDX]], 0
 ; CHECK-NEXT:    [[TMP26:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP25]]
 ; CHECK-NEXT:    [[TMP27:%.*]] = getelementptr inbounds i8, ptr [[TMP26]], i32 0
-; CHECK-NEXT:    store <vscale x 8 x i8> shufflevector (<vscale x 8 x i8> insertelement (<vscale x 8 x i8> poison, i8 1, i32 0), <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer), ptr [[TMP27]], align 1
+; CHECK-NEXT:    store <vscale x 8 x i8> shufflevector (<vscale x 8 x i8> insertelement (<vscale x 8 x i8> poison, i8 1, i64 0), <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer), ptr [[TMP27]], align 1
 ; CHECK-NEXT:    [[TMP28:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT:    [[TMP29:%.*]] = mul i64 [[TMP28]], 8
 ; CHECK-NEXT:    [[INDEX_NEXT6]] = add nuw i64 [[OFFSET_IDX]], [[TMP29]]
@@ -125,11 +125,11 @@ define void @main_vf_vscale_x_16(ptr %A) #0 {
 ; CHECK-VF8-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[TMP4]]
 ; CHECK-VF8-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP9]]
 ; CHECK-VF8-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[TMP10]], i32 0
-; CHECK-VF8-NEXT:    store <vscale x 16 x i8> shufflevector (<vscale x 16 x i8> insertelement (<vscale x 16 x i8> poison, i8 1, i32 0), <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer), ptr [[TMP12]], align 1
+; CHECK-VF8-NEXT:    store <vscale x 16 x i8> shufflevector (<vscale x 16 x i8> insertelement (<vscale x 16 x i8> poison, i8 1, i64 0), <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer), ptr [[TMP12]], align 1
 ; CHECK-VF8-NEXT:    [[TMP13:%.*]] = call i32 @llvm.vscale.i32()
 ; CHECK-VF8-NEXT:    [[TMP14:%.*]] = mul i32 [[TMP13]], 16
 ; CHECK-VF8-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[TMP10]], i32 [[TMP14]]
-; CHECK-VF8-NEXT:    store <vscale x 16 x i8> shufflevector (<vscale x 16 x i8> insertelement (<vscale x 16 x i8> poison, i8 1, i32 0), <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer), ptr [[TMP15]], align 1
+; CHECK-VF8-NEXT:    store <vscale x 16 x i8> shufflevector (<vscale x 16 x i8> insertelement (<vscale x 16 x i8> poison, i8 1, i64 0), <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer), ptr [[TMP15]], align 1
 ; CHECK-VF8-NEXT:    [[TMP16:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-VF8-NEXT:    [[TMP17:%.*]] = mul i64 [[TMP16]], 32
 ; CHECK-VF8-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP17]]
@@ -226,11 +226,11 @@ define void @main_vf_vscale_x_2(ptr %A) #0 vscale_range(8, 8) {
 ; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP4]]
 ; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP9]]
 ; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[TMP10]], i32 0
-; CHECK-NEXT:    store <vscale x 2 x i64> shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer), ptr [[TMP12]], align 1
+; CHECK-NEXT:    store <vscale x 2 x i64> shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer), ptr [[TMP12]], align 1
 ; CHECK-NEXT:    [[TMP13:%.*]] = call i32 @llvm.vscale.i32()
 ; CHECK-NEXT:    [[TMP14:%.*]] = mul i32 [[TMP13]], 2
 ; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[TMP10]], i32 [[TMP14]]
-; CHECK-NEXT:    store <vscale x 2 x i64> shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer), ptr [[TMP15]], align 1
+; CHECK-NEXT:    store <vscale x 2 x i64> shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer), ptr [[TMP15]], align 1
 ; CHECK-NEXT:    [[TMP16:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT:    [[TMP17:%.*]] = mul i64 [[TMP16]], 4
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP17]]
@@ -296,11 +296,11 @@ define void @main_vf_vscale_x_2(ptr %A) #0 vscale_range(8, 8) {
 ; CHECK-VF8-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP4]]
 ; CHECK-VF8-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP9]]
 ; CHECK-VF8-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[TMP10]], i32 0
-; CHECK-VF8-NEXT:    store <vscale x 2 x i64> shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer), ptr [[TMP12]], align 1
+; CHECK-VF8-NEXT:    store <vscale x 2 x i64> shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer), ptr [[TMP12]], align 1
 ; CHECK-VF8-NEXT:    [[TMP13:%.*]] = call i32 @llvm.vscale.i32()
 ; CHECK-VF8-NEXT:    [[TMP14:%.*]] = mul i32 [[TMP13]], 2
 ; CHECK-VF8-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[TMP10]], i32 [[TMP14]]
-; CHECK-VF8-NEXT:    store <vscale x 2 x i64> shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer), ptr [[TMP15]], align 1
+; CHECK-VF8-NEXT:    store <vscale x 2 x i64> shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer), ptr [[TMP15]], align 1
 ; CHECK-VF8-NEXT:    [[TMP16:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-VF8-NEXT:    [[TMP17:%.*]] = mul i64 [[TMP16]], 4
 ; CHECK-VF8-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP17]]

diff  --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter.ll
index 7a630feada12a..d01e600fbb373 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter.ll
@@ -20,7 +20,7 @@ define void @gather_nxv4i32_ind64(ptr noalias nocapture readonly %a, ptr noalias
 ; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[INDEX]]
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 4 x i64>, ptr [[TMP4]], align 8
 ; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], <vscale x 4 x i64> [[WIDE_LOAD]]
-; CHECK-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> [[TMP6]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i32 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x float> poison)
+; CHECK-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> [[TMP6]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x float> poison)
 ; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds float, ptr [[C:%.*]], i64 [[INDEX]]
 ; CHECK-NEXT:    store <vscale x 4 x float> [[WIDE_MASKED_GATHER]], ptr [[TMP7]], align 4
 ; CHECK-NEXT:    [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
@@ -91,7 +91,7 @@ define void @scatter_nxv4i32_ind32(ptr noalias nocapture %a, ptr noalias nocaptu
 ; CHECK-NEXT:    [[WIDE_LOAD1:%.*]] = load <vscale x 4 x i32>, ptr [[TMP6]], align 4
 ; CHECK-NEXT:    [[TMP8:%.*]] = sext <vscale x 4 x i32> [[WIDE_LOAD1]] to <vscale x 4 x i64>
 ; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], <vscale x 4 x i64> [[TMP8]]
-; CHECK-NEXT:    call void @llvm.masked.scatter.nxv4f32.nxv4p0(<vscale x 4 x float> [[WIDE_LOAD]], <vscale x 4 x ptr> [[TMP9]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i32 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
+; CHECK-NEXT:    call void @llvm.masked.scatter.nxv4f32.nxv4p0(<vscale x 4 x float> [[WIDE_LOAD]], <vscale x 4 x ptr> [[TMP9]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
 ; CHECK-NEXT:    [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT:    [[TMP11:%.*]] = shl nuw nsw i64 [[TMP10]], 2
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]]
@@ -158,7 +158,7 @@ define void @scatter_inv_nxv4i32(ptr noalias nocapture %inv, ptr noalias nocaptu
 ; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]]
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP4]], align 4
 ; CHECK-NEXT:    [[TMP6:%.*]] = icmp ne <vscale x 4 x i32> [[WIDE_LOAD]], zeroinitializer
-; CHECK-NEXT:    call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x ptr> [[BROADCAST_SPLAT]], i32 4, <vscale x 4 x i1> [[TMP6]])
+; CHECK-NEXT:    call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x ptr> [[BROADCAST_SPLAT]], i32 4, <vscale x 4 x i1> [[TMP6]])
 ; CHECK-NEXT:    [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT:    [[TMP8:%.*]] = shl nuw nsw i64 [[TMP7]], 2
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]]
@@ -228,7 +228,7 @@ define void @gather_inv_nxv4i32(ptr noalias nocapture %a, ptr noalias nocapture
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i32, ptr [[A:%.*]], i64 [[INDEX]]
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP4]], align 4
-; CHECK-NEXT:    [[TMP6:%.*]] = icmp sgt <vscale x 4 x i32> [[WIDE_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP6:%.*]] = icmp sgt <vscale x 4 x i32> [[WIDE_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
 ; CHECK-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> [[BROADCAST_SPLAT]], i32 4, <vscale x 4 x i1> [[TMP6]], <vscale x 4 x i32> poison)
 ; CHECK-NEXT:    call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> [[WIDE_MASKED_GATHER]], ptr [[TMP4]], i32 4, <vscale x 4 x i1> [[TMP6]])
 ; CHECK-NEXT:    [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
@@ -307,12 +307,12 @@ define void @gather_nxv4i32_ind64_stride2(ptr noalias nocapture %a, ptr noalias
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <vscale x 4 x i64> [ [[TMP4]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[STEP_ADD:%.*]] = add <vscale x 4 x i64> [[VEC_IND]], [[DOTSPLAT]]
-; CHECK-NEXT:    [[TMP7:%.*]] = shl <vscale x 4 x i64> [[VEC_IND]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i32 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
-; CHECK-NEXT:    [[TMP8:%.*]] = shl <vscale x 4 x i64> [[STEP_ADD]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i32 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP7:%.*]] = shl <vscale x 4 x i64> [[VEC_IND]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP8:%.*]] = shl <vscale x 4 x i64> [[STEP_ADD]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
 ; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], <vscale x 4 x i64> [[TMP7]]
 ; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds float, ptr [[B]], <vscale x 4 x i64> [[TMP8]]
-; CHECK-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> [[TMP9]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i32 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x float> poison)
-; CHECK-NEXT:    [[WIDE_MASKED_GATHER2:%.*]] = call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> [[TMP10]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i32 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x float> poison)
+; CHECK-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> [[TMP9]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x float> poison)
+; CHECK-NEXT:    [[WIDE_MASKED_GATHER2:%.*]] = call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> [[TMP10]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x float> poison)
 ; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]]
 ; CHECK-NEXT:    store <vscale x 4 x float> [[WIDE_MASKED_GATHER]], ptr [[TMP11]], align 4
 ; CHECK-NEXT:    [[TMP13:%.*]] = call i32 @llvm.vscale.i32()

diff  --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions-unusual-types.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions-unusual-types.ll
index 7cc16c9ba4fda..b4143ba40454c 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions-unusual-types.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions-unusual-types.ll
@@ -14,7 +14,7 @@ define void @induction_i7(i64* %dst) #0 {
 ; CHECK:         [[TMP4:%.*]] = call <vscale x 2 x i8> @llvm.experimental.stepvector.nxv2i8()
 ; CHECK:         [[TMP5:%.*]] = trunc <vscale x 2 x i8> [[TMP4]] to <vscale x 2 x i7>
 ; CHECK-NEXT:    [[TMP6:%.*]] = add <vscale x 2 x i7> [[TMP5]], zeroinitializer
-; CHECK-NEXT:    [[TMP7:%.*]] = mul <vscale x 2 x i7> [[TMP6]], shufflevector (<vscale x 2 x i7> insertelement (<vscale x 2 x i7> poison, i7 1, i32 0), <vscale x 2 x i7> poison, <vscale x 2 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP7:%.*]] = mul <vscale x 2 x i7> [[TMP6]], shufflevector (<vscale x 2 x i7> insertelement (<vscale x 2 x i7> poison, i7 1, i64 0), <vscale x 2 x i7> poison, <vscale x 2 x i32> zeroinitializer)
 ; CHECK-NEXT:    [[INDUCTION:%.*]] = add <vscale x 2 x i7> zeroinitializer, [[TMP7]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ]
@@ -61,7 +61,7 @@ define void @induction_i3_zext(i64* %dst) #0 {
 ; CHECK:         [[TMP4:%.*]] = call <vscale x 2 x i8> @llvm.experimental.stepvector.nxv2i8()
 ; CHECK:         [[TMP5:%.*]] = trunc <vscale x 2 x i8> [[TMP4]] to <vscale x 2 x i3>
 ; CHECK-NEXT:    [[TMP6:%.*]] = add <vscale x 2 x i3> [[TMP5]], zeroinitializer
-; CHECK-NEXT:    [[TMP7:%.*]] = mul <vscale x 2 x i3> [[TMP6]], shufflevector (<vscale x 2 x i3> insertelement (<vscale x 2 x i3> poison, i3 1, i32 0), <vscale x 2 x i3> poison, <vscale x 2 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP7:%.*]] = mul <vscale x 2 x i3> [[TMP6]], shufflevector (<vscale x 2 x i3> insertelement (<vscale x 2 x i3> poison, i3 1, i64 0), <vscale x 2 x i3> poison, <vscale x 2 x i32> zeroinitializer)
 ; CHECK-NEXT:    [[INDUCTION:%.*]] = add <vscale x 2 x i3> zeroinitializer, [[TMP7]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ]

diff  --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-loads.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-loads.ll
index 05a6ebe2d2221..62eb25e8b4a0c 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-loads.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-loads.ll
@@ -6,7 +6,7 @@ define void @invariant_load(i64 %n, ptr noalias nocapture %a, ptr nocapture read
 ; CHECK: vector.body:
 ; CHECK: %[[GEP:.*]] = getelementptr inbounds i32, ptr %b, i64 42
 ; CHECK-NEXT: %[[INVLOAD:.*]] = load i32, ptr %[[GEP]]
-; CHECK-NEXT: %[[SPLATINS:.*]] = insertelement <vscale x 4 x i32> poison, i32 %[[INVLOAD]], i32 0
+; CHECK-NEXT: %[[SPLATINS:.*]] = insertelement <vscale x 4 x i32> poison, i32 %[[INVLOAD]], i64 0
 ; CHECK-NEXT: %[[SPLAT:.*]] = shufflevector <vscale x 4 x i32> %[[SPLATINS]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
 ; CHECK: %[[LOAD:.*]] = load <vscale x 4 x i32>, ptr
 ; CHECK-NEXT: %[[ADD:.*]] = add nsw <vscale x 4 x i32> %[[SPLAT]], %[[LOAD]]

diff  --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-store.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-store.ll
index a110cb0e48498..6a562e8b92bb7 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-store.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-store.ll
@@ -78,7 +78,7 @@ define void @cond_inv_store_i32(ptr noalias %dst, ptr noalias readonly %src, i64
 ; CHECK-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 4
 ; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
 ; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x ptr> poison, ptr [[DST:%.*]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x ptr> poison, ptr [[DST:%.*]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x ptr> [[BROADCAST_SPLATINSERT]], <vscale x 4 x ptr> poison, <vscale x 4 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:

diff  --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-large-strides.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-large-strides.ll
index 31abba4113373..4a8e63766f19a 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-large-strides.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-large-strides.ll
@@ -5,7 +5,7 @@ define void @stride7_i32(ptr noalias nocapture %dst, i64 %n) #0 {
 ; CHECK-LABEL: @stride7_i32(
 ; CHECK:      vector.body
 ; CHECK:        %[[VEC_IND:.*]] = phi <vscale x 4 x i64> [ %{{.*}}, %vector.ph ], [ %{{.*}}, %vector.body ]
-; CHECK-NEXT:   %[[PTR_INDICES:.*]] = mul nuw nsw <vscale x 4 x i64> %[[VEC_IND]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 7, i32 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT:   %[[PTR_INDICES:.*]] = mul nuw nsw <vscale x 4 x i64> %[[VEC_IND]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 7, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
 ; CHECK-NEXT:   %[[PTRS:.*]] = getelementptr inbounds i32, ptr %dst, <vscale x 4 x i64> %[[PTR_INDICES]]
 ; CHECK-NEXT:   %[[GLOAD:.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> %[[PTRS]]
 ; CHECK-NEXT:   %[[VALS:.*]] = add nsw <vscale x 4 x i32> %[[GLOAD]],
@@ -32,7 +32,7 @@ define void @stride7_f64(ptr noalias nocapture %dst, i64 %n) #0 {
 ; CHECK-LABEL: @stride7_f64(
 ; CHECK:      vector.body
 ; CHECK:        %[[VEC_IND:.*]] = phi <vscale x 2 x i64> [ %{{.*}}, %vector.ph ], [ %{{.*}}, %vector.body ]
-; CHECK-NEXT:   %[[PTR_INDICES:.*]] = mul nuw nsw <vscale x 2 x i64> %[[VEC_IND]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 7, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
+; CHECK-NEXT:   %[[PTR_INDICES:.*]] = mul nuw nsw <vscale x 2 x i64> %[[VEC_IND]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 7, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
 ; CHECK-NEXT:   %[[PTRS:.*]] = getelementptr inbounds double, ptr %dst, <vscale x 2 x i64> %[[PTR_INDICES]]
 ; CHECK-NEXT:   %[[GLOAD:.*]] = call <vscale x 2 x double> @llvm.masked.gather.nxv2f64.nxv2p0(<vscale x 2 x ptr> %[[PTRS]],
 ; CHECK-NEXT:   %[[VALS:.*]] = fadd <vscale x 2 x double> %[[GLOAD]],

diff  --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-live-out-pointer-induction.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-live-out-pointer-induction.ll
index 1721be34bb86e..67f2bcbcb3de5 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-live-out-pointer-induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-live-out-pointer-induction.ll
@@ -32,18 +32,18 @@ define ptr @test(ptr %start.1, ptr %start.2, ptr %end) {
 ; CHECK-NEXT:    [[TMP12:%.*]] = mul i64 [[TMP11]], 2
 ; CHECK-NEXT:    [[TMP13:%.*]] = mul i64 8, [[TMP12]]
 ; CHECK-NEXT:    [[TMP14:%.*]] = mul i64 [[TMP11]], 0
-; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP14]], i32 0
+; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP14]], i64 0
 ; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP15:%.*]] = call <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64()
 ; CHECK-NEXT:    [[TMP16:%.*]] = add <vscale x 2 x i64> [[DOTSPLAT]], [[TMP15]]
-; CHECK-NEXT:    [[VECTOR_GEP:%.*]] = mul <vscale x 2 x i64> [[TMP16]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 8, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
+; CHECK-NEXT:    [[VECTOR_GEP:%.*]] = mul <vscale x 2 x i64> [[TMP16]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 8, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
 ; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <vscale x 2 x i64> [[VECTOR_GEP]]
 ; CHECK-NEXT:    [[TMP18:%.*]] = mul i64 [[TMP11]], 1
-; CHECK-NEXT:    [[DOTSPLATINSERT5:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP18]], i32 0
+; CHECK-NEXT:    [[DOTSPLATINSERT5:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP18]], i64 0
 ; CHECK-NEXT:    [[DOTSPLAT6:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT5]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP19:%.*]] = call <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64()
 ; CHECK-NEXT:    [[TMP20:%.*]] = add <vscale x 2 x i64> [[DOTSPLAT6]], [[TMP19]]
-; CHECK-NEXT:    [[VECTOR_GEP7:%.*]] = mul <vscale x 2 x i64> [[TMP20]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 8, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
+; CHECK-NEXT:    [[VECTOR_GEP7:%.*]] = mul <vscale x 2 x i64> [[TMP20]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 8, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
 ; CHECK-NEXT:    [[TMP21:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <vscale x 2 x i64> [[VECTOR_GEP7]]
 ; CHECK-NEXT:    [[TMP22:%.*]] = add i64 [[INDEX]], 0
 ; CHECK-NEXT:    [[TMP23:%.*]] = mul i64 [[TMP22]], 8

diff  --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-low-trip-count.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-low-trip-count.ll
index 5a87b3b76b0d4..997aca89b825d 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-low-trip-count.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-low-trip-count.ll
@@ -14,7 +14,7 @@ define void @trip7_i64(ptr noalias nocapture noundef %dst, ptr noalias nocapture
 ; CHECK-NEXT:    [[VF:%.*]] = mul i64 [[VSCALE]], 2
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], [[VF]]
 ; CHECK-NEXT:    [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_NEXT]], i64 7)
-; CHECK-NEXT:    [[ACTIVE_LANE_MASK_NOT:%.*]] = xor <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i32 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)
+; CHECK-NEXT:    [[ACTIVE_LANE_MASK_NOT:%.*]] = xor <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)
 ; CHECK-NEXT:    [[COND:%.*]] = extractelement <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NOT]], i32 0
 ; CHECK-NEXT:    br i1 [[COND]], label %middle.block, label %vector.body
 ;

diff  --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-select-cmp.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-select-cmp.ll
index cf5fdc4e30e83..1c26ee8479e57 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-select-cmp.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-select-cmp.ll
@@ -8,37 +8,37 @@ target triple = "aarch64-linux-gnu"
 define i32 @select_const_i32_from_icmp(ptr nocapture readonly %v, i64 %n) #0 {
 ; CHECK-VF4IC1-LABEL: @select_const_i32_from_icmp
 ; CHECK-VF4IC1:      vector.body:
-; CHECK-VF4IC1:        [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ]
+; CHECK-VF4IC1:        [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ]
 ; CHECK-VF4IC1:        [[VEC_LOAD:%.*]] = load <vscale x 4 x i32>
-; CHECK-VF4IC1-NEXT:   [[VEC_ICMP:%.*]] = icmp eq <vscale x 4 x i32> [[VEC_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
-; CHECK-VF4IC1-NEXT:   [[VEC_SEL]] = select <vscale x 4 x i1> [[VEC_ICMP]], <vscale x 4 x i32> [[VEC_PHI]], <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 7, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-VF4IC1-NEXT:   [[VEC_ICMP:%.*]] = icmp eq <vscale x 4 x i32> [[VEC_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-VF4IC1-NEXT:   [[VEC_SEL]] = select <vscale x 4 x i1> [[VEC_ICMP]], <vscale x 4 x i32> [[VEC_PHI]], <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 7, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
 ; CHECK-VF4IC1:      middle.block:
-; CHECK-VF4IC1-NEXT:   [[FIN_ICMP:%.*]] = icmp ne <vscale x 4 x i32> [[VEC_SEL]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-VF4IC1-NEXT:   [[FIN_ICMP:%.*]] = icmp ne <vscale x 4 x i32> [[VEC_SEL]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
 ; CHECK-VF4IC1-NEXT:   [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[FIN_ICMP]])
 ; CHECK-VF4IC1-NEXT:   {{.*}} = select i1 [[OR_RDX]], i32 7, i32 3
 
 ; CHECK-VF4IC4-LABEL: @select_const_i32_from_icmp
 ; CHECK-VF4IC4:      vector.body:
-; CHECK-VF4IC4:        [[VEC_PHI1:%.*]] = phi <vscale x 4 x i32> [ shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), %vector.ph ], [ [[VEC_SEL1:%.*]], %vector.body ]
-; CHECK-VF4IC4:        [[VEC_PHI2:%.*]] = phi <vscale x 4 x i32> [ shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), %vector.ph ], [ [[VEC_SEL2:%.*]], %vector.body ]
-; CHECK-VF4IC4:        [[VEC_PHI3:%.*]] = phi <vscale x 4 x i32> [ shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), %vector.ph ], [ [[VEC_SEL3:%.*]], %vector.body ]
-; CHECK-VF4IC4:        [[VEC_PHI4:%.*]] = phi <vscale x 4 x i32> [ shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), %vector.ph ], [ [[VEC_SEL4:%.*]], %vector.body ]
-; CHECK-VF4IC4:        [[VEC_ICMP1:%.*]] = icmp eq <vscale x 4 x i32> {{.*}}, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
-; CHECK-VF4IC4-NEXT:   [[VEC_ICMP2:%.*]] = icmp eq <vscale x 4 x i32> {{.*}}, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
-; CHECK-VF4IC4-NEXT:   [[VEC_ICMP3:%.*]] = icmp eq <vscale x 4 x i32> {{.*}}, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
-; CHECK-VF4IC4-NEXT:   [[VEC_ICMP4:%.*]] = icmp eq <vscale x 4 x i32> {{.*}}, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
-; CHECK-VF4IC4-NEXT:   [[VEC_SEL1]] = select <vscale x 4 x i1> [[VEC_ICMP1]], <vscale x 4 x i32> [[VEC_PHI1]], <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 7, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
-; CHECK-VF4IC4-NEXT:   [[VEC_SEL2]] = select <vscale x 4 x i1> [[VEC_ICMP2]], <vscale x 4 x i32> [[VEC_PHI2]], <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 7, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
-; CHECK-VF4IC4-NEXT:   [[VEC_SEL3]] = select <vscale x 4 x i1> [[VEC_ICMP3]], <vscale x 4 x i32> [[VEC_PHI3]], <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 7, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
-; CHECK-VF4IC4-NEXT:   [[VEC_SEL4]] = select <vscale x 4 x i1> [[VEC_ICMP4]], <vscale x 4 x i32> [[VEC_PHI4]], <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 7, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-VF4IC4:        [[VEC_PHI1:%.*]] = phi <vscale x 4 x i32> [ shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), %vector.ph ], [ [[VEC_SEL1:%.*]], %vector.body ]
+; CHECK-VF4IC4:        [[VEC_PHI2:%.*]] = phi <vscale x 4 x i32> [ shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), %vector.ph ], [ [[VEC_SEL2:%.*]], %vector.body ]
+; CHECK-VF4IC4:        [[VEC_PHI3:%.*]] = phi <vscale x 4 x i32> [ shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), %vector.ph ], [ [[VEC_SEL3:%.*]], %vector.body ]
+; CHECK-VF4IC4:        [[VEC_PHI4:%.*]] = phi <vscale x 4 x i32> [ shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), %vector.ph ], [ [[VEC_SEL4:%.*]], %vector.body ]
+; CHECK-VF4IC4:        [[VEC_ICMP1:%.*]] = icmp eq <vscale x 4 x i32> {{.*}}, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-VF4IC4-NEXT:   [[VEC_ICMP2:%.*]] = icmp eq <vscale x 4 x i32> {{.*}}, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-VF4IC4-NEXT:   [[VEC_ICMP3:%.*]] = icmp eq <vscale x 4 x i32> {{.*}}, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-VF4IC4-NEXT:   [[VEC_ICMP4:%.*]] = icmp eq <vscale x 4 x i32> {{.*}}, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-VF4IC4-NEXT:   [[VEC_SEL1]] = select <vscale x 4 x i1> [[VEC_ICMP1]], <vscale x 4 x i32> [[VEC_PHI1]], <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 7, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-VF4IC4-NEXT:   [[VEC_SEL2]] = select <vscale x 4 x i1> [[VEC_ICMP2]], <vscale x 4 x i32> [[VEC_PHI2]], <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 7, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-VF4IC4-NEXT:   [[VEC_SEL3]] = select <vscale x 4 x i1> [[VEC_ICMP3]], <vscale x 4 x i32> [[VEC_PHI3]], <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 7, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-VF4IC4-NEXT:   [[VEC_SEL4]] = select <vscale x 4 x i1> [[VEC_ICMP4]], <vscale x 4 x i32> [[VEC_PHI4]], <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 7, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
 ; CHECK-VF4IC4:      middle.block:
-; CHECK-VF4IC4-NEXT:   [[VEC_ICMP5:%.*]] = icmp ne <vscale x 4 x i32> [[VEC_SEL1]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-VF4IC4-NEXT:   [[VEC_ICMP5:%.*]] = icmp ne <vscale x 4 x i32> [[VEC_SEL1]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
 ; CHECK-VF4IC4-NEXT:   [[VEC_SEL5:%.*]] = select <vscale x 4 x i1> [[VEC_ICMP5]], <vscale x 4 x i32> [[VEC_SEL1]], <vscale x 4 x i32> [[VEC_SEL2]]
-; CHECK-VF4IC4-NEXT:   [[VEC_ICMP6:%.*]] = icmp ne <vscale x 4 x i32> [[VEC_SEL5]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-VF4IC4-NEXT:   [[VEC_ICMP6:%.*]] = icmp ne <vscale x 4 x i32> [[VEC_SEL5]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
 ; CHECK-VF4IC4-NEXT:   [[VEC_SEL6:%.*]] = select <vscale x 4 x i1> [[VEC_ICMP6]], <vscale x 4 x i32> [[VEC_SEL5]], <vscale x 4 x i32> [[VEC_SEL3]]
-; CHECK-VF4IC4-NEXT:   [[VEC_ICMP7:%.*]] = icmp ne <vscale x 4 x i32> [[VEC_SEL6]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-VF4IC4-NEXT:   [[VEC_ICMP7:%.*]] = icmp ne <vscale x 4 x i32> [[VEC_SEL6]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
 ; CHECK-VF4IC4-NEXT:   [[VEC_SEL7:%.*]] = select <vscale x 4 x i1> [[VEC_ICMP7]], <vscale x 4 x i32> [[VEC_SEL6]], <vscale x 4 x i32> [[VEC_SEL4]]
-; CHECK-VF4IC4-NEXT:   [[FIN_ICMP:%.*]] = icmp ne <vscale x 4 x i32> [[VEC_SEL7]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-VF4IC4-NEXT:   [[FIN_ICMP:%.*]] = icmp ne <vscale x 4 x i32> [[VEC_SEL7]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
 ; CHECK-VF4IC4-NEXT:   [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[FIN_ICMP]])
 ; CHECK-VF4IC4-NEXT:   {{.*}} = select i1 [[OR_RDX]], i32 7, i32 3
 entry:
@@ -62,17 +62,17 @@ exit:                                     ; preds = %for.body
 define i32 @select_i32_from_icmp(ptr nocapture readonly %v, i32 %a, i32 %b, i64 %n) #0 {
 ; CHECK-VF4IC1-LABEL: @select_i32_from_icmp
 ; CHECK-VF4IC1:      vector.ph:
-; CHECK-VF4IC1:        [[TMP1:%.*]] = insertelement <vscale x 4 x i32> poison, i32 %a, i32 0
+; CHECK-VF4IC1:        [[TMP1:%.*]] = insertelement <vscale x 4 x i32> poison, i32 %a, i64 0
 ; CHECK-VF4IC1-NEXT:   [[SPLAT_OF_A:%.*]] = shufflevector <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
-; CHECK-VF4IC1-NEXT:   [[TMP2:%.*]] = insertelement <vscale x 4 x i32> poison, i32 %b, i32 0
+; CHECK-VF4IC1-NEXT:   [[TMP2:%.*]] = insertelement <vscale x 4 x i32> poison, i32 %b, i64 0
 ; CHECK-VF4IC1-NEXT:   [[SPLAT_OF_B:%.*]] = shufflevector <vscale x 4 x i32> [[TMP2]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
 ; CHECK-VF4IC1:      vector.body:
 ; CHECK-VF4IC1:        [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ [[SPLAT_OF_A]], %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ]
 ; CHECK-VF4IC1:        [[VEC_LOAD:%.*]] = load <vscale x 4 x i32>
-; CHECK-VF4IC1-NEXT:   [[VEC_ICMP:%.*]] = icmp eq <vscale x 4 x i32> [[VEC_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-VF4IC1-NEXT:   [[VEC_ICMP:%.*]] = icmp eq <vscale x 4 x i32> [[VEC_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
 ; CHECK-VF4IC1-NEXT:   [[VEC_SEL]] = select <vscale x 4 x i1> [[VEC_ICMP]], <vscale x 4 x i32> [[VEC_PHI]], <vscale x 4 x i32> [[SPLAT_OF_B]]
 ; CHECK-VF4IC1:      middle.block:
-; CHECK-VF4IC1-NEXT:   [[FIN_INS:%.*]] = insertelement <vscale x 4 x i32> poison, i32 %a, i32 0
+; CHECK-VF4IC1-NEXT:   [[FIN_INS:%.*]] = insertelement <vscale x 4 x i32> poison, i32 %a, i64 0
 ; CHECK-VF4IC1-NEXT:   [[FIN_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[FIN_INS]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
 ; CHECK-VF4IC1-NEXT:   [[FIN_CMP:%.*]] = icmp ne <vscale x 4 x i32> [[VEC_SEL]], [[FIN_SPLAT]]
 ; CHECK-VF4IC1-NEXT:   [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[FIN_CMP]])
@@ -101,12 +101,12 @@ exit:                                     ; preds = %for.body
 define i32 @select_const_i32_from_fcmp(ptr nocapture readonly %v, i64 %n) #0 {
 ; CHECK-VF4IC1-LABEL: @select_const_i32_from_fcmp
 ; CHECK-VF4IC1:      vector.body:
-; CHECK-VF4IC1:        [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 2, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ]
+; CHECK-VF4IC1:        [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 2, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ]
 ; CHECK-VF4IC1:        [[VEC_LOAD:%.*]] = load <vscale x 4 x float>
-; CHECK-VF4IC1-NEXT:   [[VEC_ICMP:%.*]] = fcmp fast ueq <vscale x 4 x float> [[VEC_LOAD]], shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 3.000000e+00, i32 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer)
-; CHECK-VF4IC1-NEXT:   [[VEC_SEL]] = select <vscale x 4 x i1> [[VEC_ICMP]], <vscale x 4 x i32> [[VEC_PHI]], <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-VF4IC1-NEXT:   [[VEC_ICMP:%.*]] = fcmp fast ueq <vscale x 4 x float> [[VEC_LOAD]], shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 3.000000e+00, i64 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-VF4IC1-NEXT:   [[VEC_SEL]] = select <vscale x 4 x i1> [[VEC_ICMP]], <vscale x 4 x i32> [[VEC_PHI]], <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
 ; CHECK-VF4IC1:      middle.block:
-; CHECK-VF4IC1-NEXT:   [[FIN_ICMP:%.*]] = icmp ne <vscale x 4 x i32> [[VEC_SEL]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 2, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-VF4IC1-NEXT:   [[FIN_ICMP:%.*]] = icmp ne <vscale x 4 x i32> [[VEC_SEL]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 2, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
 ; CHECK-VF4IC1-NEXT:   [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[FIN_ICMP]])
 ; CHECK-VF4IC1-NEXT:   {{.*}} = select i1 [[OR_RDX]], i32 1, i32 2
 
@@ -158,10 +158,10 @@ define i32 @pred_select_const_i32_from_icmp(ptr noalias nocapture readonly %src1
 ; CHECK-VF4IC1:      vector.body:
 ; CHECK-VF4IC1:        [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ]
 ; CHECK-VF4IC1:        [[VEC_LOAD:%.*]] = load <vscale x 4 x i32>
-; CHECK-VF4IC1:        [[MASK:%.*]] = icmp sgt <vscale x 4 x i32> [[VEC_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 35, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-VF4IC1:        [[MASK:%.*]] = icmp sgt <vscale x 4 x i32> [[VEC_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 35, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
 ; CHECK-VF4IC1:        [[MASKED_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr {{%.*}}, i32 4, <vscale x 4 x i1> [[MASK]], <vscale x 4 x i32> poison)
-; CHECK-VF4IC1-NEXT:   [[VEC_ICMP:%.*]] = icmp eq <vscale x 4 x i32> [[MASKED_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 2, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
-; CHECK-VF4IC1-NEXT:   [[VEC_SEL_TMP:%.*]] = select <vscale x 4 x i1> [[VEC_ICMP]], <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32> [[VEC_PHI]]
+; CHECK-VF4IC1-NEXT:   [[VEC_ICMP:%.*]] = icmp eq <vscale x 4 x i32> [[MASKED_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 2, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-VF4IC1-NEXT:   [[VEC_SEL_TMP:%.*]] = select <vscale x 4 x i1> [[VEC_ICMP]], <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32> [[VEC_PHI]]
 ; CHECK-VF4IC1:        [[VEC_SEL:%.*]] = select <vscale x 4 x i1> [[MASK]], <vscale x 4 x i32> [[VEC_SEL_TMP]], <vscale x 4 x i32> [[VEC_PHI]]
 ; CHECK-VF4IC1:      middle.block:
 ; CHECK-VF4IC1-NEXT:   [[FIN_ICMP:%.*]] = icmp ne <vscale x 4 x i32> [[VEC_SEL]], zeroinitializer

diff  --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-forced.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-forced.ll
index 2c186cecf0806..c0a1cf1918454 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-forced.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-forced.ll
@@ -49,7 +49,7 @@ define void @simple_memset(i32 %val, ptr %ptr, i64 %n) #0 {
 ; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]]
 ; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; CHECK-NEXT:    [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[UMAX]])
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[VAL:%.*]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[VAL:%.*]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
@@ -63,7 +63,7 @@ define void @simple_memset(i32 %val, ptr %ptr, i64 %n) #0 {
 ; CHECK-NEXT:    [[TMP13:%.*]] = mul i64 [[TMP12]], 4
 ; CHECK-NEXT:    [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP13]]
 ; CHECK-NEXT:    [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX_NEXT2]], i64 [[UMAX]])
-; CHECK-NEXT:    [[TMP14:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i32 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP14:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)
 ; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <vscale x 4 x i1> [[TMP14]], i32 0
 ; CHECK-NEXT:    br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       middle.block:

diff  --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-option.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-option.ll
index 92a179ea708f0..18d0af56a786e 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-option.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-option.ll
@@ -11,7 +11,7 @@ target triple = "aarch64-unknown-linux-gnu"
 define void @simple_memset(i32 %val, i32* %ptr, i64 %n) #0 {
 ; CHECK-NOTF-LABEL: @simple_memset(
 ; CHECK-NOTF:       vector.ph:
-; CHECK-NOTF:         %[[INSERT:.*]] = insertelement <vscale x 4 x i32> poison, i32 %val, i32 0
+; CHECK-NOTF:         %[[INSERT:.*]] = insertelement <vscale x 4 x i32> poison, i32 %val, i64 0
 ; CHECK-NOTF:         %[[SPLAT:.*]] = shufflevector <vscale x 4 x i32> %[[INSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
 ; CHECK-NOTF:       vector.body:
 ; CHECK-NOTF-NOT:     %{{.*}} = phi <vscale x 4 x i1>
@@ -19,7 +19,7 @@ define void @simple_memset(i32 %val, i32* %ptr, i64 %n) #0 {
 
 ; CHECK-TF-NORED-LABEL: @simple_memset(
 ; CHECK-TF-NORED:       vector.ph:
-; CHECK-TF-NORED:         %[[INSERT:.*]] = insertelement <vscale x 4 x i32> poison, i32 %val, i32 0
+; CHECK-TF-NORED:         %[[INSERT:.*]] = insertelement <vscale x 4 x i32> poison, i32 %val, i64 0
 ; CHECK-TF-NORED:         %[[SPLAT:.*]] = shufflevector <vscale x 4 x i32> %[[INSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
 ; CHECK-TF-NORED:       vector.body:
 ; CHECK-TF-NORED:         %[[ACTIVE_LANE_MASK:.*]] = phi <vscale x 4 x i1>
@@ -27,7 +27,7 @@ define void @simple_memset(i32 %val, i32* %ptr, i64 %n) #0 {
 
 ; CHECK-TF-NOREC-LABEL: @simple_memset(
 ; CHECK-TF-NOREC:       vector.ph:
-; CHECK-TF-NOREC:         %[[INSERT:.*]] = insertelement <vscale x 4 x i32> poison, i32 %val, i32 0
+; CHECK-TF-NOREC:         %[[INSERT:.*]] = insertelement <vscale x 4 x i32> poison, i32 %val, i64 0
 ; CHECK-TF-NOREC:         %[[SPLAT:.*]] = shufflevector <vscale x 4 x i32> %[[INSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
 ; CHECK-TF-NOREC:       vector.body:
 ; CHECK-TF-NOREC:         %[[ACTIVE_LANE_MASK:.*]] = phi <vscale x 4 x i1>
@@ -35,7 +35,7 @@ define void @simple_memset(i32 %val, i32* %ptr, i64 %n) #0 {
 
 ; CHECK-TF-LABEL: @simple_memset(
 ; CHECK-TF:       vector.ph:
-; CHECK-TF:         %[[INSERT:.*]] = insertelement <vscale x 4 x i32> poison, i32 %val, i32 0
+; CHECK-TF:         %[[INSERT:.*]] = insertelement <vscale x 4 x i32> poison, i32 %val, i64 0
 ; CHECK-TF:         %[[SPLAT:.*]] = shufflevector <vscale x 4 x i32> %[[INSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
 ; CHECK-TF:       vector.body:
 ; CHECK-TF:         %[[ACTIVE_LANE_MASK:.*]] = phi <vscale x 4 x i1>
@@ -43,7 +43,7 @@ define void @simple_memset(i32 %val, i32* %ptr, i64 %n) #0 {
 
 ; CHECK-TF-ONLYRED-LABEL: @simple_memset(
 ; CHECK-TF-ONLYRED:       vector.ph:
-; CHECK-TF-ONLYRED:         %[[INSERT:.*]] = insertelement <vscale x 4 x i32> poison, i32 %val, i32 0
+; CHECK-TF-ONLYRED:         %[[INSERT:.*]] = insertelement <vscale x 4 x i32> poison, i32 %val, i64 0
 ; CHECK-TF-ONLYRED:         %[[SPLAT:.*]] = shufflevector <vscale x 4 x i32> %[[INSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
 ; CHECK-TF-ONLYRED:       vector.body:
 ; CHECK-TF-ONLYRED-NOT:     %{{.*}} = phi <vscale x 4 x i1>

diff  --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-optsize.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-optsize.ll
index f0633cc447dbc..b87bdfc21ebbe 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-optsize.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-optsize.ll
@@ -28,7 +28,7 @@ define void @trip1024_i64(i64* noalias nocapture noundef %dst, i64* noalias noca
 ; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i64, i64* [[TMP9]], i32 0
 ; CHECK-NEXT:    [[TMP11:%.*]] = bitcast i64* [[TMP10]] to <vscale x 2 x i64>*
 ; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0nxv2i64(<vscale x 2 x i64>* [[TMP11]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK1]], <vscale x 2 x i64> poison)
-; CHECK-NEXT:    [[TMP12:%.*]] = shl nsw <vscale x 2 x i64> [[WIDE_MASKED_LOAD]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP12:%.*]] = shl nsw <vscale x 2 x i64> [[WIDE_MASKED_LOAD]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
 ; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i64, i64* [[DST:%.*]], i64 [[TMP8]]
 ; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i64, i64* [[TMP13]], i32 0
 ; CHECK-NEXT:    [[TMP15:%.*]] = bitcast i64* [[TMP14]] to <vscale x 2 x i64>*
@@ -40,7 +40,7 @@ define void @trip1024_i64(i64* noalias nocapture noundef %dst, i64* noalias noca
 ; CHECK-NEXT:    [[TMP19:%.*]] = mul i64 [[TMP18]], 2
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP19]]
 ; CHECK-NEXT:    [[ACTIVE_LANE_MASK3]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_NEXT]], i64 1024)
-; CHECK-NEXT:    [[TMP20:%.*]] = xor <vscale x 2 x i1> [[ACTIVE_LANE_MASK3]], shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i32 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP20:%.*]] = xor <vscale x 2 x i1> [[ACTIVE_LANE_MASK3]], shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)
 ; CHECK-NEXT:    [[TMP21:%.*]] = extractelement <vscale x 2 x i1> [[TMP20]], i32 0
 ; CHECK-NEXT:    br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       middle.block:

diff  --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-reductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-reductions.ll
index e52cbf15bde0a..ed59fd2447696 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-reductions.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-reductions.ll
@@ -40,7 +40,7 @@ define i32 @add_reduction_i32(i32* %ptr, i64 %n) #0 {
 ; CHECK-NEXT:    [[TMP16:%.*]] = mul i64 [[TMP15]], 4
 ; CHECK-NEXT:    [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP16]]
 ; CHECK-NEXT:    [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX_NEXT2]], i64 [[UMAX]])
-; CHECK-NEXT:    [[TMP17:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i32 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP17:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)
 ; CHECK-NEXT:    [[TMP18:%.*]] = extractelement <vscale x 4 x i1> [[TMP17]], i32 0
 ; CHECK-NEXT:    br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       middle.block:
@@ -82,7 +82,7 @@ define i32 @add_reduction_i32(i32* %ptr, i64 %n) #0 {
 ; CHECK-IN-LOOP-NEXT:    [[TMP17:%.*]] = mul i64 [[TMP16]], 4
 ; CHECK-IN-LOOP-NEXT:    [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP17]]
 ; CHECK-IN-LOOP-NEXT:    [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX_NEXT2]], i64 [[UMAX]])
-; CHECK-IN-LOOP-NEXT:    [[TMP18:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i32 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-IN-LOOP-NEXT:    [[TMP18:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)
 ; CHECK-IN-LOOP-NEXT:    [[TMP19:%.*]] = extractelement <vscale x 4 x i1> [[TMP18]], i32 0
 ; CHECK-IN-LOOP-NEXT:    br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK-IN-LOOP:       middle.block:
@@ -134,13 +134,13 @@ define float @add_reduction_f32(float* %ptr, i64 %n) #0 {
 ; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr float, float* [[TMP10]], i32 0
 ; CHECK-NEXT:    [[TMP12:%.*]] = bitcast float* [[TMP11]] to <vscale x 4 x float>*
 ; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0nxv4f32(<vscale x 4 x float>* [[TMP12]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x float> poison)
-; CHECK-NEXT:    [[TMP13:%.*]] = select <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]], <vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float -0.000000e+00, i32 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP13:%.*]] = select <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]], <vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float -0.000000e+00, i64 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer)
 ; CHECK-NEXT:    [[TMP14]] = call float @llvm.vector.reduce.fadd.nxv4f32(float [[VEC_PHI]], <vscale x 4 x float> [[TMP13]])
 ; CHECK-NEXT:    [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT:    [[TMP16:%.*]] = mul i64 [[TMP15]], 4
 ; CHECK-NEXT:    [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP16]]
 ; CHECK-NEXT:    [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX_NEXT2]], i64 [[UMAX]])
-; CHECK-NEXT:    [[TMP17:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i32 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP17:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)
 ; CHECK-NEXT:    [[TMP18:%.*]] = extractelement <vscale x 4 x i1> [[TMP17]], i32 0
 ; CHECK-NEXT:    br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ; CHECK:       middle.block:
@@ -174,13 +174,13 @@ define float @add_reduction_f32(float* %ptr, i64 %n) #0 {
 ; CHECK-IN-LOOP-NEXT:    [[TMP11:%.*]] = getelementptr float, float* [[TMP10]], i32 0
 ; CHECK-IN-LOOP-NEXT:    [[TMP12:%.*]] = bitcast float* [[TMP11]] to <vscale x 4 x float>*
 ; CHECK-IN-LOOP-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0nxv4f32(<vscale x 4 x float>* [[TMP12]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x float> poison)
-; CHECK-IN-LOOP-NEXT:    [[TMP13:%.*]] = select <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]], <vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float -0.000000e+00, i32 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-IN-LOOP-NEXT:    [[TMP13:%.*]] = select <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]], <vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float -0.000000e+00, i64 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer)
 ; CHECK-IN-LOOP-NEXT:    [[TMP14]] = call float @llvm.vector.reduce.fadd.nxv4f32(float [[VEC_PHI]], <vscale x 4 x float> [[TMP13]])
 ; CHECK-IN-LOOP-NEXT:    [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-IN-LOOP-NEXT:    [[TMP16:%.*]] = mul i64 [[TMP15]], 4
 ; CHECK-IN-LOOP-NEXT:    [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP16]]
 ; CHECK-IN-LOOP-NEXT:    [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX_NEXT2]], i64 [[UMAX]])
-; CHECK-IN-LOOP-NEXT:    [[TMP17:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i32 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-IN-LOOP-NEXT:    [[TMP17:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)
 ; CHECK-IN-LOOP-NEXT:    [[TMP18:%.*]] = extractelement <vscale x 4 x i1> [[TMP17]], i32 0
 ; CHECK-IN-LOOP-NEXT:    br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ; CHECK-IN-LOOP:       middle.block:
@@ -231,14 +231,14 @@ define i32 @cond_xor_reduction(i32* noalias %a, i32* noalias %cond, i64 %N) #0 {
 ; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i32 0
 ; CHECK-NEXT:    [[TMP12:%.*]] = bitcast i32* [[TMP11]] to <vscale x 4 x i32>*
 ; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0nxv4i32(<vscale x 4 x i32>* [[TMP12]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i32> poison)
-; CHECK-NEXT:    [[TMP13:%.*]] = icmp eq <vscale x 4 x i32> [[WIDE_MASKED_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 5, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP13:%.*]] = icmp eq <vscale x 4 x i32> [[WIDE_MASKED_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 5, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
 ; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr i32, i32* [[A:%.*]], i64 [[TMP9]]
 ; CHECK-NEXT:    [[TMP15:%.*]] = select <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i1> [[TMP13]], <vscale x 4 x i1> zeroinitializer
 ; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr i32, i32* [[TMP14]], i32 0
 ; CHECK-NEXT:    [[TMP17:%.*]] = bitcast i32* [[TMP16]] to <vscale x 4 x i32>*
 ; CHECK-NEXT:    [[WIDE_MASKED_LOAD1:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0nxv4i32(<vscale x 4 x i32>* [[TMP17]], i32 4, <vscale x 4 x i1> [[TMP15]], <vscale x 4 x i32> poison)
 ; CHECK-NEXT:    [[TMP18:%.*]] = xor <vscale x 4 x i32> [[VEC_PHI]], [[WIDE_MASKED_LOAD1]]
-; CHECK-NEXT:    [[TMP19:%.*]] = xor <vscale x 4 x i1> [[TMP13]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i32 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP19:%.*]] = xor <vscale x 4 x i1> [[TMP13]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)
 ; CHECK-NEXT:    [[TMP20:%.*]] = select <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i1> [[TMP19]], <vscale x 4 x i1> zeroinitializer
 ; CHECK-NEXT:    [[PREDPHI:%.*]] = select <vscale x 4 x i1> [[TMP15]], <vscale x 4 x i32> [[TMP18]], <vscale x 4 x i32> [[VEC_PHI]]
 ; CHECK-NEXT:    [[TMP21]] = select <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i32> [[PREDPHI]], <vscale x 4 x i32> [[VEC_PHI]]
@@ -246,7 +246,7 @@ define i32 @cond_xor_reduction(i32* noalias %a, i32* noalias %cond, i64 %N) #0 {
 ; CHECK-NEXT:    [[TMP23:%.*]] = mul i64 [[TMP22]], 4
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP23]]
 ; CHECK-NEXT:    [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX_NEXT]], i64 [[N]])
-; CHECK-NEXT:    [[TMP24:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i32 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP24:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)
 ; CHECK-NEXT:    [[TMP25:%.*]] = extractelement <vscale x 4 x i1> [[TMP24]], i32 0
 ; CHECK-NEXT:    br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
 ; CHECK:       middle.block:
@@ -280,7 +280,7 @@ define i32 @cond_xor_reduction(i32* noalias %a, i32* noalias %cond, i64 %N) #0 {
 ; CHECK-IN-LOOP-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i32 0
 ; CHECK-IN-LOOP-NEXT:    [[TMP12:%.*]] = bitcast i32* [[TMP11]] to <vscale x 4 x i32>*
 ; CHECK-IN-LOOP-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0nxv4i32(<vscale x 4 x i32>* [[TMP12]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i32> poison)
-; CHECK-IN-LOOP-NEXT:    [[TMP13:%.*]] = icmp eq <vscale x 4 x i32> [[WIDE_MASKED_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 5, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-IN-LOOP-NEXT:    [[TMP13:%.*]] = icmp eq <vscale x 4 x i32> [[WIDE_MASKED_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 5, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
 ; CHECK-IN-LOOP-NEXT:    [[TMP14:%.*]] = getelementptr i32, i32* [[A:%.*]], i64 [[TMP9]]
 ; CHECK-IN-LOOP-NEXT:    [[TMP15:%.*]] = select <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i1> [[TMP13]], <vscale x 4 x i1> zeroinitializer
 ; CHECK-IN-LOOP-NEXT:    [[TMP16:%.*]] = getelementptr i32, i32* [[TMP14]], i32 0
@@ -293,7 +293,7 @@ define i32 @cond_xor_reduction(i32* noalias %a, i32* noalias %cond, i64 %N) #0 {
 ; CHECK-IN-LOOP-NEXT:    [[TMP22:%.*]] = mul i64 [[TMP21]], 4
 ; CHECK-IN-LOOP-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP22]]
 ; CHECK-IN-LOOP-NEXT:    [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX_NEXT]], i64 [[N]])
-; CHECK-IN-LOOP-NEXT:    [[TMP23:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i32 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-IN-LOOP-NEXT:    [[TMP23:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)
 ; CHECK-IN-LOOP-NEXT:    [[TMP24:%.*]] = extractelement <vscale x 4 x i1> [[TMP23]], i32 0
 ; CHECK-IN-LOOP-NEXT:    br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
 ; CHECK-IN-LOOP:       middle.block:

diff  --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-unroll.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-unroll.ll
index 4715282a8959d..c096a96f5de53 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-unroll.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-unroll.ll
@@ -34,13 +34,13 @@ define void @simple_memset(i32 %val, i32* %ptr, i64 %n) #0 {
 ; CHECK-NEXT:    [[ACTIVE_LANE_MASK3:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX_PART_NEXT]], i64 [[UMAX]])
 ; CHECK-NEXT:    [[ACTIVE_LANE_MASK4:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX_PART_NEXT1]], i64 [[UMAX]])
 ; CHECK-NEXT:    [[ACTIVE_LANE_MASK5:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX_PART_NEXT2]], i64 [[UMAX]])
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[VAL:%.*]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[VAL:%.*]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT11:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[VAL]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT11:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[VAL]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT12:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT11]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT13:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[VAL]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT13:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[VAL]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT14:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT13]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT15:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[VAL]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT15:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[VAL]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT16:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT15]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
@@ -103,10 +103,10 @@ define void @simple_memset(i32 %val, i32* %ptr, i64 %n) #0 {
 ; CHECK-NEXT:    [[ACTIVE_LANE_MASK23]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX_PART_NEXT19]], i64 [[UMAX]])
 ; CHECK-NEXT:    [[ACTIVE_LANE_MASK24]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX_PART_NEXT20]], i64 [[UMAX]])
 ; CHECK-NEXT:    [[ACTIVE_LANE_MASK25]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX_PART_NEXT21]], i64 [[UMAX]])
-; CHECK-NEXT:    [[TMP57:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK22]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i32 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)
-; CHECK-NEXT:    [[TMP58:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK23]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i32 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)
-; CHECK-NEXT:    [[TMP59:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK24]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i32 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)
-; CHECK-NEXT:    [[TMP60:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK25]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i32 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP57:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK22]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP58:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK23]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP59:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK24]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP60:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK25]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)
 ; CHECK-NEXT:    [[TMP61:%.*]] = extractelement <vscale x 4 x i1> [[TMP57]], i32 0
 ; CHECK-NEXT:    br i1 [[TMP61]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       middle.block:
@@ -158,13 +158,13 @@ define void @cond_memset(i32 %val, i32* noalias readonly %cond_ptr, i32* noalias
 ; CHECK-NEXT:    [[ACTIVE_LANE_MASK3:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX_PART_NEXT]], i64 [[UMAX]])
 ; CHECK-NEXT:    [[ACTIVE_LANE_MASK4:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX_PART_NEXT1]], i64 [[UMAX]])
 ; CHECK-NEXT:    [[ACTIVE_LANE_MASK5:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX_PART_NEXT2]], i64 [[UMAX]])
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[VAL:%.*]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[VAL:%.*]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT14:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[VAL]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT14:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[VAL]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT15:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT14]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT16:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[VAL]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT16:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[VAL]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT17:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT16]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT18:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[VAL]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT18:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[VAL]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT19:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT18]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
@@ -257,10 +257,10 @@ define void @cond_memset(i32 %val, i32* noalias readonly %cond_ptr, i32* noalias
 ; CHECK-NEXT:    [[ACTIVE_LANE_MASK26]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX_PART_NEXT22]], i64 [[UMAX]])
 ; CHECK-NEXT:    [[ACTIVE_LANE_MASK27]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX_PART_NEXT23]], i64 [[UMAX]])
 ; CHECK-NEXT:    [[ACTIVE_LANE_MASK28]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX_PART_NEXT24]], i64 [[UMAX]])
-; CHECK-NEXT:    [[TMP83:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK25]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i32 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)
-; CHECK-NEXT:    [[TMP84:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK26]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i32 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)
-; CHECK-NEXT:    [[TMP85:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK27]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i32 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)
-; CHECK-NEXT:    [[TMP86:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK28]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i32 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP83:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK25]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP84:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK26]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP85:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK27]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP86:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK28]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)
 ; CHECK-NEXT:    [[TMP87:%.*]] = extractelement <vscale x 4 x i1> [[TMP83]], i32 0
 ; CHECK-NEXT:    br i1 [[TMP87]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ; CHECK:       middle.block:

diff  --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll
index 000c338b93513..d953af598f301 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll
@@ -23,7 +23,7 @@ define void @simple_memset(i32 %val, ptr %ptr, i64 %n) #0 {
 ; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]]
 ; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; CHECK-NEXT:    [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[UMAX]])
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[VAL:%.*]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[VAL:%.*]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
@@ -37,7 +37,7 @@ define void @simple_memset(i32 %val, ptr %ptr, i64 %n) #0 {
 ; CHECK-NEXT:    [[TMP13:%.*]] = mul i64 [[TMP12]], 4
 ; CHECK-NEXT:    [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP13]]
 ; CHECK-NEXT:    [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX_NEXT2]], i64 [[UMAX]])
-; CHECK-NEXT:    [[TMP14:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i32 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP14:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)
 ; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <vscale x 4 x i1> [[TMP14]], i32 0
 ; CHECK-NEXT:    br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       middle.block:
@@ -81,7 +81,7 @@ define void @simple_memset_v4i32(i32 %val, ptr %ptr, i64 %n) #0 {
 ; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], 4
 ; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; CHECK-NEXT:    [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 0, i64 [[UMAX]])
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[VAL:%.*]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[VAL:%.*]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
@@ -161,7 +161,7 @@ define void @simple_memcpy(ptr noalias %dst, ptr noalias %src, i64 %n) #0 {
 ; CHECK-NEXT:    [[TMP15:%.*]] = mul i64 [[TMP14]], 4
 ; CHECK-NEXT:    [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP15]]
 ; CHECK-NEXT:    [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX_NEXT2]], i64 [[UMAX]])
-; CHECK-NEXT:    [[TMP16:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i32 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP16:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)
 ; CHECK-NEXT:    [[TMP17:%.*]] = extractelement <vscale x 4 x i1> [[TMP16]], i32 0
 ; CHECK-NEXT:    br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
 ; CHECK:       middle.block:
@@ -224,12 +224,12 @@ define void @copy_stride4(ptr noalias %dst, ptr noalias %src, i64 %n) #0 {
 ; CHECK-NEXT:    [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[TMP2]])
 ; CHECK-NEXT:    [[TMP12:%.*]] = call <vscale x 4 x i64> @llvm.experimental.stepvector.nxv4i64()
 ; CHECK-NEXT:    [[TMP13:%.*]] = add <vscale x 4 x i64> [[TMP12]], zeroinitializer
-; CHECK-NEXT:    [[TMP14:%.*]] = mul <vscale x 4 x i64> [[TMP13]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 4, i32 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP14:%.*]] = mul <vscale x 4 x i64> [[TMP13]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 4, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
 ; CHECK-NEXT:    [[INDUCTION:%.*]] = add <vscale x 4 x i64> zeroinitializer, [[TMP14]]
 ; CHECK-NEXT:    [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT:    [[TMP16:%.*]] = mul i64 [[TMP15]], 4
 ; CHECK-NEXT:    [[TMP17:%.*]] = mul i64 4, [[TMP16]]
-; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP17]], i32 0
+; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP17]], i64 0
 ; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
@@ -244,7 +244,7 @@ define void @copy_stride4(ptr noalias %dst, ptr noalias %src, i64 %n) #0 {
 ; CHECK-NEXT:    [[TMP21:%.*]] = mul i64 [[TMP20]], 4
 ; CHECK-NEXT:    [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP21]]
 ; CHECK-NEXT:    [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX_NEXT2]], i64 [[TMP2]])
-; CHECK-NEXT:    [[TMP22:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i32 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP22:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)
 ; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[DOTSPLAT]]
 ; CHECK-NEXT:    [[TMP23:%.*]] = extractelement <vscale x 4 x i1> [[TMP22]], i32 0
 ; CHECK-NEXT:    br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
@@ -318,7 +318,7 @@ define void @simple_gather_scatter(ptr noalias %dst, ptr noalias %src, ptr noali
 ; CHECK-NEXT:    [[TMP15:%.*]] = mul i64 [[TMP14]], 4
 ; CHECK-NEXT:    [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP15]]
 ; CHECK-NEXT:    [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX_NEXT2]], i64 [[UMAX]])
-; CHECK-NEXT:    [[TMP16:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i32 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP16:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)
 ; CHECK-NEXT:    [[TMP17:%.*]] = extractelement <vscale x 4 x i1> [[TMP16]], i32 0
 ; CHECK-NEXT:    br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
 ; CHECK:       middle.block:
@@ -386,7 +386,7 @@ define void @uniform_load(ptr noalias %dst, ptr noalias readonly %src, i64 %n) #
 ; CHECK-NEXT:    [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 4 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP9:%.*]] = add i64 [[INDEX]], 0
 ; CHECK-NEXT:    [[TMP10:%.*]] = load i32, ptr [[SRC:%.*]], align 4
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TMP10]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TMP10]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 [[TMP9]]
 ; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP11]], i32 0
@@ -395,7 +395,7 @@ define void @uniform_load(ptr noalias %dst, ptr noalias readonly %src, i64 %n) #
 ; CHECK-NEXT:    [[TMP14:%.*]] = mul i64 [[TMP13]], 4
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP14]]
 ; CHECK-NEXT:    [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX_NEXT]], i64 [[N]])
-; CHECK-NEXT:    [[TMP15:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i32 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP15:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)
 ; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <vscale x 4 x i1> [[TMP15]], i32 0
 ; CHECK-NEXT:    br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
 ; CHECK:       middle.block:
@@ -454,7 +454,7 @@ define void @cond_uniform_load(ptr noalias %dst, ptr noalias readonly %src, ptr
 ; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]]
 ; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; CHECK-NEXT:    [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[N]])
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x ptr> poison, ptr [[SRC:%.*]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x ptr> poison, ptr [[SRC:%.*]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x ptr> [[BROADCAST_SPLATINSERT]], <vscale x 4 x ptr> poison, <vscale x 4 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
@@ -465,7 +465,7 @@ define void @cond_uniform_load(ptr noalias %dst, ptr noalias readonly %src, ptr
 ; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 0
 ; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr [[TMP11]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i32> poison)
 ; CHECK-NEXT:    [[TMP12:%.*]] = icmp eq <vscale x 4 x i32> [[WIDE_MASKED_LOAD]], zeroinitializer
-; CHECK-NEXT:    [[TMP13:%.*]] = xor <vscale x 4 x i1> [[TMP12]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i32 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP13:%.*]] = xor <vscale x 4 x i1> [[TMP12]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)
 ; CHECK-NEXT:    [[TMP14:%.*]] = select <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i1> [[TMP13]], <vscale x 4 x i1> zeroinitializer
 ; CHECK-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> [[BROADCAST_SPLAT]], i32 4, <vscale x 4 x i1> [[TMP14]], <vscale x 4 x i32> poison)
 ; CHECK-NEXT:    [[TMP15:%.*]] = select <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i1> [[TMP12]], <vscale x 4 x i1> zeroinitializer
@@ -478,7 +478,7 @@ define void @cond_uniform_load(ptr noalias %dst, ptr noalias readonly %src, ptr
 ; CHECK-NEXT:    [[TMP20:%.*]] = mul i64 [[TMP19]], 4
 ; CHECK-NEXT:    [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP20]]
 ; CHECK-NEXT:    [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX_NEXT2]], i64 [[N]])
-; CHECK-NEXT:    [[TMP21:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i32 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP21:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)
 ; CHECK-NEXT:    [[TMP22:%.*]] = extractelement <vscale x 4 x i1> [[TMP21]], i32 0
 ; CHECK-NEXT:    br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
 ; CHECK:       middle.block:
@@ -553,7 +553,7 @@ define void @uniform_store(ptr noalias %dst, ptr noalias readonly %src, i64 %n)
 ; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]]
 ; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; CHECK-NEXT:    [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[N]])
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x ptr> poison, ptr [[DST:%.*]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x ptr> poison, ptr [[DST:%.*]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x ptr> [[BROADCAST_SPLATINSERT]], <vscale x 4 x ptr> poison, <vscale x 4 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
@@ -568,7 +568,7 @@ define void @uniform_store(ptr noalias %dst, ptr noalias readonly %src, i64 %n)
 ; CHECK-NEXT:    [[TMP13:%.*]] = mul i64 [[TMP12]], 4
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP13]]
 ; CHECK-NEXT:    [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX_NEXT]], i64 [[N]])
-; CHECK-NEXT:    [[TMP14:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i32 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP14:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)
 ; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <vscale x 4 x i1> [[TMP14]], i32 0
 ; CHECK-NEXT:    br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
 ; CHECK:       middle.block:
@@ -641,7 +641,7 @@ define void @simple_fdiv(ptr noalias %dst, ptr noalias %src, i64 %n) #0 {
 ; CHECK-NEXT:    [[TMP16:%.*]] = mul i64 [[TMP15]], 4
 ; CHECK-NEXT:    [[INDEX_NEXT3]] = add i64 [[INDEX1]], [[TMP16]]
 ; CHECK-NEXT:    [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX_NEXT3]], i64 [[UMAX]])
-; CHECK-NEXT:    [[TMP17:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i32 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP17:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)
 ; CHECK-NEXT:    [[TMP18:%.*]] = extractelement <vscale x 4 x i1> [[TMP17]], i32 0
 ; CHECK-NEXT:    br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
 ; CHECK:       middle.block:
@@ -714,14 +714,14 @@ define void @simple_idiv(ptr noalias %dst, ptr noalias %src, i64 %n) #0 {
 ; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr [[TMP12]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i32> poison)
 ; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr i32, ptr [[TMP11]], i32 0
 ; CHECK-NEXT:    [[WIDE_MASKED_LOAD2:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr [[TMP13]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i32> poison)
-; CHECK-NEXT:    [[TMP14:%.*]] = select <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i32> [[WIDE_MASKED_LOAD2]], <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP14:%.*]] = select <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i32> [[WIDE_MASKED_LOAD2]], <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
 ; CHECK-NEXT:    [[TMP15:%.*]] = udiv <vscale x 4 x i32> [[WIDE_MASKED_LOAD]], [[TMP14]]
 ; CHECK-NEXT:    call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> [[TMP15]], ptr [[TMP13]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]])
 ; CHECK-NEXT:    [[TMP16:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT:    [[TMP17:%.*]] = mul i64 [[TMP16]], 4
 ; CHECK-NEXT:    [[INDEX_NEXT3]] = add i64 [[INDEX1]], [[TMP17]]
 ; CHECK-NEXT:    [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX_NEXT3]], i64 [[UMAX]])
-; CHECK-NEXT:    [[TMP18:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i32 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP18:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)
 ; CHECK-NEXT:    [[TMP19:%.*]] = extractelement <vscale x 4 x i1> [[TMP18]], i32 0
 ; CHECK-NEXT:    br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
 ; CHECK:       middle.block:

diff  --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll
index 7f1c98c907a6c..22d56cb35c049 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll
@@ -35,7 +35,7 @@ define void @vector_reverse_f64(i64 %N, ptr noalias %a, ptr noalias %b) #0{
 ; CHECK-NEXT:    [[TMP10:%.*]] = sext i32 [[TMP9]] to i64
 ; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds double, ptr [[TMP6]], i64 [[TMP10]]
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 8 x double>, ptr [[TMP11]], align 8
-; CHECK-NEXT:    [[TMP12:%.*]] = fadd <vscale x 8 x double> [[WIDE_LOAD]], shufflevector (<vscale x 8 x double> insertelement (<vscale x 8 x double> poison, double 1.000000e+00, i32 0), <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP12:%.*]] = fadd <vscale x 8 x double> [[WIDE_LOAD]], shufflevector (<vscale x 8 x double> insertelement (<vscale x 8 x double> poison, double 1.000000e+00, i64 0), <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer)
 ; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds double, ptr [[A:%.*]], i64 [[TMP5]]
 ; CHECK-NEXT:    [[TMP14:%.*]] = call i32 @llvm.vscale.i32()
 ; CHECK-NEXT:    [[TMP15:%.*]] = shl i32 [[TMP14]], 3
@@ -127,7 +127,7 @@ define void @vector_reverse_i64(i64 %N, ptr %a, ptr %b) #0 {
 ; CHECK-NEXT:    [[TMP16:%.*]] = sext i32 [[TMP15]] to i64
 ; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[TMP12]], i64 [[TMP16]]
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 8 x i64>, ptr [[TMP17]], align 8
-; CHECK-NEXT:    [[TMP18:%.*]] = add <vscale x 8 x i64> [[WIDE_LOAD]], shufflevector (<vscale x 8 x i64> insertelement (<vscale x 8 x i64> poison, i64 1, i32 0), <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP18:%.*]] = add <vscale x 8 x i64> [[WIDE_LOAD]], shufflevector (<vscale x 8 x i64> insertelement (<vscale x 8 x i64> poison, i64 1, i64 0), <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer)
 ; CHECK-NEXT:    [[TMP19:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP11]]
 ; CHECK-NEXT:    [[TMP20:%.*]] = call i32 @llvm.vscale.i32()
 ; CHECK-NEXT:    [[TMP21:%.*]] = shl i32 [[TMP20]], 3

diff  --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-extractvalue.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-extractvalue.ll
index d9acd499ff0a1..0d3f368d3551b 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-extractvalue.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-extractvalue.ll
@@ -6,10 +6,10 @@ define void @widen_extractvalue(ptr %dst, {i64, i64} %sv) #0 {
 ; CHECK-LABEL: @widen_extractvalue(
 ; CHECK: vector.body:
 ; CHECK:        [[EXTRACT0:%.*]] = extractvalue { i64, i64 } [[SV:%.*]], 0
-; CHECK-NEXT:   [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[EXTRACT0]], i32 0
+; CHECK-NEXT:   [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[EXTRACT0]], i64 0
 ; CHECK-NEXT:   [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
 ; CHECK-NEXT:   [[EXTRACT1:%.*]] = extractvalue { i64, i64 } [[SV]], 1
-; CHECK-NEXT:   [[DOTSPLATINSERT1:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[EXTRACT1]], i32 0
+; CHECK-NEXT:   [[DOTSPLATINSERT1:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[EXTRACT1]], i64 0
 ; CHECK-NEXT:   [[DOTSPLAT2:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT1]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
 ; CHECK:        [[ADD:%.*]] = add <vscale x 2 x i64> [[DOTSPLAT]], [[DOTSPLAT2]]
 entry:

diff  --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll
index 467dfe907f6ab..99ac379fa8e29 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll
@@ -58,11 +58,11 @@ define void @pointer_induction_used_as_vector(i8** noalias %start.1, i8* noalias
 ; CHECK-NEXT:    [[TMP7:%.*]] = mul i64 [[TMP6]], 1
 ; CHECK-NEXT:    [[TMP8:%.*]] = mul i64 1, [[TMP7]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = mul i64 [[TMP6]], 0
-; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP9]], i32 0
+; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP9]], i64 0
 ; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP10:%.*]] = call <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64()
 ; CHECK-NEXT:    [[TMP11:%.*]] = add <vscale x 2 x i64> [[DOTSPLAT]], [[TMP10]]
-; CHECK-NEXT:    [[VECTOR_GEP:%.*]] = mul <vscale x 2 x i64> [[TMP11]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
+; CHECK-NEXT:    [[VECTOR_GEP:%.*]] = mul <vscale x 2 x i64> [[TMP11]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
 ; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr i8, i8* [[POINTER_PHI]], <vscale x 2 x i64> [[VECTOR_GEP]]
 ; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i8, <vscale x 2 x i8*> [[TMP12]], i64 1
 ; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr i8*, i8** [[NEXT_GEP]], i32 0
@@ -72,7 +72,7 @@ define void @pointer_induction_used_as_vector(i8** noalias %start.1, i8* noalias
 ; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr i8, i8* [[TMP16]], i32 0
 ; CHECK-NEXT:    [[TMP18:%.*]] = bitcast i8* [[TMP17]] to <vscale x 2 x i8>*
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 2 x i8>, <vscale x 2 x i8>* [[TMP18]], align 1
-; CHECK-NEXT:    [[TMP19:%.*]] = add <vscale x 2 x i8> [[WIDE_LOAD]], shufflevector (<vscale x 2 x i8> insertelement (<vscale x 2 x i8> poison, i8 1, i32 0), <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP19:%.*]] = add <vscale x 2 x i8> [[WIDE_LOAD]], shufflevector (<vscale x 2 x i8> insertelement (<vscale x 2 x i8> poison, i8 1, i64 0), <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer)
 ; CHECK-NEXT:    [[TMP20:%.*]] = bitcast i8* [[TMP17]] to <vscale x 2 x i8>*
 ; CHECK-NEXT:    store <vscale x 2 x i8> [[TMP19]], <vscale x 2 x i8>* [[TMP20]], align 1
 ; CHECK-NEXT:    [[TMP21:%.*]] = call i64 @llvm.vscale.i64()
@@ -151,17 +151,17 @@ define void @pointer_induction(i8* noalias %start, i64 %N) {
 ; CHECK-NEXT:    [[TMP7:%.*]] = mul i64 [[TMP6]], 1
 ; CHECK-NEXT:    [[TMP8:%.*]] = mul i64 1, [[TMP7]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = mul i64 [[TMP6]], 0
-; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP9]], i32 0
+; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP9]], i64 0
 ; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP10:%.*]] = call <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64()
 ; CHECK-NEXT:    [[TMP11:%.*]] = add <vscale x 2 x i64> [[DOTSPLAT]], [[TMP10]]
-; CHECK-NEXT:    [[VECTOR_GEP:%.*]] = mul <vscale x 2 x i64> [[TMP11]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
+; CHECK-NEXT:    [[VECTOR_GEP:%.*]] = mul <vscale x 2 x i64> [[TMP11]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
 ; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr i8, i8* [[POINTER_PHI]], <vscale x 2 x i64> [[VECTOR_GEP]]
 ; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <vscale x 2 x i8*> [[TMP12]], i32 0
 ; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr i8, i8* [[TMP13]], i32 0
 ; CHECK-NEXT:    [[TMP15:%.*]] = bitcast i8* [[TMP14]] to <vscale x 2 x i8>*
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 2 x i8>, <vscale x 2 x i8>* [[TMP15]], align 1
-; CHECK-NEXT:    [[TMP16:%.*]] = add <vscale x 2 x i8> [[WIDE_LOAD]], shufflevector (<vscale x 2 x i8> insertelement (<vscale x 2 x i8> poison, i8 1, i32 0), <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP16:%.*]] = add <vscale x 2 x i8> [[WIDE_LOAD]], shufflevector (<vscale x 2 x i8> insertelement (<vscale x 2 x i8> poison, i8 1, i64 0), <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer)
 ; CHECK-NEXT:    [[TMP17:%.*]] = bitcast i8* [[TMP14]] to <vscale x 2 x i8>*
 ; CHECK-NEXT:    store <vscale x 2 x i8> [[TMP16]], <vscale x 2 x i8>* [[TMP17]], align 1
 ; CHECK-NEXT:    [[TMP18:%.*]] = call i64 @llvm.vscale.i64()

diff  --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll
index ec36a884e1f6d..462b86458cab6 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll
@@ -36,22 +36,22 @@ define void @widen_ptr_phi_unrolled(i32* noalias nocapture %a, i32* noalias noca
 ; CHECK-NEXT:    [[TMP6:%.*]] = shl nuw nsw i64 [[TMP5]], 2
 ; CHECK-NEXT:    [[TMP7:%.*]] = shl nuw nsw i64 [[TMP5]], 4
 ; CHECK-NEXT:    [[TMP8:%.*]] = call <vscale x 4 x i64> @llvm.experimental.stepvector.nxv4i64()
-; CHECK-NEXT:    [[VECTOR_GEP:%.*]] = shl <vscale x 4 x i64> [[TMP8]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i32 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT:    [[VECTOR_GEP:%.*]] = shl <vscale x 4 x i64> [[TMP8]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
 ; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr i32, i32* [[POINTER_PHI]], <vscale x 4 x i64> [[VECTOR_GEP]]
 ; CHECK-NEXT:    [[DOTSPLATINSERT2:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP6]], i64 0
 ; CHECK-NEXT:    [[DOTSPLAT3:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT2]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP10:%.*]] = call <vscale x 4 x i64> @llvm.experimental.stepvector.nxv4i64()
 ; CHECK-NEXT:    [[TMP11:%.*]] = add <vscale x 4 x i64> [[DOTSPLAT3]], [[TMP10]]
-; CHECK-NEXT:    [[VECTOR_GEP4:%.*]] = shl <vscale x 4 x i64> [[TMP11]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i32 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT:    [[VECTOR_GEP4:%.*]] = shl <vscale x 4 x i64> [[TMP11]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
 ; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr i32, i32* [[POINTER_PHI]], <vscale x 4 x i64> [[VECTOR_GEP4]]
 ; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i32, <vscale x 4 x i32*> [[TMP9]], i64 1
 ; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i32, <vscale x 4 x i32*> [[TMP12]], i64 1
-; CHECK-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0i32(<vscale x 4 x i32*> [[TMP9]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i32 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32> poison)
-; CHECK-NEXT:    [[WIDE_MASKED_GATHER5:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0i32(<vscale x 4 x i32*> [[TMP12]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i32 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32> poison)
-; CHECK-NEXT:    [[WIDE_MASKED_GATHER6:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0i32(<vscale x 4 x i32*> [[TMP13]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i32 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32> poison)
-; CHECK-NEXT:    [[WIDE_MASKED_GATHER7:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0i32(<vscale x 4 x i32*> [[TMP14]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i32 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32> poison)
-; CHECK-NEXT:    [[TMP15:%.*]] = add nsw <vscale x 4 x i32> [[WIDE_MASKED_GATHER]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
-; CHECK-NEXT:    [[TMP16:%.*]] = add nsw <vscale x 4 x i32> [[WIDE_MASKED_GATHER5]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0i32(<vscale x 4 x i32*> [[TMP9]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32> poison)
+; CHECK-NEXT:    [[WIDE_MASKED_GATHER5:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0i32(<vscale x 4 x i32*> [[TMP12]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32> poison)
+; CHECK-NEXT:    [[WIDE_MASKED_GATHER6:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0i32(<vscale x 4 x i32*> [[TMP13]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32> poison)
+; CHECK-NEXT:    [[WIDE_MASKED_GATHER7:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0i32(<vscale x 4 x i32*> [[TMP14]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32> poison)
+; CHECK-NEXT:    [[TMP15:%.*]] = add nsw <vscale x 4 x i32> [[WIDE_MASKED_GATHER]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP16:%.*]] = add nsw <vscale x 4 x i32> [[WIDE_MASKED_GATHER5]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
 ; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]]
 ; CHECK-NEXT:    [[TMP18:%.*]] = bitcast i32* [[TMP17]] to <vscale x 4 x i32>*
 ; CHECK-NEXT:    store <vscale x 4 x i32> [[TMP15]], <vscale x 4 x i32>* [[TMP18]], align 4
@@ -61,8 +61,8 @@ define void @widen_ptr_phi_unrolled(i32* noalias nocapture %a, i32* noalias noca
 ; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i32, i32* [[TMP17]], i64 [[TMP21]]
 ; CHECK-NEXT:    [[TMP23:%.*]] = bitcast i32* [[TMP22]] to <vscale x 4 x i32>*
 ; CHECK-NEXT:    store <vscale x 4 x i32> [[TMP16]], <vscale x 4 x i32>* [[TMP23]], align 4
-; CHECK-NEXT:    [[TMP24:%.*]] = add nsw <vscale x 4 x i32> [[WIDE_MASKED_GATHER6]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
-; CHECK-NEXT:    [[TMP25:%.*]] = add nsw <vscale x 4 x i32> [[WIDE_MASKED_GATHER7]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP24:%.*]] = add nsw <vscale x 4 x i32> [[WIDE_MASKED_GATHER6]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP25:%.*]] = add nsw <vscale x 4 x i32> [[WIDE_MASKED_GATHER7]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
 ; CHECK-NEXT:    [[TMP26:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]]
 ; CHECK-NEXT:    [[TMP27:%.*]] = bitcast i32* [[TMP26]] to <vscale x 4 x i32>*
 ; CHECK-NEXT:    store <vscale x 4 x i32> [[TMP24]], <vscale x 4 x i32>* [[TMP27]], align 4
@@ -167,8 +167,8 @@ define void @widen_2ptrs_phi_unrolled(i32* noalias nocapture %dst, i32* noalias
 ; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr i32, i32* [[NEXT_GEP]], i64 [[TMP7]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = bitcast i32* [[TMP8]] to <vscale x 4 x i32>*
 ; CHECK-NEXT:    [[WIDE_LOAD7:%.*]] = load <vscale x 4 x i32>, <vscale x 4 x i32>* [[TMP9]], align 4
-; CHECK-NEXT:    [[TMP10:%.*]] = shl nsw <vscale x 4 x i32> [[WIDE_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
-; CHECK-NEXT:    [[TMP11:%.*]] = shl nsw <vscale x 4 x i32> [[WIDE_LOAD7]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP10:%.*]] = shl nsw <vscale x 4 x i32> [[WIDE_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP11:%.*]] = shl nsw <vscale x 4 x i32> [[WIDE_LOAD7]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
 ; CHECK-NEXT:    [[TMP12:%.*]] = bitcast i32* [[NEXT_GEP5]] to <vscale x 4 x i32>*
 ; CHECK-NEXT:    store <vscale x 4 x i32> [[TMP10]], <vscale x 4 x i32>* [[TMP12]], align 4
 ; CHECK-NEXT:    [[TMP13:%.*]] = call i32 @llvm.vscale.i32()

diff  --git a/llvm/test/Transforms/LoopVectorize/AArch64/tail-fold-uniform-memops.ll b/llvm/test/Transforms/LoopVectorize/AArch64/tail-fold-uniform-memops.ll
index 3d3ae81b133e9..f0fc30afa9448 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/tail-fold-uniform-memops.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/tail-fold-uniform-memops.ll
@@ -17,7 +17,7 @@ define void @uniform_load(i32* noalias %dst, i32* noalias readonly %src, i64 %n)
 ; CHECK-NEXT:    [[TMP3:%.*]] = add i64 [[IDX]], 0
 ; CHECK-NEXT:    [[LOAD_VAL:%.*]] = load i32, i32* %src, align 4
 ; CHECK-NOT:     load i32, i32* %src, align 4
-; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x i32> poison, i32 [[LOAD_VAL]], i32 0
+; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x i32> poison, i32 [[LOAD_VAL]], i64 0
 ; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, i32* %dst, i64 [[TMP3]]
 ; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[TMP6]], i32 0
@@ -53,7 +53,7 @@ define void @cond_uniform_load(i32* nocapture %dst, i32* nocapture readonly %src
 ; CHECK-LABEL: @cond_uniform_load(
 ; CHECK:       vector.ph:
 ; CHECK:         [[INIT_ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 0, i64 %n)
-; CHECK:         [[TMP1:%.*]] = insertelement <4 x i32*> poison, i32* %src, i32 0
+; CHECK:         [[TMP1:%.*]] = insertelement <4 x i32*> poison, i32* %src, i64 0
 ; CHECK-NEXT:    [[SRC_SPLAT:%.*]] = shufflevector <4 x i32*> [[TMP1]], <4 x i32*> poison, <4 x i32> zeroinitializer
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[IDX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[IDX_NEXT:%.*]], %vector.body ]

diff  --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-gather-scatter-tailpred.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-gather-scatter-tailpred.ll
index c4ba0839a59e5..d971aa5bfc0cd 100644
--- a/llvm/test/Transforms/LoopVectorize/ARM/mve-gather-scatter-tailpred.ll
+++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-gather-scatter-tailpred.ll
@@ -515,12 +515,12 @@ define void @test_stride_noninvar3_4i32(ptr readonly %data, ptr noalias nocaptur
 ; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[N]], [[N_MOD_VF]]
 ; CHECK-NEXT:    [[TMP0:%.*]] = mul i32 [[N_VEC]], [[X:%.*]]
 ; CHECK-NEXT:    [[IND_END:%.*]] = add i32 3, [[TMP0]]
-; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[X]], i32 0
+; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[X]], i64 0
 ; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP1:%.*]] = mul <4 x i32> <i32 0, i32 1, i32 2, i32 3>, [[DOTSPLAT]]
 ; CHECK-NEXT:    [[INDUCTION:%.*]] = add <4 x i32> <i32 3, i32 3, i32 3, i32 3>, [[TMP1]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = mul i32 [[X]], 4
-; CHECK-NEXT:    [[DOTSPLATINSERT2:%.*]] = insertelement <4 x i32> poison, i32 [[TMP2]], i32 0
+; CHECK-NEXT:    [[DOTSPLATINSERT2:%.*]] = insertelement <4 x i32> poison, i32 [[TMP2]], i64 0
 ; CHECK-NEXT:    [[DOTSPLAT3:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT2]], <4 x i32> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:

diff  --git a/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-not-allowed.ll b/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-not-allowed.ll
index 6d3ffab0a5980..7b9820dd101bc 100644
--- a/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-not-allowed.ll
+++ b/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-not-allowed.ll
@@ -528,9 +528,9 @@ define dso_local void @select_not_allowed(ptr noalias nocapture %A, ptr noalias
 ; CHECK:       vector.ph:
 ; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[N]], 4
 ; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[N]], [[N_MOD_VF]]
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x ptr> poison, ptr [[C:%.*]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x ptr> poison, ptr [[C:%.*]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x ptr> [[BROADCAST_SPLATINSERT]], <4 x ptr> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x ptr> poison, ptr [[B:%.*]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x ptr> poison, ptr [[B:%.*]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x ptr> [[BROADCAST_SPLATINSERT1]], <4 x ptr> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:

diff  --git a/llvm/test/Transforms/LoopVectorize/RISCV/defaults.ll b/llvm/test/Transforms/LoopVectorize/RISCV/defaults.ll
index 7aca0317acc5f..d58ff5051c621 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/defaults.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/defaults.ll
@@ -20,7 +20,7 @@ define void @vector_add(ptr noalias nocapture %a, i64 %v) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]]
 ; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[V:%.*]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[V:%.*]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 1 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:

diff  --git a/llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll b/llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll
index b366f72b59dc8..8a92ffa1f7037 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll
@@ -17,7 +17,7 @@ define void @vector_udiv(ptr noalias nocapture %a, i64 %v, i64 %n) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]]
 ; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[V:%.*]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[V:%.*]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 1 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
@@ -54,9 +54,9 @@ define void @vector_udiv(ptr noalias nocapture %a, i64 %v, i64 %n) {
 ; FIXED-NEXT:  entry:
 ; FIXED-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; FIXED:       vector.ph:
-; FIXED-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[V:%.*]], i32 0
+; FIXED-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[V:%.*]], i64 0
 ; FIXED-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
-; FIXED-NEXT:    [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <2 x i64> poison, i64 [[V]], i32 0
+; FIXED-NEXT:    [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <2 x i64> poison, i64 [[V]], i64 0
 ; FIXED-NEXT:    [[BROADCAST_SPLAT3:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT2]], <2 x i64> poison, <2 x i32> zeroinitializer
 ; FIXED-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; FIXED:       vector.body:
@@ -121,7 +121,7 @@ define void @vector_sdiv(ptr noalias nocapture %a, i64 %v, i64 %n) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]]
 ; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[V:%.*]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[V:%.*]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 1 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
@@ -158,9 +158,9 @@ define void @vector_sdiv(ptr noalias nocapture %a, i64 %v, i64 %n) {
 ; FIXED-NEXT:  entry:
 ; FIXED-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; FIXED:       vector.ph:
-; FIXED-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[V:%.*]], i32 0
+; FIXED-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[V:%.*]], i64 0
 ; FIXED-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
-; FIXED-NEXT:    [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <2 x i64> poison, i64 [[V]], i32 0
+; FIXED-NEXT:    [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <2 x i64> poison, i64 [[V]], i64 0
 ; FIXED-NEXT:    [[BROADCAST_SPLAT3:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT2]], <2 x i64> poison, <2 x i32> zeroinitializer
 ; FIXED-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; FIXED:       vector.body:
@@ -225,7 +225,7 @@ define void @vector_urem(ptr noalias nocapture %a, i64 %v, i64 %n) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]]
 ; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[V:%.*]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[V:%.*]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 1 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
@@ -262,9 +262,9 @@ define void @vector_urem(ptr noalias nocapture %a, i64 %v, i64 %n) {
 ; FIXED-NEXT:  entry:
 ; FIXED-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; FIXED:       vector.ph:
-; FIXED-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[V:%.*]], i32 0
+; FIXED-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[V:%.*]], i64 0
 ; FIXED-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
-; FIXED-NEXT:    [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <2 x i64> poison, i64 [[V]], i32 0
+; FIXED-NEXT:    [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <2 x i64> poison, i64 [[V]], i64 0
 ; FIXED-NEXT:    [[BROADCAST_SPLAT3:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT2]], <2 x i64> poison, <2 x i32> zeroinitializer
 ; FIXED-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; FIXED:       vector.body:
@@ -329,7 +329,7 @@ define void @vector_srem(ptr noalias nocapture %a, i64 %v, i64 %n) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]]
 ; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[V:%.*]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[V:%.*]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 1 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
@@ -366,9 +366,9 @@ define void @vector_srem(ptr noalias nocapture %a, i64 %v, i64 %n) {
 ; FIXED-NEXT:  entry:
 ; FIXED-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; FIXED:       vector.ph:
-; FIXED-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[V:%.*]], i32 0
+; FIXED-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[V:%.*]], i64 0
 ; FIXED-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
-; FIXED-NEXT:    [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <2 x i64> poison, i64 [[V]], i32 0
+; FIXED-NEXT:    [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <2 x i64> poison, i64 [[V]], i64 0
 ; FIXED-NEXT:    [[BROADCAST_SPLAT3:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT2]], <2 x i64> poison, <2 x i32> zeroinitializer
 ; FIXED-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; FIXED:       vector.body:
@@ -433,7 +433,7 @@ define void @predicated_udiv(ptr noalias nocapture %a, i64 %v, i64 %n) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]]
 ; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[V:%.*]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[V:%.*]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 1 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
@@ -443,9 +443,9 @@ define void @predicated_udiv(ptr noalias nocapture %a, i64 %v, i64 %n) {
 ; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 1 x i64>, ptr [[TMP4]], align 8
 ; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne <vscale x 1 x i64> [[BROADCAST_SPLAT]], zeroinitializer
-; CHECK-NEXT:    [[TMP6:%.*]] = select <vscale x 1 x i1> [[TMP5]], <vscale x 1 x i64> [[BROADCAST_SPLAT]], <vscale x 1 x i64> shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 1, i32 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP6:%.*]] = select <vscale x 1 x i1> [[TMP5]], <vscale x 1 x i64> [[BROADCAST_SPLAT]], <vscale x 1 x i64> shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 1, i64 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer)
 ; CHECK-NEXT:    [[TMP7:%.*]] = udiv <vscale x 1 x i64> [[WIDE_LOAD]], [[TMP6]]
-; CHECK-NEXT:    [[TMP8:%.*]] = xor <vscale x 1 x i1> [[TMP5]], shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i32 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP8:%.*]] = xor <vscale x 1 x i1> [[TMP5]], shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i64 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer)
 ; CHECK-NEXT:    [[PREDPHI:%.*]] = select <vscale x 1 x i1> [[TMP5]], <vscale x 1 x i64> [[TMP7]], <vscale x 1 x i64> [[WIDE_LOAD]]
 ; CHECK-NEXT:    store <vscale x 1 x i64> [[PREDPHI]], ptr [[TMP4]], align 8
 ; CHECK-NEXT:    [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
@@ -480,9 +480,9 @@ define void @predicated_udiv(ptr noalias nocapture %a, i64 %v, i64 %n) {
 ; FIXED-NEXT:  entry:
 ; FIXED-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; FIXED:       vector.ph:
-; FIXED-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[V:%.*]], i32 0
+; FIXED-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[V:%.*]], i64 0
 ; FIXED-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
-; FIXED-NEXT:    [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <2 x i64> poison, i64 [[V]], i32 0
+; FIXED-NEXT:    [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <2 x i64> poison, i64 [[V]], i64 0
 ; FIXED-NEXT:    [[BROADCAST_SPLAT3:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT2]], <2 x i64> poison, <2 x i32> zeroinitializer
 ; FIXED-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; FIXED:       vector.body:
@@ -567,7 +567,7 @@ define void @predicated_sdiv(ptr noalias nocapture %a, i64 %v, i64 %n) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]]
 ; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[V:%.*]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[V:%.*]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 1 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
@@ -577,9 +577,9 @@ define void @predicated_sdiv(ptr noalias nocapture %a, i64 %v, i64 %n) {
 ; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 1 x i64>, ptr [[TMP4]], align 8
 ; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne <vscale x 1 x i64> [[BROADCAST_SPLAT]], zeroinitializer
-; CHECK-NEXT:    [[TMP6:%.*]] = select <vscale x 1 x i1> [[TMP5]], <vscale x 1 x i64> [[BROADCAST_SPLAT]], <vscale x 1 x i64> shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 1, i32 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP6:%.*]] = select <vscale x 1 x i1> [[TMP5]], <vscale x 1 x i64> [[BROADCAST_SPLAT]], <vscale x 1 x i64> shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 1, i64 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer)
 ; CHECK-NEXT:    [[TMP7:%.*]] = sdiv <vscale x 1 x i64> [[WIDE_LOAD]], [[TMP6]]
-; CHECK-NEXT:    [[TMP8:%.*]] = xor <vscale x 1 x i1> [[TMP5]], shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i32 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP8:%.*]] = xor <vscale x 1 x i1> [[TMP5]], shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i64 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer)
 ; CHECK-NEXT:    [[PREDPHI:%.*]] = select <vscale x 1 x i1> [[TMP5]], <vscale x 1 x i64> [[TMP7]], <vscale x 1 x i64> [[WIDE_LOAD]]
 ; CHECK-NEXT:    store <vscale x 1 x i64> [[PREDPHI]], ptr [[TMP4]], align 8
 ; CHECK-NEXT:    [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
@@ -614,9 +614,9 @@ define void @predicated_sdiv(ptr noalias nocapture %a, i64 %v, i64 %n) {
 ; FIXED-NEXT:  entry:
 ; FIXED-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; FIXED:       vector.ph:
-; FIXED-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[V:%.*]], i32 0
+; FIXED-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[V:%.*]], i64 0
 ; FIXED-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
-; FIXED-NEXT:    [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <2 x i64> poison, i64 [[V]], i32 0
+; FIXED-NEXT:    [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <2 x i64> poison, i64 [[V]], i64 0
 ; FIXED-NEXT:    [[BROADCAST_SPLAT3:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT2]], <2 x i64> poison, <2 x i32> zeroinitializer
 ; FIXED-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; FIXED:       vector.body:
@@ -708,9 +708,9 @@ define void @predicated_udiv_by_constant(ptr noalias nocapture %a, i64 %n) {
 ; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP2]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 1 x i64>, ptr [[TMP4]], align 8
-; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne <vscale x 1 x i64> [[WIDE_LOAD]], shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 42, i32 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer)
-; CHECK-NEXT:    [[TMP6:%.*]] = udiv <vscale x 1 x i64> [[WIDE_LOAD]], shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 27, i32 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer)
-; CHECK-NEXT:    [[TMP7:%.*]] = xor <vscale x 1 x i1> [[TMP5]], shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i32 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne <vscale x 1 x i64> [[WIDE_LOAD]], shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 42, i64 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP6:%.*]] = udiv <vscale x 1 x i64> [[WIDE_LOAD]], shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 27, i64 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP7:%.*]] = xor <vscale x 1 x i1> [[TMP5]], shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i64 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer)
 ; CHECK-NEXT:    [[PREDPHI:%.*]] = select <vscale x 1 x i1> [[TMP5]], <vscale x 1 x i64> [[TMP6]], <vscale x 1 x i64> [[WIDE_LOAD]]
 ; CHECK-NEXT:    store <vscale x 1 x i64> [[PREDPHI]], ptr [[TMP4]], align 8
 ; CHECK-NEXT:    [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
@@ -833,9 +833,9 @@ define void @predicated_sdiv_by_constant(ptr noalias nocapture %a, i64 %n) {
 ; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP2]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 1 x i64>, ptr [[TMP4]], align 8
-; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne <vscale x 1 x i64> [[WIDE_LOAD]], shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 42, i32 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer)
-; CHECK-NEXT:    [[TMP6:%.*]] = sdiv <vscale x 1 x i64> [[WIDE_LOAD]], shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 27, i32 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer)
-; CHECK-NEXT:    [[TMP7:%.*]] = xor <vscale x 1 x i1> [[TMP5]], shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i32 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne <vscale x 1 x i64> [[WIDE_LOAD]], shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 42, i64 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP6:%.*]] = sdiv <vscale x 1 x i64> [[WIDE_LOAD]], shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 27, i64 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP7:%.*]] = xor <vscale x 1 x i1> [[TMP5]], shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i64 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer)
 ; CHECK-NEXT:    [[PREDPHI:%.*]] = select <vscale x 1 x i1> [[TMP5]], <vscale x 1 x i64> [[TMP6]], <vscale x 1 x i64> [[WIDE_LOAD]]
 ; CHECK-NEXT:    store <vscale x 1 x i64> [[PREDPHI]], ptr [[TMP4]], align 8
 ; CHECK-NEXT:    [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
@@ -970,14 +970,14 @@ define void @predicated_sdiv_by_minus_one(ptr noalias nocapture %a, i64 %n) {
 ; CHECK-NEXT:    [[TMP14:%.*]] = mul i32 [[TMP13]], 8
 ; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[TMP10]], i32 [[TMP14]]
 ; CHECK-NEXT:    [[WIDE_LOAD1:%.*]] = load <vscale x 8 x i8>, ptr [[TMP15]], align 1
-; CHECK-NEXT:    [[TMP16:%.*]] = icmp ne <vscale x 8 x i8> [[WIDE_LOAD]], shufflevector (<vscale x 8 x i8> insertelement (<vscale x 8 x i8> poison, i8 -128, i32 0), <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer)
-; CHECK-NEXT:    [[TMP17:%.*]] = icmp ne <vscale x 8 x i8> [[WIDE_LOAD1]], shufflevector (<vscale x 8 x i8> insertelement (<vscale x 8 x i8> poison, i8 -128, i32 0), <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer)
-; CHECK-NEXT:    [[TMP18:%.*]] = select <vscale x 8 x i1> [[TMP16]], <vscale x 8 x i8> shufflevector (<vscale x 8 x i8> insertelement (<vscale x 8 x i8> poison, i8 -1, i32 0), <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer), <vscale x 8 x i8> shufflevector (<vscale x 8 x i8> insertelement (<vscale x 8 x i8> poison, i8 1, i32 0), <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer)
-; CHECK-NEXT:    [[TMP19:%.*]] = select <vscale x 8 x i1> [[TMP17]], <vscale x 8 x i8> shufflevector (<vscale x 8 x i8> insertelement (<vscale x 8 x i8> poison, i8 -1, i32 0), <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer), <vscale x 8 x i8> shufflevector (<vscale x 8 x i8> insertelement (<vscale x 8 x i8> poison, i8 1, i32 0), <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP16:%.*]] = icmp ne <vscale x 8 x i8> [[WIDE_LOAD]], shufflevector (<vscale x 8 x i8> insertelement (<vscale x 8 x i8> poison, i8 -128, i64 0), <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP17:%.*]] = icmp ne <vscale x 8 x i8> [[WIDE_LOAD1]], shufflevector (<vscale x 8 x i8> insertelement (<vscale x 8 x i8> poison, i8 -128, i64 0), <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP18:%.*]] = select <vscale x 8 x i1> [[TMP16]], <vscale x 8 x i8> shufflevector (<vscale x 8 x i8> insertelement (<vscale x 8 x i8> poison, i8 -1, i64 0), <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer), <vscale x 8 x i8> shufflevector (<vscale x 8 x i8> insertelement (<vscale x 8 x i8> poison, i8 1, i64 0), <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP19:%.*]] = select <vscale x 8 x i1> [[TMP17]], <vscale x 8 x i8> shufflevector (<vscale x 8 x i8> insertelement (<vscale x 8 x i8> poison, i8 -1, i64 0), <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer), <vscale x 8 x i8> shufflevector (<vscale x 8 x i8> insertelement (<vscale x 8 x i8> poison, i8 1, i64 0), <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer)
 ; CHECK-NEXT:    [[TMP20:%.*]] = sdiv <vscale x 8 x i8> [[WIDE_LOAD]], [[TMP18]]
 ; CHECK-NEXT:    [[TMP21:%.*]] = sdiv <vscale x 8 x i8> [[WIDE_LOAD1]], [[TMP19]]
-; CHECK-NEXT:    [[TMP22:%.*]] = xor <vscale x 8 x i1> [[TMP16]], shufflevector (<vscale x 8 x i1> insertelement (<vscale x 8 x i1> poison, i1 true, i32 0), <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer)
-; CHECK-NEXT:    [[TMP23:%.*]] = xor <vscale x 8 x i1> [[TMP17]], shufflevector (<vscale x 8 x i1> insertelement (<vscale x 8 x i1> poison, i1 true, i32 0), <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP22:%.*]] = xor <vscale x 8 x i1> [[TMP16]], shufflevector (<vscale x 8 x i1> insertelement (<vscale x 8 x i1> poison, i1 true, i64 0), <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP23:%.*]] = xor <vscale x 8 x i1> [[TMP17]], shufflevector (<vscale x 8 x i1> insertelement (<vscale x 8 x i1> poison, i1 true, i64 0), <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer)
 ; CHECK-NEXT:    [[PREDPHI:%.*]] = select <vscale x 8 x i1> [[TMP16]], <vscale x 8 x i8> [[TMP20]], <vscale x 8 x i8> [[WIDE_LOAD]]
 ; CHECK-NEXT:    [[PREDPHI2:%.*]] = select <vscale x 8 x i1> [[TMP17]], <vscale x 8 x i8> [[TMP21]], <vscale x 8 x i8> [[WIDE_LOAD1]]
 ; CHECK-NEXT:    store <vscale x 8 x i8> [[PREDPHI]], ptr [[TMP12]], align 1

diff  --git a/llvm/test/Transforms/LoopVectorize/RISCV/illegal-type.ll b/llvm/test/Transforms/LoopVectorize/RISCV/illegal-type.ll
index f7aa8e0aa12a7..8e387d6196eef 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/illegal-type.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/illegal-type.ll
@@ -109,9 +109,9 @@ define void @uniform_store_i1(ptr noalias %dst, ptr noalias %start, i64 %N) {
 ; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = mul i64 [[N_VEC]], 8
 ; CHECK-NEXT:    [[IND_END:%.*]] = getelementptr i8, ptr [[START:%.*]], i64 [[TMP1]]
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x ptr> poison, ptr [[START]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x ptr> poison, ptr [[START]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x ptr> [[BROADCAST_SPLATINSERT]], <2 x ptr> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <2 x ptr> poison, ptr [[START]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <2 x ptr> poison, ptr [[START]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT4:%.*]] = shufflevector <2 x ptr> [[BROADCAST_SPLATINSERT3]], <2 x ptr> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:

diff  --git a/llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll b/llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll
index 1046f4cda4cdf..42e632f7c8d7d 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll
@@ -27,7 +27,7 @@ define void @trip5_i8(ptr noalias nocapture noundef %dst, ptr noalias nocapture
 ; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[SRC:%.*]], i64 [[TMP8]]
 ; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[TMP9]], i32 0
 ; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 8 x i8> @llvm.masked.load.nxv8i8.p0(ptr [[TMP10]], i32 1, <vscale x 8 x i1> [[ACTIVE_LANE_MASK]], <vscale x 8 x i8> poison)
-; CHECK-NEXT:    [[TMP11:%.*]] = shl <vscale x 8 x i8> [[WIDE_MASKED_LOAD]], shufflevector (<vscale x 8 x i8> insertelement (<vscale x 8 x i8> poison, i8 1, i32 0), <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP11:%.*]] = shl <vscale x 8 x i8> [[WIDE_MASKED_LOAD]], shufflevector (<vscale x 8 x i8> insertelement (<vscale x 8 x i8> poison, i8 1, i64 0), <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer)
 ; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[DST:%.*]], i64 [[TMP8]]
 ; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[TMP12]], i32 0
 ; CHECK-NEXT:    [[WIDE_MASKED_LOAD1:%.*]] = call <vscale x 8 x i8> @llvm.masked.load.nxv8i8.p0(ptr [[TMP13]], i32 1, <vscale x 8 x i1> [[ACTIVE_LANE_MASK]], <vscale x 8 x i8> poison)

diff  --git a/llvm/test/Transforms/LoopVectorize/RISCV/mask-index-type.ll b/llvm/test/Transforms/LoopVectorize/RISCV/mask-index-type.ll
index 810236401801c..78141e41b472e 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/mask-index-type.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/mask-index-type.ll
@@ -22,16 +22,16 @@ define void @test(ptr noalias nocapture %a, ptr noalias nocapture %b, i32 %v) {
 ; VLENUNK-NEXT:    [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
 ; VLENUNK-NEXT:    [[TMP4:%.*]] = call <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64()
 ; VLENUNK-NEXT:    [[TMP5:%.*]] = add <vscale x 2 x i64> [[TMP4]], zeroinitializer
-; VLENUNK-NEXT:    [[TMP6:%.*]] = mul <vscale x 2 x i64> [[TMP5]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
+; VLENUNK-NEXT:    [[TMP6:%.*]] = mul <vscale x 2 x i64> [[TMP5]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
 ; VLENUNK-NEXT:    [[INDUCTION:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP6]]
 ; VLENUNK-NEXT:    [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
 ; VLENUNK-NEXT:    [[TMP8:%.*]] = mul i64 [[TMP7]], 2
 ; VLENUNK-NEXT:    [[TMP9:%.*]] = mul i64 1, [[TMP8]]
-; VLENUNK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP9]], i32 0
+; VLENUNK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP9]], i64 0
 ; VLENUNK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
-; VLENUNK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i32> poison, i32 [[V:%.*]], i32 0
+; VLENUNK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i32> poison, i32 [[V:%.*]], i64 0
 ; VLENUNK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
-; VLENUNK-NEXT:    [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <vscale x 2 x i32> poison, i32 [[V]], i32 0
+; VLENUNK-NEXT:    [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <vscale x 2 x i32> poison, i32 [[V]], i64 0
 ; VLENUNK-NEXT:    [[BROADCAST_SPLAT5:%.*]] = shufflevector <vscale x 2 x i32> [[BROADCAST_SPLATINSERT4]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
 ; VLENUNK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; VLENUNK:       vector.body:
@@ -44,8 +44,8 @@ define void @test(ptr noalias nocapture %a, ptr noalias nocapture %b, i32 %v) {
 ; VLENUNK-NEXT:    [[TMP13:%.*]] = add i64 [[TMP12]], 0
 ; VLENUNK-NEXT:    [[TMP14:%.*]] = mul i64 [[TMP13]], 1
 ; VLENUNK-NEXT:    [[TMP15:%.*]] = add i64 [[INDEX]], [[TMP14]]
-; VLENUNK-NEXT:    [[TMP16:%.*]] = icmp ult <vscale x 2 x i64> [[VEC_IND]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 512, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
-; VLENUNK-NEXT:    [[TMP17:%.*]] = icmp ult <vscale x 2 x i64> [[STEP_ADD]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 512, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
+; VLENUNK-NEXT:    [[TMP16:%.*]] = icmp ult <vscale x 2 x i64> [[VEC_IND]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 512, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
+; VLENUNK-NEXT:    [[TMP17:%.*]] = icmp ult <vscale x 2 x i64> [[STEP_ADD]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 512, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
 ; VLENUNK-NEXT:    [[TMP18:%.*]] = getelementptr i32, ptr [[A:%.*]], i64 [[TMP10]]
 ; VLENUNK-NEXT:    [[TMP19:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP15]]
 ; VLENUNK-NEXT:    [[TMP20:%.*]] = getelementptr i32, ptr [[TMP18]], i32 0
@@ -54,8 +54,8 @@ define void @test(ptr noalias nocapture %a, ptr noalias nocapture %b, i32 %v) {
 ; VLENUNK-NEXT:    [[TMP22:%.*]] = mul i32 [[TMP21]], 2
 ; VLENUNK-NEXT:    [[TMP23:%.*]] = getelementptr i32, ptr [[TMP18]], i32 [[TMP22]]
 ; VLENUNK-NEXT:    [[WIDE_MASKED_LOAD2:%.*]] = call <vscale x 2 x i32> @llvm.masked.load.nxv2i32.p0(ptr [[TMP23]], i32 4, <vscale x 2 x i1> [[TMP17]], <vscale x 2 x i32> poison)
-; VLENUNK-NEXT:    [[TMP24:%.*]] = xor <vscale x 2 x i1> [[TMP16]], shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i32 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)
-; VLENUNK-NEXT:    [[TMP25:%.*]] = xor <vscale x 2 x i1> [[TMP17]], shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i32 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)
+; VLENUNK-NEXT:    [[TMP24:%.*]] = xor <vscale x 2 x i1> [[TMP16]], shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)
+; VLENUNK-NEXT:    [[TMP25:%.*]] = xor <vscale x 2 x i1> [[TMP17]], shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)
 ; VLENUNK-NEXT:    [[PREDPHI:%.*]] = select <vscale x 2 x i1> [[TMP24]], <vscale x 2 x i32> zeroinitializer, <vscale x 2 x i32> [[WIDE_MASKED_LOAD]]
 ; VLENUNK-NEXT:    [[PREDPHI3:%.*]] = select <vscale x 2 x i1> [[TMP25]], <vscale x 2 x i32> zeroinitializer, <vscale x 2 x i32> [[WIDE_MASKED_LOAD2]]
 ; VLENUNK-NEXT:    [[TMP26:%.*]] = add <vscale x 2 x i32> [[PREDPHI]], [[BROADCAST_SPLAT]]

diff  --git a/llvm/test/Transforms/LoopVectorize/RISCV/masked_gather_scatter.ll b/llvm/test/Transforms/LoopVectorize/RISCV/masked_gather_scatter.ll
index 4df17b95d849d..ac56579af2d26 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/masked_gather_scatter.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/masked_gather_scatter.ll
@@ -39,20 +39,20 @@ define void @foo4(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea
 ; RV32-NEXT:    [[IND_END:%.*]] = mul i64 [[N_VEC]], 16
 ; RV32-NEXT:    [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.experimental.stepvector.nxv1i64()
 ; RV32-NEXT:    [[TMP4:%.*]] = add <vscale x 1 x i64> [[TMP3]], zeroinitializer
-; RV32-NEXT:    [[TMP5:%.*]] = mul <vscale x 1 x i64> [[TMP4]], shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 16, i32 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer)
+; RV32-NEXT:    [[TMP5:%.*]] = mul <vscale x 1 x i64> [[TMP4]], shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 16, i64 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer)
 ; RV32-NEXT:    [[INDUCTION:%.*]] = add <vscale x 1 x i64> zeroinitializer, [[TMP5]]
 ; RV32-NEXT:    [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
 ; RV32-NEXT:    [[TMP7:%.*]] = mul i64 16, [[TMP6]]
-; RV32-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[TMP7]], i32 0
+; RV32-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[TMP7]], i64 0
 ; RV32-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 1 x i64> [[DOTSPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
 ; RV32-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; RV32:       vector.body:
 ; RV32-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; RV32-NEXT:    [[VEC_IND:%.*]] = phi <vscale x 1 x i64> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; RV32-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], <vscale x 1 x i64> [[VEC_IND]]
-; RV32-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 1 x i32> @llvm.masked.gather.nxv1i32.nxv1p0(<vscale x 1 x ptr> [[TMP8]], i32 4, <vscale x 1 x i1> shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i32 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i32> poison), !alias.scope !0
-; RV32-NEXT:    [[TMP9:%.*]] = icmp slt <vscale x 1 x i32> [[WIDE_MASKED_GATHER]], shufflevector (<vscale x 1 x i32> insertelement (<vscale x 1 x i32> poison, i32 100, i32 0), <vscale x 1 x i32> poison, <vscale x 1 x i32> zeroinitializer)
-; RV32-NEXT:    [[TMP10:%.*]] = shl nuw nsw <vscale x 1 x i64> [[VEC_IND]], shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 1, i32 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer)
+; RV32-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 1 x i32> @llvm.masked.gather.nxv1i32.nxv1p0(<vscale x 1 x ptr> [[TMP8]], i32 4, <vscale x 1 x i1> shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i64 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i32> poison), !alias.scope !0
+; RV32-NEXT:    [[TMP9:%.*]] = icmp slt <vscale x 1 x i32> [[WIDE_MASKED_GATHER]], shufflevector (<vscale x 1 x i32> insertelement (<vscale x 1 x i32> poison, i32 100, i64 0), <vscale x 1 x i32> poison, <vscale x 1 x i32> zeroinitializer)
+; RV32-NEXT:    [[TMP10:%.*]] = shl nuw nsw <vscale x 1 x i64> [[VEC_IND]], shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 1, i64 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer)
 ; RV32-NEXT:    [[TMP11:%.*]] = getelementptr inbounds double, ptr [[B]], <vscale x 1 x i64> [[TMP10]]
 ; RV32-NEXT:    [[WIDE_MASKED_GATHER6:%.*]] = call <vscale x 1 x double> @llvm.masked.gather.nxv1f64.nxv1p0(<vscale x 1 x ptr> [[TMP11]], i32 8, <vscale x 1 x i1> [[TMP9]], <vscale x 1 x double> poison), !alias.scope !3
 ; RV32-NEXT:    [[TMP12:%.*]] = sitofp <vscale x 1 x i32> [[WIDE_MASKED_GATHER]] to <vscale x 1 x double>
@@ -117,20 +117,20 @@ define void @foo4(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea
 ; RV64-NEXT:    [[IND_END:%.*]] = mul i64 [[N_VEC]], 16
 ; RV64-NEXT:    [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.experimental.stepvector.nxv1i64()
 ; RV64-NEXT:    [[TMP4:%.*]] = add <vscale x 1 x i64> [[TMP3]], zeroinitializer
-; RV64-NEXT:    [[TMP5:%.*]] = mul <vscale x 1 x i64> [[TMP4]], shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 16, i32 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer)
+; RV64-NEXT:    [[TMP5:%.*]] = mul <vscale x 1 x i64> [[TMP4]], shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 16, i64 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer)
 ; RV64-NEXT:    [[INDUCTION:%.*]] = add <vscale x 1 x i64> zeroinitializer, [[TMP5]]
 ; RV64-NEXT:    [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
 ; RV64-NEXT:    [[TMP7:%.*]] = mul i64 16, [[TMP6]]
-; RV64-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[TMP7]], i32 0
+; RV64-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[TMP7]], i64 0
 ; RV64-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 1 x i64> [[DOTSPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
 ; RV64-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; RV64:       vector.body:
 ; RV64-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; RV64-NEXT:    [[VEC_IND:%.*]] = phi <vscale x 1 x i64> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; RV64-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], <vscale x 1 x i64> [[VEC_IND]]
-; RV64-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 1 x i32> @llvm.masked.gather.nxv1i32.nxv1p0(<vscale x 1 x ptr> [[TMP8]], i32 4, <vscale x 1 x i1> shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i32 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i32> poison), !alias.scope !0
-; RV64-NEXT:    [[TMP9:%.*]] = icmp slt <vscale x 1 x i32> [[WIDE_MASKED_GATHER]], shufflevector (<vscale x 1 x i32> insertelement (<vscale x 1 x i32> poison, i32 100, i32 0), <vscale x 1 x i32> poison, <vscale x 1 x i32> zeroinitializer)
-; RV64-NEXT:    [[TMP10:%.*]] = shl nuw nsw <vscale x 1 x i64> [[VEC_IND]], shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 1, i32 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer)
+; RV64-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 1 x i32> @llvm.masked.gather.nxv1i32.nxv1p0(<vscale x 1 x ptr> [[TMP8]], i32 4, <vscale x 1 x i1> shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i64 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i32> poison), !alias.scope !0
+; RV64-NEXT:    [[TMP9:%.*]] = icmp slt <vscale x 1 x i32> [[WIDE_MASKED_GATHER]], shufflevector (<vscale x 1 x i32> insertelement (<vscale x 1 x i32> poison, i32 100, i64 0), <vscale x 1 x i32> poison, <vscale x 1 x i32> zeroinitializer)
+; RV64-NEXT:    [[TMP10:%.*]] = shl nuw nsw <vscale x 1 x i64> [[VEC_IND]], shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 1, i64 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer)
 ; RV64-NEXT:    [[TMP11:%.*]] = getelementptr inbounds double, ptr [[B]], <vscale x 1 x i64> [[TMP10]]
 ; RV64-NEXT:    [[WIDE_MASKED_GATHER6:%.*]] = call <vscale x 1 x double> @llvm.masked.gather.nxv1f64.nxv1p0(<vscale x 1 x ptr> [[TMP11]], i32 8, <vscale x 1 x i1> [[TMP9]], <vscale x 1 x double> poison), !alias.scope !3
 ; RV64-NEXT:    [[TMP12:%.*]] = sitofp <vscale x 1 x i32> [[WIDE_MASKED_GATHER]] to <vscale x 1 x double>

diff  --git a/llvm/test/Transforms/LoopVectorize/RISCV/scalable-basics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/scalable-basics.ll
index c90b23b713c85..ab4e9a0d108d4 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/scalable-basics.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/scalable-basics.ll
@@ -22,7 +22,7 @@ define void @vector_add(ptr noalias nocapture %a, i64 %v, i64 %n) {
 ; VLENUNK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
 ; VLENUNK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]]
 ; VLENUNK-NEXT:    [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
-; VLENUNK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[V:%.*]], i32 0
+; VLENUNK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[V:%.*]], i64 0
 ; VLENUNK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 1 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
 ; VLENUNK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; VLENUNK:       vector.body:
@@ -64,7 +64,7 @@ define void @vector_add(ptr noalias nocapture %a, i64 %v, i64 %n) {
 ; VLEN128-NEXT:    [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
 ; VLEN128-NEXT:    [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]]
 ; VLEN128-NEXT:    [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
-; VLEN128-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[V:%.*]], i32 0
+; VLEN128-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[V:%.*]], i64 0
 ; VLEN128-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 1 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
 ; VLEN128-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; VLEN128:       vector.body:
@@ -129,9 +129,9 @@ define void @vector_add_i32(ptr noalias nocapture %a, i32 %v, i64 %n) {
 ; VLENUNK-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 4
 ; VLENUNK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
 ; VLENUNK-NEXT:    [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
-; VLENUNK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i32> poison, i32 [[V:%.*]], i32 0
+; VLENUNK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i32> poison, i32 [[V:%.*]], i64 0
 ; VLENUNK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
-; VLENUNK-NEXT:    [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <vscale x 2 x i32> poison, i32 [[V]], i32 0
+; VLENUNK-NEXT:    [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <vscale x 2 x i32> poison, i32 [[V]], i64 0
 ; VLENUNK-NEXT:    [[BROADCAST_SPLAT3:%.*]] = shufflevector <vscale x 2 x i32> [[BROADCAST_SPLATINSERT2]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
 ; VLENUNK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; VLENUNK:       vector.body:
@@ -191,9 +191,9 @@ define void @vector_add_i32(ptr noalias nocapture %a, i32 %v, i64 %n) {
 ; VLEN128-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 4
 ; VLEN128-NEXT:    [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
 ; VLEN128-NEXT:    [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
-; VLEN128-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i32> poison, i32 [[V:%.*]], i32 0
+; VLEN128-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i32> poison, i32 [[V:%.*]], i64 0
 ; VLEN128-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
-; VLEN128-NEXT:    [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <vscale x 2 x i32> poison, i32 [[V]], i32 0
+; VLEN128-NEXT:    [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <vscale x 2 x i32> poison, i32 [[V]], i64 0
 ; VLEN128-NEXT:    [[BROADCAST_SPLAT3:%.*]] = shufflevector <vscale x 2 x i32> [[BROADCAST_SPLATINSERT2]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
 ; VLEN128-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; VLEN128:       vector.body:
@@ -327,7 +327,7 @@ define void @indexed_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i
 ; VLENUNK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
 ; VLENUNK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]]
 ; VLENUNK-NEXT:    [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
-; VLENUNK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[V:%.*]], i32 0
+; VLENUNK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[V:%.*]], i64 0
 ; VLENUNK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 1 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
 ; VLENUNK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; VLENUNK:       vector.body:
@@ -337,7 +337,7 @@ define void @indexed_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i
 ; VLENUNK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0
 ; VLENUNK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 1 x i64>, ptr [[TMP4]], align 8
 ; VLENUNK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], <vscale x 1 x i64> [[WIDE_LOAD]]
-; VLENUNK-NEXT:    call void @llvm.masked.scatter.nxv1i64.nxv1p0(<vscale x 1 x i64> [[BROADCAST_SPLAT]], <vscale x 1 x ptr> [[TMP5]], i32 8, <vscale x 1 x i1> shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i32 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer))
+; VLENUNK-NEXT:    call void @llvm.masked.scatter.nxv1i64.nxv1p0(<vscale x 1 x i64> [[BROADCAST_SPLAT]], <vscale x 1 x ptr> [[TMP5]], i32 8, <vscale x 1 x i1> shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i64 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer))
 ; VLENUNK-NEXT:    [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
 ; VLENUNK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
 ; VLENUNK-NEXT:    [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
@@ -369,7 +369,7 @@ define void @indexed_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i
 ; VLEN128-NEXT:    [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
 ; VLEN128-NEXT:    [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]]
 ; VLEN128-NEXT:    [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
-; VLEN128-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[V:%.*]], i32 0
+; VLEN128-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[V:%.*]], i64 0
 ; VLEN128-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 1 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
 ; VLEN128-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; VLEN128:       vector.body:
@@ -379,7 +379,7 @@ define void @indexed_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i
 ; VLEN128-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0
 ; VLEN128-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 1 x i64>, ptr [[TMP4]], align 8
 ; VLEN128-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], <vscale x 1 x i64> [[WIDE_LOAD]]
-; VLEN128-NEXT:    call void @llvm.masked.scatter.nxv1i64.nxv1p0(<vscale x 1 x i64> [[BROADCAST_SPLAT]], <vscale x 1 x ptr> [[TMP5]], i32 8, <vscale x 1 x i1> shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i32 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer))
+; VLEN128-NEXT:    call void @llvm.masked.scatter.nxv1i64.nxv1p0(<vscale x 1 x i64> [[BROADCAST_SPLAT]], <vscale x 1 x ptr> [[TMP5]], i32 8, <vscale x 1 x i1> shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i64 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer))
 ; VLEN128-NEXT:    [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
 ; VLEN128-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
 ; VLEN128-NEXT:    [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
@@ -438,7 +438,7 @@ define i64 @indexed_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i64
 ; VLENUNK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0
 ; VLENUNK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 1 x i64>, ptr [[TMP4]], align 8
 ; VLENUNK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], <vscale x 1 x i64> [[WIDE_LOAD]]
-; VLENUNK-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(<vscale x 1 x ptr> [[TMP5]], i32 8, <vscale x 1 x i1> shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i32 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i64> poison)
+; VLENUNK-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(<vscale x 1 x ptr> [[TMP5]], i32 8, <vscale x 1 x i1> shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i64 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i64> poison)
 ; VLENUNK-NEXT:    [[TMP6]] = add <vscale x 1 x i64> [[VEC_PHI]], [[WIDE_MASKED_GATHER]]
 ; VLENUNK-NEXT:    [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
 ; VLENUNK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]]
@@ -485,7 +485,7 @@ define i64 @indexed_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i64
 ; VLEN128-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0
 ; VLEN128-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 1 x i64>, ptr [[TMP4]], align 8
 ; VLEN128-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], <vscale x 1 x i64> [[WIDE_LOAD]]
-; VLEN128-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(<vscale x 1 x ptr> [[TMP5]], i32 8, <vscale x 1 x i1> shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i32 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i64> poison)
+; VLEN128-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(<vscale x 1 x ptr> [[TMP5]], i32 8, <vscale x 1 x i1> shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i64 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i64> poison)
 ; VLEN128-NEXT:    [[TMP6]] = add <vscale x 1 x i64> [[VEC_PHI]], [[WIDE_MASKED_GATHER]]
 ; VLEN128-NEXT:    [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
 ; VLEN128-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]]
@@ -543,7 +543,7 @@ define void @splat_int(ptr noalias nocapture %a, i64 %v, i64 %n) {
 ; VLENUNK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
 ; VLENUNK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]]
 ; VLENUNK-NEXT:    [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
-; VLENUNK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[V:%.*]], i32 0
+; VLENUNK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[V:%.*]], i64 0
 ; VLENUNK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 1 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
 ; VLENUNK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; VLENUNK:       vector.body:
@@ -581,7 +581,7 @@ define void @splat_int(ptr noalias nocapture %a, i64 %v, i64 %n) {
 ; VLEN128-NEXT:    [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
 ; VLEN128-NEXT:    [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]]
 ; VLEN128-NEXT:    [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
-; VLEN128-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[V:%.*]], i32 0
+; VLEN128-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[V:%.*]], i64 0
 ; VLEN128-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 1 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
 ; VLEN128-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; VLEN128:       vector.body:
@@ -635,7 +635,7 @@ define void @splat_ptr(ptr noalias nocapture %a, ptr %v, i64 %n) {
 ; VLENUNK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
 ; VLENUNK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]]
 ; VLENUNK-NEXT:    [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
-; VLENUNK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x ptr> poison, ptr [[V:%.*]], i32 0
+; VLENUNK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x ptr> poison, ptr [[V:%.*]], i64 0
 ; VLENUNK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 1 x ptr> [[BROADCAST_SPLATINSERT]], <vscale x 1 x ptr> poison, <vscale x 1 x i32> zeroinitializer
 ; VLENUNK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; VLENUNK:       vector.body:
@@ -673,7 +673,7 @@ define void @splat_ptr(ptr noalias nocapture %a, ptr %v, i64 %n) {
 ; VLEN128-NEXT:    [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
 ; VLEN128-NEXT:    [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]]
 ; VLEN128-NEXT:    [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
-; VLEN128-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x ptr> poison, ptr [[V:%.*]], i32 0
+; VLEN128-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x ptr> poison, ptr [[V:%.*]], i64 0
 ; VLEN128-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 1 x ptr> [[BROADCAST_SPLATINSERT]], <vscale x 1 x ptr> poison, <vscale x 1 x i32> zeroinitializer
 ; VLEN128-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; VLEN128:       vector.body:

diff  --git a/llvm/test/Transforms/LoopVectorize/RISCV/scalable-tailfold.ll b/llvm/test/Transforms/LoopVectorize/RISCV/scalable-tailfold.ll
index a60c35cf32022..166a7dba2c829 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/scalable-tailfold.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/scalable-tailfold.ll
@@ -19,7 +19,7 @@ define void @vector_add(ptr noalias nocapture %a, i64 %v, i64 %n) {
 ; CHECK-NEXT:    [[N_RND_UP:%.*]] = add i64 1024, [[TMP4]]
 ; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP2]]
 ; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[V:%.*]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[V:%.*]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 1 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
@@ -84,7 +84,7 @@ define void @indexed_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i
 ; CHECK-NEXT:    [[N_RND_UP:%.*]] = add i64 1024, [[TMP4]]
 ; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP2]]
 ; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[V:%.*]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[V:%.*]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 1 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
@@ -218,7 +218,7 @@ define void @splat_int(ptr noalias nocapture %a, i64 %v, i64 %n) {
 ; CHECK-NEXT:    [[N_RND_UP:%.*]] = add i64 1024, [[TMP4]]
 ; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP2]]
 ; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[V:%.*]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[V:%.*]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 1 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
@@ -275,7 +275,7 @@ define void @uniform_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i
 ; CHECK-NEXT:    [[N_RND_UP:%.*]] = add i64 1024, [[TMP4]]
 ; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP2]]
 ; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[V:%.*]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[V:%.*]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 1 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:

diff  --git a/llvm/test/Transforms/LoopVectorize/RISCV/select-cmp-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/select-cmp-reduction.ll
index 88bf6f6666095..b32cb3eddfbd2 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/select-cmp-reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/select-cmp-reduction.ll
@@ -10,9 +10,9 @@ define i32 @select_icmp(i32 %x, i32 %y, i32* nocapture readonly %c, i64 %n) #0 {
 ; CHECK:       vector.ph:
 ; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 %n, 4
 ; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 %n, [[N_MOD_VF]]
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[X:%.*]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[X:%.*]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i32> poison, i32 [[Y:%.*]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i32> poison, i32 [[Y:%.*]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT1]], <4 x i32> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
@@ -39,9 +39,9 @@ define i32 @select_icmp(i32 %x, i32 %y, i32* nocapture readonly %c, i64 %n) #0 {
 ; SCALABLE-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 4
 ; SCALABLE-NEXT:    [[N_MOD_VF:%.*]] = urem i64 %n, [[TMP3]]
 ; SCALABLE-NEXT:    [[N_VEC:%.*]] = sub i64 %n, [[N_MOD_VF]]
-; SCALABLE-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[X:%.*]], i32 0
+; SCALABLE-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[X:%.*]], i64 0
 ; SCALABLE-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
-; SCALABLE-NEXT:    [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[Y:%.*]], i32 0
+; SCALABLE-NEXT:    [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[Y:%.*]], i64 0
 ; SCALABLE-NEXT:    [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT1]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
 ; SCALABLE-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; SCALABLE:       vector.body:
@@ -87,9 +87,9 @@ define i32 @select_fcmp(float %x, i32 %y, float* nocapture readonly %c, i64 %n)
 ; CHECK:       vector.ph:
 ; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 %n, 4
 ; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 %n, [[N_MOD_VF]]
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[X:%.*]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[X:%.*]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i32> poison, i32 [[Y:%.*]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i32> poison, i32 [[Y:%.*]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT1]], <4 x i32> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
@@ -116,9 +116,9 @@ define i32 @select_fcmp(float %x, i32 %y, float* nocapture readonly %c, i64 %n)
 ; SCALABLE-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 4
 ; SCALABLE-NEXT:    [[N_MOD_VF:%.*]] = urem i64 %n, [[TMP3]]
 ; SCALABLE-NEXT:    [[N_VEC:%.*]] = sub i64 %n, [[N_MOD_VF]]
-; SCALABLE-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x float> poison, float [[X:%.*]], i32 0
+; SCALABLE-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x float> poison, float [[X:%.*]], i64 0
 ; SCALABLE-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x float> [[BROADCAST_SPLATINSERT]], <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
-; SCALABLE-NEXT:    [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[Y:%.*]], i32 0
+; SCALABLE-NEXT:    [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[Y:%.*]], i64 0
 ; SCALABLE-NEXT:    [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT1]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
 ; SCALABLE-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; SCALABLE:       vector.body:
@@ -192,21 +192,21 @@ define i32 @select_const_i32_from_icmp(i32* nocapture readonly %v, i64 %n) #0 {
 ; SCALABLE-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; SCALABLE:       vector.body:
 ; SCALABLE-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; SCALABLE-NEXT:    [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
+; SCALABLE-NEXT:    [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
 ; SCALABLE-NEXT:    [[TMP4:%.*]] = add i64 [[INDEX]], 0
 ; SCALABLE-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[V:%.*]], i64 [[TMP4]]
 ; SCALABLE-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TMP5]], i32 0
 ; SCALABLE-NEXT:    [[TMP7:%.*]] = bitcast i32* [[TMP6]] to <vscale x 4 x i32>*
 ; SCALABLE-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, <vscale x 4 x i32>* [[TMP7]], align 4
-; SCALABLE-NEXT:    [[TMP8:%.*]] = icmp eq <vscale x 4 x i32> [[WIDE_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
-; SCALABLE-NEXT:    [[TMP9]] = select <vscale x 4 x i1> [[TMP8]], <vscale x 4 x i32> [[VEC_PHI]], <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 7, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+; SCALABLE-NEXT:    [[TMP8:%.*]] = icmp eq <vscale x 4 x i32> [[WIDE_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+; SCALABLE-NEXT:    [[TMP9]] = select <vscale x 4 x i1> [[TMP8]], <vscale x 4 x i32> [[VEC_PHI]], <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 7, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
 ; SCALABLE-NEXT:    [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
 ; SCALABLE-NEXT:    [[TMP11:%.*]] = mul i64 [[TMP10]], 4
 ; SCALABLE-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]]
 ; SCALABLE-NEXT:    [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; SCALABLE-NEXT:    br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
 ; SCALABLE:       middle.block:
-; SCALABLE-NEXT:    [[RDX_SELECT_CMP:%.*]] = icmp ne <vscale x 4 x i32> [[TMP9]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+; SCALABLE-NEXT:    [[RDX_SELECT_CMP:%.*]] = icmp ne <vscale x 4 x i32> [[TMP9]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
 ; SCALABLE-NEXT:    [[TMP13:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[RDX_SELECT_CMP]])
 ; SCALABLE-NEXT:    [[RDX_SELECT:%.*]] = select i1 [[TMP13]], i32 7, i32 3
 ;
@@ -233,9 +233,9 @@ define i32 @select_i32_from_icmp(i32* nocapture readonly %v, i32 %a, i32 %b, i64
 ; CHECK:       vector.ph:
 ; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 %n, 4
 ; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 %n, [[N_MOD_VF]]
-; CHECK-NEXT:    [[MINMAX_IDENT_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[A:%.*]], i32 0
+; CHECK-NEXT:    [[MINMAX_IDENT_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[A:%.*]], i64 0
 ; CHECK-NEXT:    [[MINMAX_IDENT_SPLAT:%.*]] = shufflevector <4 x i32> [[MINMAX_IDENT_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
@@ -252,7 +252,7 @@ define i32 @select_i32_from_icmp(i32* nocapture readonly %v, i32 %a, i32 %b, i64
 ; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
 ; CHECK:       middle.block:
-; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[A]], i32 0
+; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[A]], i64 0
 ; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    [[RDX_SELECT_CMP:%.*]] = icmp ne <4 x i32> [[TMP5]], [[DOTSPLAT]]
 ; CHECK-NEXT:    [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[RDX_SELECT_CMP]])
@@ -264,9 +264,9 @@ define i32 @select_i32_from_icmp(i32* nocapture readonly %v, i32 %a, i32 %b, i64
 ; SCALABLE-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 4
 ; SCALABLE-NEXT:    [[N_MOD_VF:%.*]] = urem i64 %n, [[TMP3]]
 ; SCALABLE-NEXT:    [[N_VEC:%.*]] = sub i64 %n, [[N_MOD_VF]]
-; SCALABLE-NEXT:    [[MINMAX_IDENT_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[A:%.*]], i32 0
+; SCALABLE-NEXT:    [[MINMAX_IDENT_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[A:%.*]], i64 0
 ; SCALABLE-NEXT:    [[MINMAX_IDENT_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[MINMAX_IDENT_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
-; SCALABLE-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[B:%.*]], i32 0
+; SCALABLE-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[B:%.*]], i64 0
 ; SCALABLE-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
 ; SCALABLE-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; SCALABLE:       vector.body:
@@ -277,7 +277,7 @@ define i32 @select_i32_from_icmp(i32* nocapture readonly %v, i32 %a, i32 %b, i64
 ; SCALABLE-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TMP5]], i32 0
 ; SCALABLE-NEXT:    [[TMP7:%.*]] = bitcast i32* [[TMP6]] to <vscale x 4 x i32>*
 ; SCALABLE-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, <vscale x 4 x i32>* [[TMP7]], align 4
-; SCALABLE-NEXT:    [[TMP8:%.*]] = icmp eq <vscale x 4 x i32> [[WIDE_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+; SCALABLE-NEXT:    [[TMP8:%.*]] = icmp eq <vscale x 4 x i32> [[WIDE_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
 ; SCALABLE-NEXT:    [[TMP9]] = select <vscale x 4 x i1> [[TMP8]], <vscale x 4 x i32> [[VEC_PHI]], <vscale x 4 x i32> [[BROADCAST_SPLAT]]
 ; SCALABLE-NEXT:    [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
 ; SCALABLE-NEXT:    [[TMP11:%.*]] = mul i64 [[TMP10]], 4
@@ -285,7 +285,7 @@ define i32 @select_i32_from_icmp(i32* nocapture readonly %v, i32 %a, i32 %b, i64
 ; SCALABLE-NEXT:    [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; SCALABLE-NEXT:    br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
 ; SCALABLE:       middle.block:
-; SCALABLE-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[A]], i32 0
+; SCALABLE-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[A]], i64 0
 ; SCALABLE-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[DOTSPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
 ; SCALABLE-NEXT:    [[RDX_SELECT_CMP:%.*]] = icmp ne <vscale x 4 x i32> [[TMP9]], [[DOTSPLAT]]
 ; SCALABLE-NEXT:    [[TMP13:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[RDX_SELECT_CMP]])
@@ -342,21 +342,21 @@ define i32 @select_const_i32_from_fcmp(float* nocapture readonly %v, i64 %n) #0
 ; SCALABLE-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; SCALABLE:       vector.body:
 ; SCALABLE-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; SCALABLE-NEXT:    [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 2, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
+; SCALABLE-NEXT:    [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 2, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
 ; SCALABLE-NEXT:    [[TMP4:%.*]] = add i64 [[INDEX]], 0
 ; SCALABLE-NEXT:    [[TMP5:%.*]] = getelementptr inbounds float, float* [[V:%.*]], i64 [[TMP4]]
 ; SCALABLE-NEXT:    [[TMP6:%.*]] = getelementptr inbounds float, float* [[TMP5]], i32 0
 ; SCALABLE-NEXT:    [[TMP7:%.*]] = bitcast float* [[TMP6]] to <vscale x 4 x float>*
 ; SCALABLE-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, <vscale x 4 x float>* [[TMP7]], align 4
-; SCALABLE-NEXT:    [[TMP8:%.*]] = fcmp fast ueq <vscale x 4 x float> [[WIDE_LOAD]], shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 3.000000e+00, i32 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer)
-; SCALABLE-NEXT:    [[TMP9]] = select <vscale x 4 x i1> [[TMP8]], <vscale x 4 x i32> [[VEC_PHI]], <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+; SCALABLE-NEXT:    [[TMP8:%.*]] = fcmp fast ueq <vscale x 4 x float> [[WIDE_LOAD]], shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 3.000000e+00, i64 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer)
+; SCALABLE-NEXT:    [[TMP9]] = select <vscale x 4 x i1> [[TMP8]], <vscale x 4 x i32> [[VEC_PHI]], <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
 ; SCALABLE-NEXT:    [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
 ; SCALABLE-NEXT:    [[TMP11:%.*]] = mul i64 [[TMP10]], 4
 ; SCALABLE-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]]
 ; SCALABLE-NEXT:    [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; SCALABLE-NEXT:    br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
 ; SCALABLE:       middle.block:
-; SCALABLE-NEXT:    [[RDX_SELECT_CMP:%.*]] = icmp ne <vscale x 4 x i32> [[TMP9]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 2, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+; SCALABLE-NEXT:    [[RDX_SELECT_CMP:%.*]] = icmp ne <vscale x 4 x i32> [[TMP9]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 2, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
 ; SCALABLE-NEXT:    [[TMP13:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[RDX_SELECT_CMP]])
 ; SCALABLE-NEXT:    [[RDX_SELECT:%.*]] = select i1 [[TMP13]], i32 1, i32 2
 ;
@@ -449,14 +449,14 @@ define i32 @pred_select_const_i32_from_icmp(i32* noalias nocapture readonly %src
 ; SCALABLE-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TMP5]], i32 0
 ; SCALABLE-NEXT:    [[TMP7:%.*]] = bitcast i32* [[TMP6]] to <vscale x 4 x i32>*
 ; SCALABLE-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, <vscale x 4 x i32>* [[TMP7]], align 4
-; SCALABLE-NEXT:    [[TMP8:%.*]] = icmp sgt <vscale x 4 x i32> [[WIDE_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 35, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+; SCALABLE-NEXT:    [[TMP8:%.*]] = icmp sgt <vscale x 4 x i32> [[WIDE_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 35, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
 ; SCALABLE-NEXT:    [[TMP9:%.*]] = getelementptr i32, i32* [[SRC2:%.*]], i64 [[TMP4]]
 ; SCALABLE-NEXT:    [[TMP10:%.*]] = getelementptr i32, i32* [[TMP9]], i32 0
 ; SCALABLE-NEXT:    [[TMP11:%.*]] = bitcast i32* [[TMP10]] to <vscale x 4 x i32>*
 ; SCALABLE-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0nxv4i32(<vscale x 4 x i32>* [[TMP11]], i32 4, <vscale x 4 x i1> [[TMP8]], <vscale x 4 x i32> poison)
-; SCALABLE-NEXT:    [[TMP12:%.*]] = icmp eq <vscale x 4 x i32> [[WIDE_MASKED_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 2, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
-; SCALABLE-NEXT:    [[TMP13:%.*]] = select <vscale x 4 x i1> [[TMP12]], <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32> [[VEC_PHI]]
-; SCALABLE-NEXT:    [[TMP14:%.*]] = xor <vscale x 4 x i1> [[TMP8]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i32 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)
+; SCALABLE-NEXT:    [[TMP12:%.*]] = icmp eq <vscale x 4 x i32> [[WIDE_MASKED_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 2, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+; SCALABLE-NEXT:    [[TMP13:%.*]] = select <vscale x 4 x i1> [[TMP12]], <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32> [[VEC_PHI]]
+; SCALABLE-NEXT:    [[TMP14:%.*]] = xor <vscale x 4 x i1> [[TMP8]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)
 ; SCALABLE-NEXT:    [[PREDPHI]] = select <vscale x 4 x i1> [[TMP8]], <vscale x 4 x i32> [[TMP13]], <vscale x 4 x i32> [[VEC_PHI]]
 ; SCALABLE-NEXT:    [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
 ; SCALABLE-NEXT:    [[TMP16:%.*]] = mul i64 [[TMP15]], 4

diff  --git a/llvm/test/Transforms/LoopVectorize/RISCV/short-trip-count.ll b/llvm/test/Transforms/LoopVectorize/RISCV/short-trip-count.ll
index 1a3ed3b60b6db..3314eaa6301f1 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/short-trip-count.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/short-trip-count.ll
@@ -76,7 +76,7 @@ define void @small_trip_count_min_vlen_32(ptr nocapture %a) nounwind vscale_rang
 ; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[TMP8]]
 ; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i32 0
 ; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 2 x i32> @llvm.masked.load.nxv2i32.p0(ptr [[TMP10]], i32 4, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i32> poison)
-; CHECK-NEXT:    [[TMP11:%.*]] = add nsw <vscale x 2 x i32> [[WIDE_MASKED_LOAD]], shufflevector (<vscale x 2 x i32> insertelement (<vscale x 2 x i32> poison, i32 1, i32 0), <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP11:%.*]] = add nsw <vscale x 2 x i32> [[WIDE_MASKED_LOAD]], shufflevector (<vscale x 2 x i32> insertelement (<vscale x 2 x i32> poison, i32 1, i64 0), <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer)
 ; CHECK-NEXT:    call void @llvm.masked.store.nxv2i32.p0(<vscale x 2 x i32> [[TMP11]], ptr [[TMP10]], i32 4, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
 ; CHECK-NEXT:    [[TMP12:%.*]] = call i32 @llvm.vscale.i32()
 ; CHECK-NEXT:    [[TMP13:%.*]] = mul i32 [[TMP12]], 2

diff  --git a/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll
index 00d73463555ae..15a9140ec0234 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll
@@ -24,7 +24,7 @@ define void @uniform_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i6
 ; SCALABLE-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; SCALABLE-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 0
 ; SCALABLE-NEXT:    [[TMP3:%.*]] = load i64, ptr [[B:%.*]], align 8
-; SCALABLE-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[TMP3]], i32 0
+; SCALABLE-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[TMP3]], i64 0
 ; SCALABLE-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 1 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
 ; SCALABLE-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP2]]
 ; SCALABLE-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 0
@@ -60,7 +60,7 @@ define void @uniform_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i6
 ; FIXEDLEN-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
 ; FIXEDLEN-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 2
 ; FIXEDLEN-NEXT:    [[TMP2:%.*]] = load i64, ptr [[B:%.*]], align 8
-; FIXEDLEN-NEXT:    [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i64> poison, i64 [[TMP2]], i32 0
+; FIXEDLEN-NEXT:    [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i64> poison, i64 [[TMP2]], i64 0
 ; FIXEDLEN-NEXT:    [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT1]], <2 x i64> poison, <2 x i32> zeroinitializer
 ; FIXEDLEN-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]]
 ; FIXEDLEN-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]]
@@ -106,7 +106,7 @@ define void @uniform_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i6
 ; TF-SCALABLE-NEXT:    [[TMP5:%.*]] = add i64 [[INDEX]], 0
 ; TF-SCALABLE-NEXT:    [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 1 x i1> @llvm.get.active.lane.mask.nxv1i1.i64(i64 [[TMP5]], i64 1024)
 ; TF-SCALABLE-NEXT:    [[TMP6:%.*]] = load i64, ptr [[B:%.*]], align 8
-; TF-SCALABLE-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[TMP6]], i32 0
+; TF-SCALABLE-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[TMP6]], i64 0
 ; TF-SCALABLE-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 1 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
 ; TF-SCALABLE-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP5]]
 ; TF-SCALABLE-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i32 0
@@ -140,7 +140,7 @@ define void @uniform_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i6
 ; TF-FIXEDLEN-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; TF-FIXEDLEN-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
 ; TF-FIXEDLEN-NEXT:    [[TMP1:%.*]] = load i64, ptr [[B:%.*]], align 8
-; TF-FIXEDLEN-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP1]], i32 0
+; TF-FIXEDLEN-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP1]], i64 0
 ; TF-FIXEDLEN-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
 ; TF-FIXEDLEN-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]]
 ; TF-FIXEDLEN-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0
@@ -196,7 +196,7 @@ define i64 @uniform_load_outside_use(ptr noalias nocapture %a, ptr noalias nocap
 ; SCALABLE-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; SCALABLE-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 0
 ; SCALABLE-NEXT:    [[TMP3:%.*]] = load i64, ptr [[B:%.*]], align 8
-; SCALABLE-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[TMP3]], i32 0
+; SCALABLE-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[TMP3]], i64 0
 ; SCALABLE-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 1 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
 ; SCALABLE-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP2]]
 ; SCALABLE-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 0
@@ -233,7 +233,7 @@ define i64 @uniform_load_outside_use(ptr noalias nocapture %a, ptr noalias nocap
 ; FIXEDLEN-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
 ; FIXEDLEN-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 2
 ; FIXEDLEN-NEXT:    [[TMP2:%.*]] = load i64, ptr [[B:%.*]], align 8
-; FIXEDLEN-NEXT:    [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i64> poison, i64 [[TMP2]], i32 0
+; FIXEDLEN-NEXT:    [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i64> poison, i64 [[TMP2]], i64 0
 ; FIXEDLEN-NEXT:    [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT1]], <2 x i64> poison, <2 x i32> zeroinitializer
 ; FIXEDLEN-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]]
 ; FIXEDLEN-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]]
@@ -286,7 +286,7 @@ define i64 @uniform_load_outside_use(ptr noalias nocapture %a, ptr noalias nocap
 ; TF-FIXEDLEN-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; TF-FIXEDLEN-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
 ; TF-FIXEDLEN-NEXT:    [[TMP1:%.*]] = load i64, ptr [[B:%.*]], align 8
-; TF-FIXEDLEN-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP1]], i32 0
+; TF-FIXEDLEN-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP1]], i64 0
 ; TF-FIXEDLEN-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
 ; TF-FIXEDLEN-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]]
 ; TF-FIXEDLEN-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0
@@ -341,22 +341,22 @@ define void @conditional_uniform_load(ptr noalias nocapture %a, ptr noalias noca
 ; SCALABLE-NEXT:    [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
 ; SCALABLE-NEXT:    [[TMP2:%.*]] = call <vscale x 1 x i64> @llvm.experimental.stepvector.nxv1i64()
 ; SCALABLE-NEXT:    [[TMP3:%.*]] = add <vscale x 1 x i64> [[TMP2]], zeroinitializer
-; SCALABLE-NEXT:    [[TMP4:%.*]] = mul <vscale x 1 x i64> [[TMP3]], shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 1, i32 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer)
+; SCALABLE-NEXT:    [[TMP4:%.*]] = mul <vscale x 1 x i64> [[TMP3]], shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 1, i64 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer)
 ; SCALABLE-NEXT:    [[INDUCTION:%.*]] = add <vscale x 1 x i64> zeroinitializer, [[TMP4]]
 ; SCALABLE-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
 ; SCALABLE-NEXT:    [[TMP6:%.*]] = mul i64 1, [[TMP5]]
-; SCALABLE-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[TMP6]], i32 0
+; SCALABLE-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[TMP6]], i64 0
 ; SCALABLE-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 1 x i64> [[DOTSPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
-; SCALABLE-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x ptr> poison, ptr [[B:%.*]], i32 0
+; SCALABLE-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x ptr> poison, ptr [[B:%.*]], i64 0
 ; SCALABLE-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 1 x ptr> [[BROADCAST_SPLATINSERT]], <vscale x 1 x ptr> poison, <vscale x 1 x i32> zeroinitializer
 ; SCALABLE-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; SCALABLE:       vector.body:
 ; SCALABLE-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; SCALABLE-NEXT:    [[VEC_IND:%.*]] = phi <vscale x 1 x i64> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; SCALABLE-NEXT:    [[TMP7:%.*]] = add i64 [[INDEX]], 0
-; SCALABLE-NEXT:    [[TMP8:%.*]] = icmp ugt <vscale x 1 x i64> [[VEC_IND]], shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 10, i32 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer)
+; SCALABLE-NEXT:    [[TMP8:%.*]] = icmp ugt <vscale x 1 x i64> [[VEC_IND]], shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 10, i64 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer)
 ; SCALABLE-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(<vscale x 1 x ptr> [[BROADCAST_SPLAT]], i32 8, <vscale x 1 x i1> [[TMP8]], <vscale x 1 x i64> poison)
-; SCALABLE-NEXT:    [[TMP9:%.*]] = xor <vscale x 1 x i1> [[TMP8]], shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i32 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer)
+; SCALABLE-NEXT:    [[TMP9:%.*]] = xor <vscale x 1 x i1> [[TMP8]], shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i64 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer)
 ; SCALABLE-NEXT:    [[PREDPHI:%.*]] = select <vscale x 1 x i1> [[TMP8]], <vscale x 1 x i64> [[WIDE_MASKED_GATHER]], <vscale x 1 x i64> zeroinitializer
 ; SCALABLE-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP7]]
 ; SCALABLE-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[TMP10]], i32 0
@@ -393,9 +393,9 @@ define void @conditional_uniform_load(ptr noalias nocapture %a, ptr noalias noca
 ; FIXEDLEN-NEXT:  entry:
 ; FIXEDLEN-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; FIXEDLEN:       vector.ph:
-; FIXEDLEN-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x ptr> poison, ptr [[B:%.*]], i32 0
+; FIXEDLEN-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x ptr> poison, ptr [[B:%.*]], i64 0
 ; FIXEDLEN-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x ptr> [[BROADCAST_SPLATINSERT]], <2 x ptr> poison, <2 x i32> zeroinitializer
-; FIXEDLEN-NEXT:    [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <2 x ptr> poison, ptr [[B]], i32 0
+; FIXEDLEN-NEXT:    [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <2 x ptr> poison, ptr [[B]], i64 0
 ; FIXEDLEN-NEXT:    [[BROADCAST_SPLAT3:%.*]] = shufflevector <2 x ptr> [[BROADCAST_SPLATINSERT2]], <2 x ptr> poison, <2 x i32> zeroinitializer
 ; FIXEDLEN-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; FIXEDLEN:       vector.body:
@@ -459,13 +459,13 @@ define void @conditional_uniform_load(ptr noalias nocapture %a, ptr noalias noca
 ; TF-SCALABLE-NEXT:    [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; TF-SCALABLE-NEXT:    [[TMP5:%.*]] = call <vscale x 1 x i64> @llvm.experimental.stepvector.nxv1i64()
 ; TF-SCALABLE-NEXT:    [[TMP6:%.*]] = add <vscale x 1 x i64> [[TMP5]], zeroinitializer
-; TF-SCALABLE-NEXT:    [[TMP7:%.*]] = mul <vscale x 1 x i64> [[TMP6]], shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 1, i32 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer)
+; TF-SCALABLE-NEXT:    [[TMP7:%.*]] = mul <vscale x 1 x i64> [[TMP6]], shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 1, i64 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer)
 ; TF-SCALABLE-NEXT:    [[INDUCTION:%.*]] = add <vscale x 1 x i64> zeroinitializer, [[TMP7]]
 ; TF-SCALABLE-NEXT:    [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
 ; TF-SCALABLE-NEXT:    [[TMP9:%.*]] = mul i64 1, [[TMP8]]
-; TF-SCALABLE-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[TMP9]], i32 0
+; TF-SCALABLE-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[TMP9]], i64 0
 ; TF-SCALABLE-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 1 x i64> [[DOTSPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
-; TF-SCALABLE-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x ptr> poison, ptr [[B:%.*]], i32 0
+; TF-SCALABLE-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x ptr> poison, ptr [[B:%.*]], i64 0
 ; TF-SCALABLE-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 1 x ptr> [[BROADCAST_SPLATINSERT]], <vscale x 1 x ptr> poison, <vscale x 1 x i32> zeroinitializer
 ; TF-SCALABLE-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; TF-SCALABLE:       vector.body:
@@ -473,10 +473,10 @@ define void @conditional_uniform_load(ptr noalias nocapture %a, ptr noalias noca
 ; TF-SCALABLE-NEXT:    [[VEC_IND:%.*]] = phi <vscale x 1 x i64> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; TF-SCALABLE-NEXT:    [[TMP10:%.*]] = add i64 [[INDEX]], 0
 ; TF-SCALABLE-NEXT:    [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 1 x i1> @llvm.get.active.lane.mask.nxv1i1.i64(i64 [[TMP10]], i64 1024)
-; TF-SCALABLE-NEXT:    [[TMP11:%.*]] = icmp ugt <vscale x 1 x i64> [[VEC_IND]], shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 10, i32 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer)
+; TF-SCALABLE-NEXT:    [[TMP11:%.*]] = icmp ugt <vscale x 1 x i64> [[VEC_IND]], shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 10, i64 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer)
 ; TF-SCALABLE-NEXT:    [[TMP12:%.*]] = select <vscale x 1 x i1> [[ACTIVE_LANE_MASK]], <vscale x 1 x i1> [[TMP11]], <vscale x 1 x i1> zeroinitializer
 ; TF-SCALABLE-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(<vscale x 1 x ptr> [[BROADCAST_SPLAT]], i32 8, <vscale x 1 x i1> [[TMP12]], <vscale x 1 x i64> poison)
-; TF-SCALABLE-NEXT:    [[TMP13:%.*]] = xor <vscale x 1 x i1> [[TMP11]], shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i32 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer)
+; TF-SCALABLE-NEXT:    [[TMP13:%.*]] = xor <vscale x 1 x i1> [[TMP11]], shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i64 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer)
 ; TF-SCALABLE-NEXT:    [[TMP14:%.*]] = select <vscale x 1 x i1> [[ACTIVE_LANE_MASK]], <vscale x 1 x i1> [[TMP13]], <vscale x 1 x i1> zeroinitializer
 ; TF-SCALABLE-NEXT:    [[PREDPHI:%.*]] = select <vscale x 1 x i1> [[TMP12]], <vscale x 1 x i64> [[WIDE_MASKED_GATHER]], <vscale x 1 x i64> zeroinitializer
 ; TF-SCALABLE-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP10]]
@@ -514,7 +514,7 @@ define void @conditional_uniform_load(ptr noalias nocapture %a, ptr noalias noca
 ; TF-FIXEDLEN-NEXT:  entry:
 ; TF-FIXEDLEN-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; TF-FIXEDLEN:       vector.ph:
-; TF-FIXEDLEN-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x ptr> poison, ptr [[B:%.*]], i32 0
+; TF-FIXEDLEN-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x ptr> poison, ptr [[B:%.*]], i64 0
 ; TF-FIXEDLEN-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x ptr> [[BROADCAST_SPLATINSERT]], <2 x ptr> poison, <2 x i32> zeroinitializer
 ; TF-FIXEDLEN-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; TF-FIXEDLEN:       vector.body:
@@ -593,7 +593,7 @@ define void @uniform_load_unaligned(ptr noalias nocapture %a, ptr noalias nocapt
 ; SCALABLE-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; SCALABLE-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 0
 ; SCALABLE-NEXT:    [[TMP3:%.*]] = load i64, ptr [[B:%.*]], align 1
-; SCALABLE-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[TMP3]], i32 0
+; SCALABLE-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[TMP3]], i64 0
 ; SCALABLE-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 1 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
 ; SCALABLE-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP2]]
 ; SCALABLE-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 0
@@ -629,7 +629,7 @@ define void @uniform_load_unaligned(ptr noalias nocapture %a, ptr noalias nocapt
 ; FIXEDLEN-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
 ; FIXEDLEN-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 2
 ; FIXEDLEN-NEXT:    [[TMP2:%.*]] = load i64, ptr [[B:%.*]], align 1
-; FIXEDLEN-NEXT:    [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i64> poison, i64 [[TMP2]], i32 0
+; FIXEDLEN-NEXT:    [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i64> poison, i64 [[TMP2]], i64 0
 ; FIXEDLEN-NEXT:    [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT1]], <2 x i64> poison, <2 x i32> zeroinitializer
 ; FIXEDLEN-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]]
 ; FIXEDLEN-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]]
@@ -675,7 +675,7 @@ define void @uniform_load_unaligned(ptr noalias nocapture %a, ptr noalias nocapt
 ; TF-SCALABLE-NEXT:    [[TMP5:%.*]] = add i64 [[INDEX]], 0
 ; TF-SCALABLE-NEXT:    [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 1 x i1> @llvm.get.active.lane.mask.nxv1i1.i64(i64 [[TMP5]], i64 1024)
 ; TF-SCALABLE-NEXT:    [[TMP6:%.*]] = load i64, ptr [[B:%.*]], align 1
-; TF-SCALABLE-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[TMP6]], i32 0
+; TF-SCALABLE-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[TMP6]], i64 0
 ; TF-SCALABLE-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 1 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
 ; TF-SCALABLE-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP5]]
 ; TF-SCALABLE-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i32 0
@@ -709,7 +709,7 @@ define void @uniform_load_unaligned(ptr noalias nocapture %a, ptr noalias nocapt
 ; TF-FIXEDLEN-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; TF-FIXEDLEN-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
 ; TF-FIXEDLEN-NEXT:    [[TMP1:%.*]] = load i64, ptr [[B:%.*]], align 1
-; TF-FIXEDLEN-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP1]], i32 0
+; TF-FIXEDLEN-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP1]], i64 0
 ; TF-FIXEDLEN-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
 ; TF-FIXEDLEN-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]]
 ; TF-FIXEDLEN-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0
@@ -760,7 +760,7 @@ define void @uniform_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i
 ; SCALABLE-NEXT:    [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
 ; SCALABLE-NEXT:    [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]]
 ; SCALABLE-NEXT:    [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
-; SCALABLE-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[V:%.*]], i32 0
+; SCALABLE-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[V:%.*]], i64 0
 ; SCALABLE-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 1 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
 ; SCALABLE-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; SCALABLE:       vector.body:
@@ -795,9 +795,9 @@ define void @uniform_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i
 ; FIXEDLEN-NEXT:  entry:
 ; FIXEDLEN-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; FIXEDLEN:       vector.ph:
-; FIXEDLEN-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[V:%.*]], i32 0
+; FIXEDLEN-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[V:%.*]], i64 0
 ; FIXEDLEN-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
-; FIXEDLEN-NEXT:    [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i64> poison, i64 [[V]], i32 0
+; FIXEDLEN-NEXT:    [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i64> poison, i64 [[V]], i64 0
 ; FIXEDLEN-NEXT:    [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT1]], <2 x i64> poison, <2 x i32> zeroinitializer
 ; FIXEDLEN-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; FIXEDLEN:       vector.body:
@@ -843,7 +843,7 @@ define void @uniform_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i
 ; TF-SCALABLE-NEXT:    [[N_RND_UP:%.*]] = add i64 1024, [[TMP4]]
 ; TF-SCALABLE-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP2]]
 ; TF-SCALABLE-NEXT:    [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
-; TF-SCALABLE-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[V:%.*]], i32 0
+; TF-SCALABLE-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[V:%.*]], i64 0
 ; TF-SCALABLE-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 1 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
 ; TF-SCALABLE-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; TF-SCALABLE:       vector.body:
@@ -878,7 +878,7 @@ define void @uniform_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i
 ; TF-FIXEDLEN-NEXT:  entry:
 ; TF-FIXEDLEN-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; TF-FIXEDLEN:       vector.ph:
-; TF-FIXEDLEN-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[V:%.*]], i32 0
+; TF-FIXEDLEN-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[V:%.*]], i64 0
 ; TF-FIXEDLEN-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
 ; TF-FIXEDLEN-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; TF-FIXEDLEN:       vector.body:
@@ -934,16 +934,16 @@ define void @uniform_store_of_loop_varying(ptr noalias nocapture %a, ptr noalias
 ; SCALABLE-NEXT:    [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
 ; SCALABLE-NEXT:    [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]]
 ; SCALABLE-NEXT:    [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
-; SCALABLE-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[V:%.*]], i32 0
+; SCALABLE-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[V:%.*]], i64 0
 ; SCALABLE-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 1 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
 ; SCALABLE-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; SCALABLE:       vector.body:
 ; SCALABLE-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; SCALABLE-NEXT:    [[TMP2:%.*]] = call <vscale x 1 x i64> @llvm.experimental.stepvector.nxv1i64()
-; SCALABLE-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[INDEX]], i32 0
+; SCALABLE-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[INDEX]], i64 0
 ; SCALABLE-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 1 x i64> [[DOTSPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
 ; SCALABLE-NEXT:    [[TMP3:%.*]] = add <vscale x 1 x i64> zeroinitializer, [[TMP2]]
-; SCALABLE-NEXT:    [[TMP4:%.*]] = mul <vscale x 1 x i64> [[TMP3]], shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 1, i32 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer)
+; SCALABLE-NEXT:    [[TMP4:%.*]] = mul <vscale x 1 x i64> [[TMP3]], shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 1, i64 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer)
 ; SCALABLE-NEXT:    [[TMP5:%.*]] = add <vscale x 1 x i64> [[DOTSPLAT]], [[TMP4]]
 ; SCALABLE-NEXT:    [[TMP6:%.*]] = add i64 [[INDEX]], 0
 ; SCALABLE-NEXT:    [[TMP7:%.*]] = call i32 @llvm.vscale.i32()
@@ -978,13 +978,13 @@ define void @uniform_store_of_loop_varying(ptr noalias nocapture %a, ptr noalias
 ; FIXEDLEN-NEXT:  entry:
 ; FIXEDLEN-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; FIXEDLEN:       vector.ph:
-; FIXEDLEN-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x ptr> poison, ptr [[B:%.*]], i32 0
+; FIXEDLEN-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x ptr> poison, ptr [[B:%.*]], i64 0
 ; FIXEDLEN-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x ptr> [[BROADCAST_SPLATINSERT]], <2 x ptr> poison, <2 x i32> zeroinitializer
-; FIXEDLEN-NEXT:    [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <2 x ptr> poison, ptr [[B]], i32 0
+; FIXEDLEN-NEXT:    [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <2 x ptr> poison, ptr [[B]], i64 0
 ; FIXEDLEN-NEXT:    [[BROADCAST_SPLAT3:%.*]] = shufflevector <2 x ptr> [[BROADCAST_SPLATINSERT2]], <2 x ptr> poison, <2 x i32> zeroinitializer
-; FIXEDLEN-NEXT:    [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <2 x i64> poison, i64 [[V:%.*]], i32 0
+; FIXEDLEN-NEXT:    [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <2 x i64> poison, i64 [[V:%.*]], i64 0
 ; FIXEDLEN-NEXT:    [[BROADCAST_SPLAT5:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT4]], <2 x i64> poison, <2 x i32> zeroinitializer
-; FIXEDLEN-NEXT:    [[BROADCAST_SPLATINSERT6:%.*]] = insertelement <2 x i64> poison, i64 [[V]], i32 0
+; FIXEDLEN-NEXT:    [[BROADCAST_SPLATINSERT6:%.*]] = insertelement <2 x i64> poison, i64 [[V]], i64 0
 ; FIXEDLEN-NEXT:    [[BROADCAST_SPLAT7:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT6]], <2 x i64> poison, <2 x i32> zeroinitializer
 ; FIXEDLEN-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; FIXEDLEN:       vector.body:
@@ -1036,15 +1036,15 @@ define void @uniform_store_of_loop_varying(ptr noalias nocapture %a, ptr noalias
 ; TF-SCALABLE-NEXT:    [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; TF-SCALABLE-NEXT:    [[TMP5:%.*]] = call <vscale x 1 x i64> @llvm.experimental.stepvector.nxv1i64()
 ; TF-SCALABLE-NEXT:    [[TMP6:%.*]] = add <vscale x 1 x i64> [[TMP5]], zeroinitializer
-; TF-SCALABLE-NEXT:    [[TMP7:%.*]] = mul <vscale x 1 x i64> [[TMP6]], shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 1, i32 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer)
+; TF-SCALABLE-NEXT:    [[TMP7:%.*]] = mul <vscale x 1 x i64> [[TMP6]], shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 1, i64 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer)
 ; TF-SCALABLE-NEXT:    [[INDUCTION:%.*]] = add <vscale x 1 x i64> zeroinitializer, [[TMP7]]
 ; TF-SCALABLE-NEXT:    [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
 ; TF-SCALABLE-NEXT:    [[TMP9:%.*]] = mul i64 1, [[TMP8]]
-; TF-SCALABLE-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[TMP9]], i32 0
+; TF-SCALABLE-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[TMP9]], i64 0
 ; TF-SCALABLE-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 1 x i64> [[DOTSPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
-; TF-SCALABLE-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x ptr> poison, ptr [[B:%.*]], i32 0
+; TF-SCALABLE-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x ptr> poison, ptr [[B:%.*]], i64 0
 ; TF-SCALABLE-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 1 x ptr> [[BROADCAST_SPLATINSERT]], <vscale x 1 x ptr> poison, <vscale x 1 x i32> zeroinitializer
-; TF-SCALABLE-NEXT:    [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[V:%.*]], i32 0
+; TF-SCALABLE-NEXT:    [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[V:%.*]], i64 0
 ; TF-SCALABLE-NEXT:    [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 1 x i64> [[BROADCAST_SPLATINSERT1]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
 ; TF-SCALABLE-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; TF-SCALABLE:       vector.body:
@@ -1081,9 +1081,9 @@ define void @uniform_store_of_loop_varying(ptr noalias nocapture %a, ptr noalias
 ; TF-FIXEDLEN-NEXT:  entry:
 ; TF-FIXEDLEN-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; TF-FIXEDLEN:       vector.ph:
-; TF-FIXEDLEN-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x ptr> poison, ptr [[B:%.*]], i32 0
+; TF-FIXEDLEN-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x ptr> poison, ptr [[B:%.*]], i64 0
 ; TF-FIXEDLEN-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x ptr> [[BROADCAST_SPLATINSERT]], <2 x ptr> poison, <2 x i32> zeroinitializer
-; TF-FIXEDLEN-NEXT:    [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i64> poison, i64 [[V:%.*]], i32 0
+; TF-FIXEDLEN-NEXT:    [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i64> poison, i64 [[V:%.*]], i64 0
 ; TF-FIXEDLEN-NEXT:    [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT1]], <2 x i64> poison, <2 x i32> zeroinitializer
 ; TF-FIXEDLEN-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; TF-FIXEDLEN:       vector.body:
@@ -1143,22 +1143,22 @@ define void @conditional_uniform_store(ptr noalias nocapture %a, ptr noalias noc
 ; SCALABLE-NEXT:    [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
 ; SCALABLE-NEXT:    [[TMP2:%.*]] = call <vscale x 1 x i64> @llvm.experimental.stepvector.nxv1i64()
 ; SCALABLE-NEXT:    [[TMP3:%.*]] = add <vscale x 1 x i64> [[TMP2]], zeroinitializer
-; SCALABLE-NEXT:    [[TMP4:%.*]] = mul <vscale x 1 x i64> [[TMP3]], shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 1, i32 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer)
+; SCALABLE-NEXT:    [[TMP4:%.*]] = mul <vscale x 1 x i64> [[TMP3]], shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 1, i64 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer)
 ; SCALABLE-NEXT:    [[INDUCTION:%.*]] = add <vscale x 1 x i64> zeroinitializer, [[TMP4]]
 ; SCALABLE-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
 ; SCALABLE-NEXT:    [[TMP6:%.*]] = mul i64 1, [[TMP5]]
-; SCALABLE-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[TMP6]], i32 0
+; SCALABLE-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[TMP6]], i64 0
 ; SCALABLE-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 1 x i64> [[DOTSPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
-; SCALABLE-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[V:%.*]], i32 0
+; SCALABLE-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[V:%.*]], i64 0
 ; SCALABLE-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 1 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
-; SCALABLE-NEXT:    [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 1 x ptr> poison, ptr [[B:%.*]], i32 0
+; SCALABLE-NEXT:    [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 1 x ptr> poison, ptr [[B:%.*]], i64 0
 ; SCALABLE-NEXT:    [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 1 x ptr> [[BROADCAST_SPLATINSERT1]], <vscale x 1 x ptr> poison, <vscale x 1 x i32> zeroinitializer
 ; SCALABLE-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; SCALABLE:       vector.body:
 ; SCALABLE-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; SCALABLE-NEXT:    [[VEC_IND:%.*]] = phi <vscale x 1 x i64> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; SCALABLE-NEXT:    [[TMP7:%.*]] = add i64 [[INDEX]], 0
-; SCALABLE-NEXT:    [[TMP8:%.*]] = icmp ugt <vscale x 1 x i64> [[VEC_IND]], shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 10, i32 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer)
+; SCALABLE-NEXT:    [[TMP8:%.*]] = icmp ugt <vscale x 1 x i64> [[VEC_IND]], shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 10, i64 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer)
 ; SCALABLE-NEXT:    call void @llvm.masked.scatter.nxv1i64.nxv1p0(<vscale x 1 x i64> [[BROADCAST_SPLAT]], <vscale x 1 x ptr> [[BROADCAST_SPLAT2]], i32 8, <vscale x 1 x i1> [[TMP8]])
 ; SCALABLE-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP7]]
 ; SCALABLE-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[TMP9]], i32 0
@@ -1194,13 +1194,13 @@ define void @conditional_uniform_store(ptr noalias nocapture %a, ptr noalias noc
 ; FIXEDLEN-NEXT:  entry:
 ; FIXEDLEN-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; FIXEDLEN:       vector.ph:
-; FIXEDLEN-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[V:%.*]], i32 0
+; FIXEDLEN-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[V:%.*]], i64 0
 ; FIXEDLEN-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
-; FIXEDLEN-NEXT:    [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <2 x ptr> poison, ptr [[B:%.*]], i32 0
+; FIXEDLEN-NEXT:    [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <2 x ptr> poison, ptr [[B:%.*]], i64 0
 ; FIXEDLEN-NEXT:    [[BROADCAST_SPLAT3:%.*]] = shufflevector <2 x ptr> [[BROADCAST_SPLATINSERT2]], <2 x ptr> poison, <2 x i32> zeroinitializer
-; FIXEDLEN-NEXT:    [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <2 x i64> poison, i64 [[V]], i32 0
+; FIXEDLEN-NEXT:    [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <2 x i64> poison, i64 [[V]], i64 0
 ; FIXEDLEN-NEXT:    [[BROADCAST_SPLAT5:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT4]], <2 x i64> poison, <2 x i32> zeroinitializer
-; FIXEDLEN-NEXT:    [[BROADCAST_SPLATINSERT6:%.*]] = insertelement <2 x ptr> poison, ptr [[B]], i32 0
+; FIXEDLEN-NEXT:    [[BROADCAST_SPLATINSERT6:%.*]] = insertelement <2 x ptr> poison, ptr [[B]], i64 0
 ; FIXEDLEN-NEXT:    [[BROADCAST_SPLAT7:%.*]] = shufflevector <2 x ptr> [[BROADCAST_SPLATINSERT6]], <2 x ptr> poison, <2 x i32> zeroinitializer
 ; FIXEDLEN-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; FIXEDLEN:       vector.body:
@@ -1259,15 +1259,15 @@ define void @conditional_uniform_store(ptr noalias nocapture %a, ptr noalias noc
 ; TF-SCALABLE-NEXT:    [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; TF-SCALABLE-NEXT:    [[TMP5:%.*]] = call <vscale x 1 x i64> @llvm.experimental.stepvector.nxv1i64()
 ; TF-SCALABLE-NEXT:    [[TMP6:%.*]] = add <vscale x 1 x i64> [[TMP5]], zeroinitializer
-; TF-SCALABLE-NEXT:    [[TMP7:%.*]] = mul <vscale x 1 x i64> [[TMP6]], shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 1, i32 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer)
+; TF-SCALABLE-NEXT:    [[TMP7:%.*]] = mul <vscale x 1 x i64> [[TMP6]], shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 1, i64 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer)
 ; TF-SCALABLE-NEXT:    [[INDUCTION:%.*]] = add <vscale x 1 x i64> zeroinitializer, [[TMP7]]
 ; TF-SCALABLE-NEXT:    [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
 ; TF-SCALABLE-NEXT:    [[TMP9:%.*]] = mul i64 1, [[TMP8]]
-; TF-SCALABLE-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[TMP9]], i32 0
+; TF-SCALABLE-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[TMP9]], i64 0
 ; TF-SCALABLE-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 1 x i64> [[DOTSPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
-; TF-SCALABLE-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[V:%.*]], i32 0
+; TF-SCALABLE-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[V:%.*]], i64 0
 ; TF-SCALABLE-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 1 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
-; TF-SCALABLE-NEXT:    [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 1 x ptr> poison, ptr [[B:%.*]], i32 0
+; TF-SCALABLE-NEXT:    [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 1 x ptr> poison, ptr [[B:%.*]], i64 0
 ; TF-SCALABLE-NEXT:    [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 1 x ptr> [[BROADCAST_SPLATINSERT1]], <vscale x 1 x ptr> poison, <vscale x 1 x i32> zeroinitializer
 ; TF-SCALABLE-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; TF-SCALABLE:       vector.body:
@@ -1275,11 +1275,11 @@ define void @conditional_uniform_store(ptr noalias nocapture %a, ptr noalias noc
 ; TF-SCALABLE-NEXT:    [[VEC_IND:%.*]] = phi <vscale x 1 x i64> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; TF-SCALABLE-NEXT:    [[TMP10:%.*]] = add i64 [[INDEX]], 0
 ; TF-SCALABLE-NEXT:    [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 1 x i1> @llvm.get.active.lane.mask.nxv1i1.i64(i64 [[TMP10]], i64 1024)
-; TF-SCALABLE-NEXT:    [[TMP11:%.*]] = icmp ugt <vscale x 1 x i64> [[VEC_IND]], shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 10, i32 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer)
+; TF-SCALABLE-NEXT:    [[TMP11:%.*]] = icmp ugt <vscale x 1 x i64> [[VEC_IND]], shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 10, i64 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer)
 ; TF-SCALABLE-NEXT:    [[TMP12:%.*]] = select <vscale x 1 x i1> [[ACTIVE_LANE_MASK]], <vscale x 1 x i1> [[TMP11]], <vscale x 1 x i1> zeroinitializer
 ; TF-SCALABLE-NEXT:    call void @llvm.masked.scatter.nxv1i64.nxv1p0(<vscale x 1 x i64> [[BROADCAST_SPLAT]], <vscale x 1 x ptr> [[BROADCAST_SPLAT2]], i32 8, <vscale x 1 x i1> [[TMP12]])
 ; TF-SCALABLE-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP10]]
-; TF-SCALABLE-NEXT:    [[TMP14:%.*]] = xor <vscale x 1 x i1> [[TMP11]], shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i32 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer)
+; TF-SCALABLE-NEXT:    [[TMP14:%.*]] = xor <vscale x 1 x i1> [[TMP11]], shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i64 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer)
 ; TF-SCALABLE-NEXT:    [[TMP15:%.*]] = select <vscale x 1 x i1> [[ACTIVE_LANE_MASK]], <vscale x 1 x i1> [[TMP14]], <vscale x 1 x i1> zeroinitializer
 ; TF-SCALABLE-NEXT:    [[TMP16:%.*]] = or <vscale x 1 x i1> [[TMP12]], [[TMP15]]
 ; TF-SCALABLE-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[TMP13]], i32 0
@@ -1314,9 +1314,9 @@ define void @conditional_uniform_store(ptr noalias nocapture %a, ptr noalias noc
 ; TF-FIXEDLEN-NEXT:  entry:
 ; TF-FIXEDLEN-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; TF-FIXEDLEN:       vector.ph:
-; TF-FIXEDLEN-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[V:%.*]], i32 0
+; TF-FIXEDLEN-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[V:%.*]], i64 0
 ; TF-FIXEDLEN-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
-; TF-FIXEDLEN-NEXT:    [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x ptr> poison, ptr [[B:%.*]], i32 0
+; TF-FIXEDLEN-NEXT:    [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x ptr> poison, ptr [[B:%.*]], i64 0
 ; TF-FIXEDLEN-NEXT:    [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x ptr> [[BROADCAST_SPLATINSERT1]], <2 x ptr> poison, <2 x i32> zeroinitializer
 ; TF-FIXEDLEN-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; TF-FIXEDLEN:       vector.body:
@@ -1386,7 +1386,7 @@ define void @uniform_store_unaligned(ptr noalias nocapture %a, ptr noalias nocap
 ; SCALABLE-NEXT:    [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
 ; SCALABLE-NEXT:    [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]]
 ; SCALABLE-NEXT:    [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
-; SCALABLE-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[V:%.*]], i32 0
+; SCALABLE-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[V:%.*]], i64 0
 ; SCALABLE-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 1 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
 ; SCALABLE-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; SCALABLE:       vector.body:
@@ -1421,9 +1421,9 @@ define void @uniform_store_unaligned(ptr noalias nocapture %a, ptr noalias nocap
 ; FIXEDLEN-NEXT:  entry:
 ; FIXEDLEN-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; FIXEDLEN:       vector.ph:
-; FIXEDLEN-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[V:%.*]], i32 0
+; FIXEDLEN-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[V:%.*]], i64 0
 ; FIXEDLEN-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
-; FIXEDLEN-NEXT:    [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i64> poison, i64 [[V]], i32 0
+; FIXEDLEN-NEXT:    [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i64> poison, i64 [[V]], i64 0
 ; FIXEDLEN-NEXT:    [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT1]], <2 x i64> poison, <2 x i32> zeroinitializer
 ; FIXEDLEN-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; FIXEDLEN:       vector.body:
@@ -1469,7 +1469,7 @@ define void @uniform_store_unaligned(ptr noalias nocapture %a, ptr noalias nocap
 ; TF-SCALABLE-NEXT:    [[N_RND_UP:%.*]] = add i64 1024, [[TMP4]]
 ; TF-SCALABLE-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP2]]
 ; TF-SCALABLE-NEXT:    [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
-; TF-SCALABLE-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[V:%.*]], i32 0
+; TF-SCALABLE-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[V:%.*]], i64 0
 ; TF-SCALABLE-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 1 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
 ; TF-SCALABLE-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; TF-SCALABLE:       vector.body:
@@ -1504,7 +1504,7 @@ define void @uniform_store_unaligned(ptr noalias nocapture %a, ptr noalias nocap
 ; TF-FIXEDLEN-NEXT:  entry:
 ; TF-FIXEDLEN-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; TF-FIXEDLEN:       vector.ph:
-; TF-FIXEDLEN-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[V:%.*]], i32 0
+; TF-FIXEDLEN-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[V:%.*]], i64 0
 ; TF-FIXEDLEN-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
 ; TF-FIXEDLEN-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; TF-FIXEDLEN:       vector.body:

diff  --git a/llvm/test/Transforms/LoopVectorize/RISCV/zvl32b.ll b/llvm/test/Transforms/LoopVectorize/RISCV/zvl32b.ll
index 812f777386221..4c89588af13ec 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/zvl32b.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/zvl32b.ll
@@ -12,9 +12,9 @@ define void @vector_add_i16(ptr noalias nocapture %a, i16 %v, i64 %n) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK:       vector.ph:
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i16> poison, i16 [[V:%.*]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i16> poison, i16 [[V:%.*]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i16> [[BROADCAST_SPLATINSERT]], <2 x i16> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <2 x i16> poison, i16 [[V]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <2 x i16> poison, i16 [[V]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT4:%.*]] = shufflevector <2 x i16> [[BROADCAST_SPLATINSERT3]], <2 x i16> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:

diff  --git a/llvm/test/Transforms/LoopVectorize/X86/conversion-cost.ll b/llvm/test/Transforms/LoopVectorize/X86/conversion-cost.ll
index 8451f11380749..5567b656b35c5 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/conversion-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/conversion-cost.ll
@@ -49,7 +49,7 @@ define i32 @conversion_cost1(i32 %n, ptr nocapture %A, ptr nocapture %B) nounwin
 ; CHECK-NEXT:    [[N_VEC3:%.*]] = sub i64 [[TMP4]], [[N_MOD_VF2]]
 ; CHECK-NEXT:    [[IND_END4:%.*]] = add i64 3, [[N_VEC3]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = trunc i64 [[BC_RESUME_VAL]] to i8
-; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[TMP9]], i32 0
+; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[TMP9]], i64 0
 ; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
 ; CHECK-NEXT:    [[INDUCTION:%.*]] = add <16 x i8> [[DOTSPLAT]], <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>
 ; CHECK-NEXT:    br label [[VEC_EPILOG_VECTOR_BODY:%.*]]

diff  --git a/llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll b/llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll
index 638fc5bccba4c..d7c4733a909c2 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll
@@ -28,7 +28,7 @@ define void @test_pr59459(i64 %iv.start, ptr %arr) {
 ; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 16
 ; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = trunc i64 [[IV_START]] to i32
-; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[TMP9]], i32 0
+; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[TMP9]], i64 0
 ; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <16 x i32> [[DOTSPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer
 ; CHECK-NEXT:    [[INDUCTION:%.*]] = add <16 x i32> [[DOTSPLAT]], <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
 ; CHECK-NEXT:    [[IND_END:%.*]] = add i64 [[IV_START]], [[N_VEC]]
@@ -66,7 +66,7 @@ define void @test_pr59459(i64 %iv.start, ptr %arr) {
 ; CHECK-NEXT:    [[N_VEC4:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF3]]
 ; CHECK-NEXT:    [[IND_END5:%.*]] = add i64 [[IV_START]], [[N_VEC4]]
 ; CHECK-NEXT:    [[TMP20:%.*]] = trunc i64 [[BC_RESUME_VAL]] to i32
-; CHECK-NEXT:    [[DOTSPLATINSERT10:%.*]] = insertelement <8 x i32> poison, i32 [[TMP20]], i32 0
+; CHECK-NEXT:    [[DOTSPLATINSERT10:%.*]] = insertelement <8 x i32> poison, i32 [[TMP20]], i64 0
 ; CHECK-NEXT:    [[DOTSPLAT11:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT10]], <8 x i32> poison, <8 x i32> zeroinitializer
 ; CHECK-NEXT:    [[INDUCTION12:%.*]] = add <8 x i32> [[DOTSPLAT11]], <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
 ; CHECK-NEXT:    br label [[VEC_EPILOG_VECTOR_BODY:%.*]]

diff  --git a/llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll b/llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll
index 40a449f711720..cf1207d468f44 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll
@@ -21,13 +21,13 @@ define i32 @test_explicit_pred(i64 %len) {
 ; CHECK-NEXT:    call void @init(ptr [[ALLOCA]])
 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK:       vector.ph:
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[LEN:%.*]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[LEN:%.*]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT7:%.*]] = insertelement <4 x i64> poison, i64 [[LEN]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT7:%.*]] = insertelement <4 x i64> poison, i64 [[LEN]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT8:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT7]], <4 x i64> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT9:%.*]] = insertelement <4 x i64> poison, i64 [[LEN]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT9:%.*]] = insertelement <4 x i64> poison, i64 [[LEN]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT10:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT9]], <4 x i64> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT11:%.*]] = insertelement <4 x i64> poison, i64 [[LEN]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT11:%.*]] = insertelement <4 x i64> poison, i64 [[LEN]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT12:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT11]], <4 x i64> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:

diff  --git a/llvm/test/Transforms/LoopVectorize/X86/metadata-enable.ll b/llvm/test/Transforms/LoopVectorize/X86/metadata-enable.ll
index 2051e37b2a677..60b7398e8d7d2 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/metadata-enable.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/metadata-enable.ll
@@ -1180,7 +1180,7 @@ define i32 @nopragma(ptr noalias nocapture %a, ptr noalias nocapture readonly %b
 ; O1VEC2-NEXT:  entry:
 ; O1VEC2-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; O1VEC2:       vector.ph:
-; O1VEC2-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i32 0
+; O1VEC2-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i64 0
 ; O1VEC2-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
 ; O1VEC2-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; O1VEC2:       vector.body:
@@ -1220,7 +1220,7 @@ define i32 @nopragma(ptr noalias nocapture %a, ptr noalias nocapture readonly %b
 ; OzVEC2-NEXT:  entry:
 ; OzVEC2-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; OzVEC2:       vector.ph:
-; OzVEC2-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i32 0
+; OzVEC2-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i64 0
 ; OzVEC2-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
 ; OzVEC2-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; OzVEC2:       vector.body:

diff  --git a/llvm/test/Transforms/LoopVectorize/X86/optsize.ll b/llvm/test/Transforms/LoopVectorize/X86/optsize.ll
index f8b95f3dd6fb0..6a514c4807c1f 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/optsize.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/optsize.ll
@@ -18,7 +18,7 @@ define i32 @foo_optsize() #0 {
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[INDEX]], 0
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <64 x i32> poison, i32 [[INDEX]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <64 x i32> poison, i32 [[INDEX]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <64 x i32> [[BROADCAST_SPLATINSERT]], <64 x i32> poison, <64 x i32> zeroinitializer
 ; CHECK-NEXT:    [[VEC_IV:%.*]] = add <64 x i32> [[BROADCAST_SPLAT]], <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp ule <64 x i32> [[VEC_IV]], <i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202>
@@ -57,7 +57,7 @@ define i32 @foo_optsize() #0 {
 ; AUTOVF:       vector.body:
 ; AUTOVF-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; AUTOVF-NEXT:    [[TMP0:%.*]] = add i32 [[INDEX]], 0
-; AUTOVF-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <32 x i32> poison, i32 [[INDEX]], i32 0
+; AUTOVF-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <32 x i32> poison, i32 [[INDEX]], i64 0
 ; AUTOVF-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <32 x i32> [[BROADCAST_SPLATINSERT]], <32 x i32> poison, <32 x i32> zeroinitializer
 ; AUTOVF-NEXT:    [[VEC_IV:%.*]] = add <32 x i32> [[BROADCAST_SPLAT]], <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
 ; AUTOVF-NEXT:    [[TMP1:%.*]] = icmp ule <32 x i32> [[VEC_IV]], <i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202>
@@ -118,7 +118,7 @@ define i32 @foo_minsize() #1 {
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[INDEX]], 0
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <64 x i32> poison, i32 [[INDEX]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <64 x i32> poison, i32 [[INDEX]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <64 x i32> [[BROADCAST_SPLATINSERT]], <64 x i32> poison, <64 x i32> zeroinitializer
 ; CHECK-NEXT:    [[VEC_IV:%.*]] = add <64 x i32> [[BROADCAST_SPLAT]], <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp ule <64 x i32> [[VEC_IV]], <i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202>
@@ -157,7 +157,7 @@ define i32 @foo_minsize() #1 {
 ; AUTOVF:       vector.body:
 ; AUTOVF-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; AUTOVF-NEXT:    [[TMP0:%.*]] = add i32 [[INDEX]], 0
-; AUTOVF-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <32 x i32> poison, i32 [[INDEX]], i32 0
+; AUTOVF-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <32 x i32> poison, i32 [[INDEX]], i64 0
 ; AUTOVF-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <32 x i32> [[BROADCAST_SPLATINSERT]], <32 x i32> poison, <32 x i32> zeroinitializer
 ; AUTOVF-NEXT:    [[VEC_IV:%.*]] = add <32 x i32> [[BROADCAST_SPLAT]], <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
 ; AUTOVF-NEXT:    [[TMP1:%.*]] = icmp ule <32 x i32> [[VEC_IV]], <i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202>
@@ -216,7 +216,7 @@ define void @scev4stride1(ptr noalias nocapture %a, ptr noalias nocapture readon
 ; CHECK-NEXT:  for.body.preheader:
 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK:       vector.ph:
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <64 x i32> poison, i32 [[K:%.*]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <64 x i32> poison, i32 [[K:%.*]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <64 x i32> [[BROADCAST_SPLATINSERT]], <64 x i32> poison, <64 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
@@ -256,7 +256,7 @@ define void @scev4stride1(ptr noalias nocapture %a, ptr noalias nocapture readon
 ; AUTOVF-NEXT:  for.body.preheader:
 ; AUTOVF-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; AUTOVF:       vector.ph:
-; AUTOVF-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[K:%.*]], i32 0
+; AUTOVF-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[K:%.*]], i64 0
 ; AUTOVF-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer
 ; AUTOVF-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; AUTOVF:       vector.body:

diff  --git a/llvm/test/Transforms/LoopVectorize/X86/outer_loop_test1_no_explicit_vect_width.ll b/llvm/test/Transforms/LoopVectorize/X86/outer_loop_test1_no_explicit_vect_width.ll
index fb232b14a88f4..7aa9d54c85472 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/outer_loop_test1_no_explicit_vect_width.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/outer_loop_test1_no_explicit_vect_width.ll
@@ -19,7 +19,7 @@
 ;
 
 ; CHECK-LABEL: vector.ph:
-; CHECK: %[[SplatVal:.*]] = insertelement <4 x i32> poison, i32 %n, i32 0
+; CHECK: %[[SplatVal:.*]] = insertelement <4 x i32> poison, i32 %n, i64 0
 ; CHECK: %[[Splat:.*]] = shufflevector <4 x i32> %[[SplatVal]], <4 x i32> poison, <4 x i32> zeroinitializer
 
 ; CHECK-LABEL: vector.body:
@@ -48,7 +48,7 @@
 ; CHECK: br i1 %[[Cmp]], label %middle.block, label %vector.body
 
 ; AVX-LABEL: vector.ph:
-; AVX: %[[SplatVal:.*]] = insertelement <8 x i32> poison, i32 %n, i32 0
+; AVX: %[[SplatVal:.*]] = insertelement <8 x i32> poison, i32 %n, i64 0
 ; AVX: %[[Splat:.*]] = shufflevector <8 x i32> %[[SplatVal]], <8 x i32> poison, <8 x i32> zeroinitializer
 
 ; AVX-LABEL: vector.body:

diff  --git a/llvm/test/Transforms/LoopVectorize/X86/pr54634.ll b/llvm/test/Transforms/LoopVectorize/X86/pr54634.ll
index 60c30a5b2ae78..6f30105645f1d 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/pr54634.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/pr54634.ll
@@ -42,21 +42,21 @@ define ptr addrspace(10) @japi1_vect_42283(ptr nocapture readonly %0, i32 %1) lo
 ; CHECK:       vector.ph:
 ; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP8]], 16
 ; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP8]], [[N_MOD_VF]]
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x ptr addrspace(10)> poison, ptr addrspace(10) [[DOTUNPACK]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x ptr addrspace(10)> poison, ptr addrspace(10) [[DOTUNPACK]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x ptr addrspace(10)> [[BROADCAST_SPLATINSERT]], <4 x ptr addrspace(10)> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT7:%.*]] = insertelement <4 x ptr addrspace(10)> poison, ptr addrspace(10) [[DOTUNPACK]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT7:%.*]] = insertelement <4 x ptr addrspace(10)> poison, ptr addrspace(10) [[DOTUNPACK]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT8:%.*]] = shufflevector <4 x ptr addrspace(10)> [[BROADCAST_SPLATINSERT7]], <4 x ptr addrspace(10)> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT9:%.*]] = insertelement <4 x ptr addrspace(10)> poison, ptr addrspace(10) [[DOTUNPACK]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT9:%.*]] = insertelement <4 x ptr addrspace(10)> poison, ptr addrspace(10) [[DOTUNPACK]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT10:%.*]] = shufflevector <4 x ptr addrspace(10)> [[BROADCAST_SPLATINSERT9]], <4 x ptr addrspace(10)> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT11:%.*]] = insertelement <4 x ptr addrspace(10)> poison, ptr addrspace(10) [[DOTUNPACK]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT11:%.*]] = insertelement <4 x ptr addrspace(10)> poison, ptr addrspace(10) [[DOTUNPACK]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT12:%.*]] = shufflevector <4 x ptr addrspace(10)> [[BROADCAST_SPLATINSERT11]], <4 x ptr addrspace(10)> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT13:%.*]] = insertelement <4 x i64> poison, i64 [[DOTUNPACK2]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT13:%.*]] = insertelement <4 x i64> poison, i64 [[DOTUNPACK2]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT14:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT13]], <4 x i64> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT15:%.*]] = insertelement <4 x i64> poison, i64 [[DOTUNPACK2]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT15:%.*]] = insertelement <4 x i64> poison, i64 [[DOTUNPACK2]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT16:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT15]], <4 x i64> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT17:%.*]] = insertelement <4 x i64> poison, i64 [[DOTUNPACK2]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT17:%.*]] = insertelement <4 x i64> poison, i64 [[DOTUNPACK2]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT18:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT17]], <4 x i64> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT19:%.*]] = insertelement <4 x i64> poison, i64 [[DOTUNPACK2]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT19:%.*]] = insertelement <4 x i64> poison, i64 [[DOTUNPACK2]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT20:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT19]], <4 x i64> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:

diff  --git a/llvm/test/Transforms/LoopVectorize/X86/tail_loop_folding.ll b/llvm/test/Transforms/LoopVectorize/X86/tail_loop_folding.ll
index b255d15b59702..277016ca02b2f 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/tail_loop_folding.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/tail_loop_folding.ll
@@ -14,7 +14,7 @@ define dso_local void @tail_folding_enabled(ptr noalias nocapture %A, ptr noalia
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[INDEX]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[INDEX]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
 ; CHECK-NEXT:    [[VEC_IV:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp ule <8 x i64> [[VEC_IV]], <i64 429, i64 429, i64 429, i64 429, i64 429, i64 429, i64 429, i64 429>
@@ -82,7 +82,7 @@ define dso_local void @tail_folding_disabled(ptr noalias nocapture %A, ptr noali
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[INDEX]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[INDEX]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
 ; CHECK-NEXT:    [[VEC_IV:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp ule <8 x i64> [[VEC_IV]], <i64 429, i64 429, i64 429, i64 429, i64 429, i64 429, i64 429, i64 429>
@@ -161,14 +161,14 @@ define i32 @reduction_i32(ptr nocapture readonly %A, ptr nocapture readonly %B,
 ; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], 8
 ; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; CHECK-NEXT:    [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[TMP2]], 1
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <8 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP3:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <8 x i64> poison, i64 [[INDEX]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <8 x i64> poison, i64 [[INDEX]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT2:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT1]], <8 x i64> poison, <8 x i32> zeroinitializer
 ; CHECK-NEXT:    [[VEC_IV:%.*]] = add <8 x i64> [[BROADCAST_SPLAT2]], <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>
 ; CHECK-NEXT:    [[TMP4:%.*]] = icmp ule <8 x i64> [[VEC_IV]], [[BROADCAST_SPLAT]]

diff  --git a/llvm/test/Transforms/LoopVectorize/X86/uniform_mem_op.ll b/llvm/test/Transforms/LoopVectorize/X86/uniform_mem_op.ll
index 5f2c8676653c0..df5931ec925e5 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/uniform_mem_op.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/uniform_mem_op.ll
@@ -63,7 +63,7 @@ define i32 @uniform_load2(ptr align(4) %addr) {
 ; CHECK-NEXT:    [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ADDR:%.*]], align 4
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT8:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT8:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT9:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT8]], <4 x i32> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP1]] = add <4 x i32> [[VEC_PHI]], [[BROADCAST_SPLAT9]]
 ; CHECK-NEXT:    [[TMP2]] = add <4 x i32> [[VEC_PHI1]], [[BROADCAST_SPLAT9]]
@@ -496,7 +496,7 @@ define i32 @uniform_load_global() {
 ; CHECK-NEXT:    [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr @GAddr, align 4
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT8:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT8:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT9:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT8]], <4 x i32> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP1]] = add <4 x i32> [[VEC_PHI]], [[BROADCAST_SPLAT9]]
 ; CHECK-NEXT:    [[TMP2]] = add <4 x i32> [[VEC_PHI1]], [[BROADCAST_SPLAT9]]
@@ -558,7 +558,7 @@ define i32 @uniform_load_constexpr() {
 ; CHECK-NEXT:    [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr getelementptr (i32, ptr @GAddr, i64 5), align 4
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT8:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT8:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT9:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT8]], <4 x i32> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP1]] = add <4 x i32> [[VEC_PHI]], [[BROADCAST_SPLAT9]]
 ; CHECK-NEXT:    [[TMP2]] = add <4 x i32> [[VEC_PHI1]], [[BROADCAST_SPLAT9]]

diff  --git a/llvm/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll b/llvm/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll
index 64378d501e59b..5f8f5c0f23bea 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll
@@ -92,7 +92,7 @@ define void @vectorized1(ptr noalias nocapture %A, ptr noalias nocapture readonl
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[INDEX]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[INDEX]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
 ; CHECK-NEXT:    [[VEC_IV:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp ule <8 x i64> [[VEC_IV]], <i64 19, i64 19, i64 19, i64 19, i64 19, i64 19, i64 19, i64 19>

diff  --git a/llvm/test/Transforms/LoopVectorize/X86/vectorize-interleaved-accesses-gap.ll b/llvm/test/Transforms/LoopVectorize/X86/vectorize-interleaved-accesses-gap.ll
index d13c516fcb095..6e83cf612f82b 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/vectorize-interleaved-accesses-gap.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/vectorize-interleaved-accesses-gap.ll
@@ -13,7 +13,7 @@ define void @test_pr59090(ptr %l_out, ptr noalias %b) #0 {
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE14:%.*]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[INDEX]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[INDEX]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
 ; CHECK-NEXT:    [[VEC_IV:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp ule <8 x i64> [[VEC_IV]], <i64 10000, i64 10000, i64 10000, i64 10000, i64 10000, i64 10000, i64 10000, i64 10000>

diff  --git a/llvm/test/Transforms/LoopVectorize/X86/x86-predication.ll b/llvm/test/Transforms/LoopVectorize/X86/x86-predication.ll
index a63a7915346e9..0c2cbc8dc72bf 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/x86-predication.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/x86-predication.ll
@@ -12,7 +12,7 @@ target triple = "x86_64-apple-macosx10.8.0"
 define i32 @predicated_sdiv_masked_load(ptr %a, ptr %b, i32 %x, i1 %c) {
 ; CHECK-LABEL: @predicated_sdiv_masked_load(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i1> poison, i1 [[C:%.*]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i1> poison, i1 [[C:%.*]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i1> [[BROADCAST_SPLATINSERT]], <2 x i1> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
@@ -80,7 +80,7 @@ define i32 @predicated_sdiv_masked_load(ptr %a, ptr %b, i32 %x, i1 %c) {
 ; SINK-GATHER-NEXT:  entry:
 ; SINK-GATHER-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; SINK-GATHER:       vector.ph:
-; SINK-GATHER-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i1> poison, i1 [[C:%.*]], i32 0
+; SINK-GATHER-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i1> poison, i1 [[C:%.*]], i64 0
 ; SINK-GATHER-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i1> [[BROADCAST_SPLATINSERT]], <8 x i1> poison, <8 x i32> zeroinitializer
 ; SINK-GATHER-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; SINK-GATHER:       vector.body:
@@ -243,9 +243,9 @@ define i32 @scalarize_and_sink_gather(ptr %a, i1 %c, i32 %x, i64 %n) {
 ; CHECK:       vector.ph:
 ; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[SMAX]], 2
 ; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[SMAX]], [[N_MOD_VF]]
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i1> poison, i1 [[C:%.*]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i1> poison, i1 [[C:%.*]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i1> [[BROADCAST_SPLATINSERT]], <2 x i1> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <2 x i32> poison, i32 [[X:%.*]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <2 x i32> poison, i32 [[X:%.*]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT4:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT3]], <2 x i32> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
@@ -320,9 +320,9 @@ define i32 @scalarize_and_sink_gather(ptr %a, i1 %c, i32 %x, i64 %n) {
 ; SINK-GATHER:       vector.ph:
 ; SINK-GATHER-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[SMAX]], 8
 ; SINK-GATHER-NEXT:    [[N_VEC:%.*]] = sub i64 [[SMAX]], [[N_MOD_VF]]
-; SINK-GATHER-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i1> poison, i1 [[C:%.*]], i32 0
+; SINK-GATHER-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i1> poison, i1 [[C:%.*]], i64 0
 ; SINK-GATHER-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i1> [[BROADCAST_SPLATINSERT]], <8 x i1> poison, <8 x i32> zeroinitializer
-; SINK-GATHER-NEXT:    [[BROADCAST_SPLATINSERT15:%.*]] = insertelement <8 x i32> poison, i32 [[X:%.*]], i32 0
+; SINK-GATHER-NEXT:    [[BROADCAST_SPLATINSERT15:%.*]] = insertelement <8 x i32> poison, i32 [[X:%.*]], i64 0
 ; SINK-GATHER-NEXT:    [[BROADCAST_SPLAT16:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT15]], <8 x i32> poison, <8 x i32> zeroinitializer
 ; SINK-GATHER-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; SINK-GATHER:       vector.body:

diff  --git a/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-divisible-TC.ll b/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-divisible-TC.ll
index 44066a9ca8793..76ca2507b914c 100644
--- a/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-divisible-TC.ll
+++ b/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-divisible-TC.ll
@@ -158,7 +158,7 @@ define dso_local void @cannotProveAlignedTC(ptr noalias nocapture %A, i32 %p, i3
 ; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], 4
 ; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]]
 ; CHECK-NEXT:    [[TRIP_COUNT_MINUS_1:%.*]] = sub i32 [[N]], 1
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:

diff  --git a/llvm/test/Transforms/LoopVectorize/epilog-vectorization-reductions.ll b/llvm/test/Transforms/LoopVectorize/epilog-vectorization-reductions.ll
index 57e3fa87c3cf7..02c647576c8cf 100644
--- a/llvm/test/Transforms/LoopVectorize/epilog-vectorization-reductions.ll
+++ b/llvm/test/Transforms/LoopVectorize/epilog-vectorization-reductions.ll
@@ -131,7 +131,7 @@ define float @fp_reduction_max(ptr noalias %a, i64 %N) {
 ; CHECK-NEXT:    [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
 ; CHECK-NEXT:    [[N_MOD_VF2:%.*]] = urem i64 [[N]], 4
 ; CHECK-NEXT:    [[N_VEC3:%.*]] = sub i64 [[N]], [[N_MOD_VF2]]
-; CHECK-NEXT:    [[MINMAX_IDENT_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[BC_MERGE_RDX]], i32 0
+; CHECK-NEXT:    [[MINMAX_IDENT_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[BC_MERGE_RDX]], i64 0
 ; CHECK-NEXT:    [[MINMAX_IDENT_SPLAT:%.*]] = shufflevector <4 x float> [[MINMAX_IDENT_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
 ; CHECK:       vec.epilog.vector.body:

diff  --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll
index 7b24f607f5e51..f823e6d0c3110 100644
--- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll
+++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll
@@ -17,7 +17,7 @@ define void @can_sink_after_store(i32 %x, ptr %ptr, i64 %tc) local_unnamed_addr
 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK:       vector.ph:
 ; CHECK-NEXT:    [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i32> poison, i32 [[DOTPRE]], i32 3
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[X:%.*]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[X:%.*]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
@@ -99,7 +99,7 @@ define void @sink_sdiv(i32 %x, ptr %ptr, i64 %tc) local_unnamed_addr #0 {
 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK:       vector.ph:
 ; CHECK-NEXT:    [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i32> poison, i32 [[DOTPRE]], i32 3
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[X:%.*]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[X:%.*]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
@@ -180,7 +180,7 @@ define void @can_sink_with_additional_user(i32 %x, ptr %ptr, i64 %tc) {
 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK:       vector.ph:
 ; CHECK-NEXT:    [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i32> poison, i32 [[DOTPRE]], i32 3
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[X:%.*]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[X:%.*]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
@@ -381,11 +381,11 @@ define void @instruction_with_2_FOR_operands(ptr noalias %A, ptr noalias %B, ptr
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
 ; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds float, ptr [[C:%.*]], i64 [[TMP0]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr [[A:%.*]], align 4
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[TMP2]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[TMP2]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x float> [[VECTOR_RECUR1]], <4 x float> [[BROADCAST_SPLAT]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
 ; CHECK-NEXT:    [[TMP4:%.*]] = load float, ptr [[B:%.*]], align 4
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x float> poison, float [[TMP4]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x float> poison, float [[TMP4]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT3]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT2]], <4 x float> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <4 x float> [[VECTOR_RECUR]], <4 x float> [[BROADCAST_SPLAT3]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
 ; CHECK-NEXT:    [[TMP6:%.*]] = fmul fast <4 x float> [[TMP5]], [[TMP3]]
@@ -453,11 +453,11 @@ define void @instruction_with_2_FOR_operands_and_multiple_other_uses(ptr noalias
 ; CHECK-NEXT:    [[VECTOR_RECUR1:%.*]] = phi <4 x float> [ <float poison, float poison, float poison, float 0.000000e+00>, [[VECTOR_PH]] ], [ [[BROADCAST_SPLAT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
 ; CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[FOR_PTR_2:%.*]], align 4
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x float> [[VECTOR_RECUR1]], <4 x float> [[BROADCAST_SPLAT]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
 ; CHECK-NEXT:    [[TMP3:%.*]] = load float, ptr [[FOR_PTR_1:%.*]], align 4
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x float> poison, float [[TMP3]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x float> poison, float [[TMP3]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT3]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT2]], <4 x float> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <4 x float> [[VECTOR_RECUR]], <4 x float> [[BROADCAST_SPLAT3]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
 ; CHECK-NEXT:    [[TMP5:%.*]] = fmul fast <4 x float> [[TMP4]], <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
@@ -548,11 +548,11 @@ define void @instruction_with_2_FOR_operands_and_multiple_other_uses_chain(ptr n
 ; CHECK-NEXT:    [[VECTOR_RECUR1:%.*]] = phi <4 x float> [ <float poison, float poison, float poison, float 0.000000e+00>, [[VECTOR_PH]] ], [ [[BROADCAST_SPLAT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
 ; CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[FOR_PTR_2:%.*]], align 4
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x float> [[VECTOR_RECUR1]], <4 x float> [[BROADCAST_SPLAT]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
 ; CHECK-NEXT:    [[TMP3:%.*]] = load float, ptr [[FOR_PTR_1:%.*]], align 4
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x float> poison, float [[TMP3]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x float> poison, float [[TMP3]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT3]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT2]], <4 x float> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <4 x float> [[VECTOR_RECUR]], <4 x float> [[BROADCAST_SPLAT3]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
 ; CHECK-NEXT:    [[TMP5:%.*]] = fmul fast <4 x float> [[TMP4]], <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>

diff  --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-multiply-recurrences.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-multiply-recurrences.ll
index 850178538cbf9..c4a2a5f0a8f7b 100644
--- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-multiply-recurrences.ll
+++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-multiply-recurrences.ll
@@ -106,11 +106,11 @@ define void @test_pr54223_sink_after_insertion_order(ptr noalias %a, ptr noalias
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
 ; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds float, ptr [[DST:%.*]], i64 [[TMP0]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr [[A:%.*]], align 4
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[TMP2]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[TMP2]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x float> [[VECTOR_RECUR]], <4 x float> [[BROADCAST_SPLAT]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
 ; CHECK-NEXT:    [[TMP4:%.*]] = load float, ptr [[B:%.*]], align 4
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x float> poison, float [[TMP4]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x float> poison, float [[TMP4]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT3]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT2]], <4 x float> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <4 x float> [[VECTOR_RECUR1]], <4 x float> [[BROADCAST_SPLAT3]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
 ; CHECK-NEXT:    [[TMP6:%.*]] = fneg <4 x float> [[TMP5]]

diff  --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll
index a5207e4f5f27d..4af3beae88e89 100644
--- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll
+++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll
@@ -508,9 +508,9 @@ define void @recurrence_3(ptr readonly noalias %a, ptr noalias %b, i32 %n, float
 ; UNROLL-NO-IC-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]]
 ; UNROLL-NO-IC-NEXT:    [[IND_END:%.*]] = add i64 1, [[N_VEC]]
 ; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i16> poison, i16 [[TMP0]], i32 3
-; UNROLL-NO-IC-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x double> poison, double [[CONV1]], i32 0
+; UNROLL-NO-IC-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x double> poison, double [[CONV1]], i64 0
 ; UNROLL-NO-IC-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x double> [[BROADCAST_SPLATINSERT]], <4 x double> poison, <4 x i32> zeroinitializer
-; UNROLL-NO-IC-NEXT:    [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x double> poison, double [[CONV1]], i32 0
+; UNROLL-NO-IC-NEXT:    [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x double> poison, double [[CONV1]], i64 0
 ; UNROLL-NO-IC-NEXT:    [[BROADCAST_SPLAT3:%.*]] = shufflevector <4 x double> [[BROADCAST_SPLATINSERT2]], <4 x double> poison, <4 x i32> zeroinitializer
 ; UNROLL-NO-IC-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; UNROLL-NO-IC:       vector.body:
@@ -669,7 +669,7 @@ define void @recurrence_3(ptr readonly noalias %a, ptr noalias %b, i32 %n, float
 ; SINK-AFTER-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]]
 ; SINK-AFTER-NEXT:    [[IND_END:%.*]] = add i64 1, [[N_VEC]]
 ; SINK-AFTER-NEXT:    [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i16> poison, i16 [[TMP0]], i32 3
-; SINK-AFTER-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x double> poison, double [[CONV1]], i32 0
+; SINK-AFTER-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x double> poison, double [[CONV1]], i64 0
 ; SINK-AFTER-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x double> [[BROADCAST_SPLATINSERT]], <4 x double> poison, <4 x i32> zeroinitializer
 ; SINK-AFTER-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; SINK-AFTER:       vector.body:
@@ -921,7 +921,7 @@ define i32 @PR27246() {
 ; UNROLL-NO-IC-NEXT:    [[N_VEC:%.*]] = sub i32 [[I_016]], [[N_MOD_VF]]
 ; UNROLL-NO-IC-NEXT:    [[IND_END:%.*]] = sub i32 [[I_016]], [[N_VEC]]
 ; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i32> poison, i32 [[E_015]], i32 3
-; UNROLL-NO-IC-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[I_016]], i32 0
+; UNROLL-NO-IC-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[I_016]], i64 0
 ; UNROLL-NO-IC-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
 ; UNROLL-NO-IC-NEXT:    [[INDUCTION:%.*]] = add <4 x i32> [[DOTSPLAT]], <i32 0, i32 -1, i32 -2, i32 -3>
 ; UNROLL-NO-IC-NEXT:    br label [[VECTOR_BODY:%.*]]
@@ -1017,7 +1017,7 @@ define i32 @PR27246() {
 ; SINK-AFTER-NEXT:    [[N_VEC:%.*]] = sub i32 [[I_016]], [[N_MOD_VF]]
 ; SINK-AFTER-NEXT:    [[IND_END:%.*]] = sub i32 [[I_016]], [[N_VEC]]
 ; SINK-AFTER-NEXT:    [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i32> poison, i32 [[E_015]], i32 3
-; SINK-AFTER-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[I_016]], i32 0
+; SINK-AFTER-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[I_016]], i64 0
 ; SINK-AFTER-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
 ; SINK-AFTER-NEXT:    [[INDUCTION:%.*]] = add <4 x i32> [[DOTSPLAT]], <i32 0, i32 -1, i32 -2, i32 -3>
 ; SINK-AFTER-NEXT:    br label [[VECTOR_BODY:%.*]]
@@ -1402,9 +1402,9 @@ define i32 @extract_second_last_iteration(ptr %cval, i32 %x)  {
 ; UNROLL-NO-IC-NEXT:  entry:
 ; UNROLL-NO-IC-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; UNROLL-NO-IC:       vector.ph:
-; UNROLL-NO-IC-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[X:%.*]], i32 0
+; UNROLL-NO-IC-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[X:%.*]], i64 0
 ; UNROLL-NO-IC-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
-; UNROLL-NO-IC-NEXT:    [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x i32> poison, i32 [[X]], i32 0
+; UNROLL-NO-IC-NEXT:    [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x i32> poison, i32 [[X]], i64 0
 ; UNROLL-NO-IC-NEXT:    [[BROADCAST_SPLAT3:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT2]], <4 x i32> poison, <4 x i32> zeroinitializer
 ; UNROLL-NO-IC-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; UNROLL-NO-IC:       vector.body:
@@ -1479,7 +1479,7 @@ define i32 @extract_second_last_iteration(ptr %cval, i32 %x)  {
 ; SINK-AFTER-NEXT:  entry:
 ; SINK-AFTER-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; SINK-AFTER:       vector.ph:
-; SINK-AFTER-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[X:%.*]], i32 0
+; SINK-AFTER-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[X:%.*]], i64 0
 ; SINK-AFTER-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
 ; SINK-AFTER-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; SINK-AFTER:       vector.body:
@@ -2771,7 +2771,7 @@ define i32 @sink_into_replication_region(i32 %y) {
 ; UNROLL-NO-IC-NEXT:    [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]]
 ; UNROLL-NO-IC-NEXT:    [[IND_END:%.*]] = sub i32 [[Y]], [[N_VEC]]
 ; UNROLL-NO-IC-NEXT:    [[TRIP_COUNT_MINUS_1:%.*]] = sub i32 [[TMP1]], 1
-; UNROLL-NO-IC-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i32 0
+; UNROLL-NO-IC-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i64 0
 ; UNROLL-NO-IC-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
 ; UNROLL-NO-IC-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; UNROLL-NO-IC:       vector.body:
@@ -2780,7 +2780,7 @@ define i32 @sink_into_replication_region(i32 %y) {
 ; UNROLL-NO-IC-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP46:%.*]], [[PRED_UDIV_CONTINUE18]] ]
 ; UNROLL-NO-IC-NEXT:    [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP47:%.*]], [[PRED_UDIV_CONTINUE18]] ]
 ; UNROLL-NO-IC-NEXT:    [[OFFSET_IDX:%.*]] = sub i32 [[Y]], [[INDEX]]
-; UNROLL-NO-IC-NEXT:    [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x i32> poison, i32 [[INDEX]], i32 0
+; UNROLL-NO-IC-NEXT:    [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x i32> poison, i32 [[INDEX]], i64 0
 ; UNROLL-NO-IC-NEXT:    [[BROADCAST_SPLAT3:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT2]], <4 x i32> poison, <4 x i32> zeroinitializer
 ; UNROLL-NO-IC-NEXT:    [[VEC_IV:%.*]] = add <4 x i32> [[BROADCAST_SPLAT3]], <i32 0, i32 1, i32 2, i32 3>
 ; UNROLL-NO-IC-NEXT:    [[VEC_IV4:%.*]] = add <4 x i32> [[BROADCAST_SPLAT3]], <i32 4, i32 5, i32 6, i32 7>
@@ -2968,7 +2968,7 @@ define i32 @sink_into_replication_region(i32 %y) {
 ; SINK-AFTER-NEXT:    [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]]
 ; SINK-AFTER-NEXT:    [[IND_END:%.*]] = sub i32 [[Y]], [[N_VEC]]
 ; SINK-AFTER-NEXT:    [[TRIP_COUNT_MINUS_1:%.*]] = sub i32 [[TMP1]], 1
-; SINK-AFTER-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i32 0
+; SINK-AFTER-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i64 0
 ; SINK-AFTER-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
 ; SINK-AFTER-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; SINK-AFTER:       vector.body:
@@ -2976,7 +2976,7 @@ define i32 @sink_into_replication_region(i32 %y) {
 ; SINK-AFTER-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[TMP22:%.*]], [[PRED_UDIV_CONTINUE8]] ]
 ; SINK-AFTER-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP24:%.*]], [[PRED_UDIV_CONTINUE8]] ]
 ; SINK-AFTER-NEXT:    [[OFFSET_IDX:%.*]] = sub i32 [[Y]], [[INDEX]]
-; SINK-AFTER-NEXT:    [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i32> poison, i32 [[INDEX]], i32 0
+; SINK-AFTER-NEXT:    [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i32> poison, i32 [[INDEX]], i64 0
 ; SINK-AFTER-NEXT:    [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT1]], <4 x i32> poison, <4 x i32> zeroinitializer
 ; SINK-AFTER-NEXT:    [[VEC_IV:%.*]] = add <4 x i32> [[BROADCAST_SPLAT2]], <i32 0, i32 1, i32 2, i32 3>
 ; SINK-AFTER-NEXT:    [[TMP2:%.*]] = icmp ule <4 x i32> [[VEC_IV]], [[BROADCAST_SPLAT]]
@@ -3078,7 +3078,7 @@ define i32 @sink_into_replication_region_multiple(ptr %x, i32 %y) {
 ; UNROLL-NO-IC-NEXT:    [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]]
 ; UNROLL-NO-IC-NEXT:    [[IND_END:%.*]] = sub i32 [[Y]], [[N_VEC]]
 ; UNROLL-NO-IC-NEXT:    [[TRIP_COUNT_MINUS_1:%.*]] = sub i32 [[TMP1]], 1
-; UNROLL-NO-IC-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i32 0
+; UNROLL-NO-IC-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i64 0
 ; UNROLL-NO-IC-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
 ; UNROLL-NO-IC-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; UNROLL-NO-IC:       vector.body:
@@ -3362,7 +3362,7 @@ define i32 @sink_into_replication_region_multiple(ptr %x, i32 %y) {
 ; SINK-AFTER-NEXT:    [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]]
 ; SINK-AFTER-NEXT:    [[IND_END:%.*]] = sub i32 [[Y]], [[N_VEC]]
 ; SINK-AFTER-NEXT:    [[TRIP_COUNT_MINUS_1:%.*]] = sub i32 [[TMP1]], 1
-; SINK-AFTER-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i32 0
+; SINK-AFTER-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i64 0
 ; SINK-AFTER-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
 ; SINK-AFTER-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; SINK-AFTER:       vector.body:

diff  --git a/llvm/test/Transforms/LoopVectorize/float-minmax-instruction-flag.ll b/llvm/test/Transforms/LoopVectorize/float-minmax-instruction-flag.ll
index 43229dc83d52e..ac60eb5c1c721 100644
--- a/llvm/test/Transforms/LoopVectorize/float-minmax-instruction-flag.ll
+++ b/llvm/test/Transforms/LoopVectorize/float-minmax-instruction-flag.ll
@@ -50,7 +50,7 @@ define float @minloopattr(ptr nocapture readonly %arg) #0 {
 ; CHECK-NEXT:    [[T:%.*]] = load float, ptr [[ARG:%.*]], align 4
 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK:       vector.ph:
-; CHECK-NEXT:    [[MINMAX_IDENT_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[T]], i32 0
+; CHECK-NEXT:    [[MINMAX_IDENT_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[T]], i64 0
 ; CHECK-NEXT:    [[MINMAX_IDENT_SPLAT:%.*]] = shufflevector <4 x float> [[MINMAX_IDENT_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:

diff  --git a/llvm/test/Transforms/LoopVectorize/if-pred-non-void.ll b/llvm/test/Transforms/LoopVectorize/if-pred-non-void.ll
index 13cd1a5caab2d..ce7ac1bebde1a 100644
--- a/llvm/test/Transforms/LoopVectorize/if-pred-non-void.ll
+++ b/llvm/test/Transforms/LoopVectorize/if-pred-non-void.ll
@@ -780,7 +780,7 @@ define i32 @predicated_udiv_scalarized_operand(ptr %a, i1 %c, i32 %x, i64 %n) {
 ; CHECK:       vector.ph:
 ; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[SMAX]], 2
 ; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[SMAX]], [[N_MOD_VF]]
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i1> poison, i1 [[C:%.*]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i1> poison, i1 [[C:%.*]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i1> [[BROADCAST_SPLATINSERT]], <2 x i1> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:

diff  --git a/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll b/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll
index e3e2743c44511..7479fc0f84734 100644
--- a/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll
+++ b/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll
@@ -357,7 +357,7 @@ define void @bug18724(i1 %cond, ptr %ptr, i1 %cond.2, i64 %v.1, i32 %v.2) {
 ; VEC-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP4]], [[N_MOD_VF]]
 ; VEC-NEXT:    [[IND_END:%.*]] = add i64 [[V_1]], [[N_VEC]]
 ; VEC-NEXT:    [[TMP5:%.*]] = insertelement <2 x i32> zeroinitializer, i32 [[V_2:%.*]], i32 0
-; VEC-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i1> poison, i1 [[COND_2:%.*]], i32 0
+; VEC-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i1> poison, i1 [[COND_2:%.*]], i64 0
 ; VEC-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i1> [[BROADCAST_SPLATINSERT]], <2 x i1> poison, <2 x i32> zeroinitializer
 ; VEC-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; VEC:       vector.body:
@@ -558,7 +558,7 @@ define void @minimal_bit_widths(i1 %c) {
 ;
 ; VEC-LABEL: @minimal_bit_widths(
 ; VEC-NEXT:  entry:
-; VEC-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i1> poison, i1 [[C:%.*]], i32 0
+; VEC-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i1> poison, i1 [[C:%.*]], i64 0
 ; VEC-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i1> [[BROADCAST_SPLATINSERT]], <2 x i1> poison, <2 x i32> zeroinitializer
 ; VEC-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; VEC:       vector.body:

diff  --git a/llvm/test/Transforms/LoopVectorize/induction-step.ll b/llvm/test/Transforms/LoopVectorize/induction-step.ll
index affe0ae290365..069cb1f7cad7b 100644
--- a/llvm/test/Transforms/LoopVectorize/induction-step.ll
+++ b/llvm/test/Transforms/LoopVectorize/induction-step.ll
@@ -15,14 +15,14 @@
 ; CHECK:       for.body.lr.ph:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr @int_inc, align 4
 ; CHECK:       vector.ph:
-; CHECK:         [[DOTSPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 %init, i32 0
+; CHECK:         [[DOTSPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 %init, i64 0
 ; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer
-; CHECK-NEXT:    [[DOTSPLATINSERT2:%.*]] = insertelement <8 x i32> poison, i32 [[TMP0]], i32 0
+; CHECK-NEXT:    [[DOTSPLATINSERT2:%.*]] = insertelement <8 x i32> poison, i32 [[TMP0]], i64 0
 ; CHECK-NEXT:    [[DOTSPLAT3:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT2]], <8 x i32> poison, <8 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP6:%.*]] = mul <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, [[DOTSPLAT3]]
 ; CHECK-NEXT:    [[INDUCTION4:%.*]] = add <8 x i32> [[DOTSPLAT]], [[TMP6]]
 ; CHECK-NEXT:    [[TMP7:%.*]] = mul i32 [[TMP0]], 8
-; CHECK-NEXT:    [[DOTSPLATINSERT5:%.*]] = insertelement <8 x i32> poison, i32 [[TMP7]], i32 0
+; CHECK-NEXT:    [[DOTSPLATINSERT5:%.*]] = insertelement <8 x i32> poison, i32 [[TMP7]], i64 0
 ; CHECK-NEXT:    [[DOTSPLAT6:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT5]], <8 x i32> poison, <8 x i32> zeroinitializer
 ; CHECK-NEXT:    br label %vector.body
 ; CHECK:       vector.body:
@@ -85,14 +85,14 @@ for.end:                                          ; preds = %for.end.loopexit, %
 
 ; CHECK-LABEL: @induction_with_loop_inv(
 ; CHECK:       vector.ph:
-; CHECK:         [[DOTSPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 %x.011, i32 0
+; CHECK:         [[DOTSPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 %x.011, i64 0
 ; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer
-; CHECK-NEXT:    [[DOTSPLATINSERT2:%.*]] = insertelement <8 x i32> poison, i32 %j.012, i32 0
+; CHECK-NEXT:    [[DOTSPLATINSERT2:%.*]] = insertelement <8 x i32> poison, i32 %j.012, i64 0
 ; CHECK-NEXT:    [[DOTSPLAT3:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT2]], <8 x i32> poison, <8 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP4:%.*]] = mul <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, [[DOTSPLAT3]]
 ; CHECK-NEXT:    [[INDUCTION4:%.*]] = add <8 x i32> [[DOTSPLAT]], [[TMP4]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = mul i32 %j.012, 8
-; CHECK-NEXT:    [[DOTSPLATINSERT5:%.*]] = insertelement <8 x i32> poison, i32 [[TMP5]], i32 0
+; CHECK-NEXT:    [[DOTSPLATINSERT5:%.*]] = insertelement <8 x i32> poison, i32 [[TMP5]], i64 0
 ; CHECK-NEXT:    [[DOTSPLAT6:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT5]], <8 x i32> poison, <8 x i32> zeroinitializer
 ; CHECK-NEXT:    br label %vector.body
 ; CHECK:       vector.body:
@@ -159,12 +159,12 @@ for.end6:                                         ; preds = %for.end6.loopexit,
 ; CHECK-LABEL: @non_primary_iv_loop_inv_trunc(
 ; CHECK:       vector.ph:
 ; CHECK:         [[TMP3:%.*]] = trunc i64 %step to i32
-; CHECK-NEXT:    [[DOTSPLATINSERT5:%.*]] = insertelement <8 x i32> poison, i32 [[TMP3]], i32 0
+; CHECK-NEXT:    [[DOTSPLATINSERT5:%.*]] = insertelement <8 x i32> poison, i32 [[TMP3]], i64 0
 ; CHECK-NEXT:    [[DOTSPLAT6:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT5]], <8 x i32> poison, <8 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP4:%.*]] = mul <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, [[DOTSPLAT6]]
 ; CHECK-NEXT:    [[INDUCTION7:%.*]] = add <8 x i32> zeroinitializer, [[TMP4]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = mul i32 [[TMP3]], 8
-; CHECK-NEXT:    [[DOTSPLATINSERT8:%.*]] = insertelement <8 x i32> poison, i32 [[TMP5]], i32 0
+; CHECK-NEXT:    [[DOTSPLATINSERT8:%.*]] = insertelement <8 x i32> poison, i32 [[TMP5]], i64 0
 ; CHECK-NEXT:    [[DOTSPLAT9:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT8]], <8 x i32> poison, <8 x i32> zeroinitializer
 ; CHECK-NEXT:    br label %vector.body
 ; CHECK:       vector.body:

diff  --git a/llvm/test/Transforms/LoopVectorize/induction.ll b/llvm/test/Transforms/LoopVectorize/induction.ll
index 4b41a32da1a95..4053bc52f8583 100644
--- a/llvm/test/Transforms/LoopVectorize/induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/induction.ll
@@ -280,7 +280,7 @@ define void @scalar_use(ptr %a, float %b, i64 %offset, i64 %offset2, i64 %n) {
 ; CHECK:       vector.ph:
 ; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], 2
 ; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[B:%.*]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[B:%.*]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT]], <2 x float> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
@@ -473,9 +473,9 @@ define void @scalar_use(ptr %a, float %b, i64 %offset, i64 %offset2, i64 %n) {
 ; UNROLL-NO-IC:       vector.ph:
 ; UNROLL-NO-IC-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
 ; UNROLL-NO-IC-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
-; UNROLL-NO-IC-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[B:%.*]], i32 0
+; UNROLL-NO-IC-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[B:%.*]], i64 0
 ; UNROLL-NO-IC-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT]], <2 x float> poison, <2 x i32> zeroinitializer
-; UNROLL-NO-IC-NEXT:    [[BROADCAST_SPLATINSERT7:%.*]] = insertelement <2 x float> poison, float [[B]], i32 0
+; UNROLL-NO-IC-NEXT:    [[BROADCAST_SPLATINSERT7:%.*]] = insertelement <2 x float> poison, float [[B]], i64 0
 ; UNROLL-NO-IC-NEXT:    [[BROADCAST_SPLAT8:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT7]], <2 x float> poison, <2 x i32> zeroinitializer
 ; UNROLL-NO-IC-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; UNROLL-NO-IC:       vector.body:
@@ -1262,7 +1262,7 @@ define void @scalarize_induction_variable_03(ptr %p, i32 %y, i64 %n) {
 ; CHECK:       vector.ph:
 ; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[SMAX]], 2
 ; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[SMAX]], [[N_MOD_VF]]
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[Y:%.*]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[Y:%.*]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
@@ -1414,9 +1414,9 @@ define void @scalarize_induction_variable_03(ptr %p, i32 %y, i64 %n) {
 ; UNROLL-NO-IC:       vector.ph:
 ; UNROLL-NO-IC-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[SMAX]], 4
 ; UNROLL-NO-IC-NEXT:    [[N_VEC:%.*]] = sub i64 [[SMAX]], [[N_MOD_VF]]
-; UNROLL-NO-IC-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[Y:%.*]], i32 0
+; UNROLL-NO-IC-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[Y:%.*]], i64 0
 ; UNROLL-NO-IC-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
-; UNROLL-NO-IC-NEXT:    [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i32> poison, i32 [[Y]], i32 0
+; UNROLL-NO-IC-NEXT:    [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i32> poison, i32 [[Y]], i64 0
 ; UNROLL-NO-IC-NEXT:    [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT1]], <2 x i32> poison, <2 x i32> zeroinitializer
 ; UNROLL-NO-IC-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; UNROLL-NO-IC:       vector.body:
@@ -1983,7 +1983,7 @@ define i32 @scalarize_induction_variable_05(ptr %a, i32 %x, i1 %c, i32 %n) {
 ; CHECK:       vector.ph:
 ; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[SMAX]], 2
 ; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[SMAX]], [[N_MOD_VF]]
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i1> poison, i1 [[C:%.*]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i1> poison, i1 [[C:%.*]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i1> [[BROADCAST_SPLATINSERT]], <2 x i1> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
@@ -2214,9 +2214,9 @@ define i32 @scalarize_induction_variable_05(ptr %a, i32 %x, i1 %c, i32 %n) {
 ; UNROLL-NO-IC:       vector.ph:
 ; UNROLL-NO-IC-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[SMAX]], 4
 ; UNROLL-NO-IC-NEXT:    [[N_VEC:%.*]] = sub i32 [[SMAX]], [[N_MOD_VF]]
-; UNROLL-NO-IC-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i1> poison, i1 [[C:%.*]], i32 0
+; UNROLL-NO-IC-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i1> poison, i1 [[C:%.*]], i64 0
 ; UNROLL-NO-IC-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i1> [[BROADCAST_SPLATINSERT]], <2 x i1> poison, <2 x i32> zeroinitializer
-; UNROLL-NO-IC-NEXT:    [[BROADCAST_SPLATINSERT5:%.*]] = insertelement <2 x i1> poison, i1 [[C]], i32 0
+; UNROLL-NO-IC-NEXT:    [[BROADCAST_SPLATINSERT5:%.*]] = insertelement <2 x i1> poison, i1 [[C]], i64 0
 ; UNROLL-NO-IC-NEXT:    [[BROADCAST_SPLAT6:%.*]] = shufflevector <2 x i1> [[BROADCAST_SPLATINSERT5]], <2 x i1> poison, <2 x i32> zeroinitializer
 ; UNROLL-NO-IC-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; UNROLL-NO-IC:       vector.body:
@@ -2482,7 +2482,7 @@ define void @iv_vector_and_scalar_users(ptr %p, i32 %a, i32 %n) {
 ; CHECK:       vector.ph:
 ; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 2
 ; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[A:%.*]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[A:%.*]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
@@ -2641,9 +2641,9 @@ define void @iv_vector_and_scalar_users(ptr %p, i32 %a, i32 %n) {
 ; UNROLL-NO-IC:       vector.ph:
 ; UNROLL-NO-IC-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 4
 ; UNROLL-NO-IC-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
-; UNROLL-NO-IC-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[A:%.*]], i32 0
+; UNROLL-NO-IC-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[A:%.*]], i64 0
 ; UNROLL-NO-IC-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
-; UNROLL-NO-IC-NEXT:    [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <2 x i32> poison, i32 [[A]], i32 0
+; UNROLL-NO-IC-NEXT:    [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <2 x i32> poison, i32 [[A]], i64 0
 ; UNROLL-NO-IC-NEXT:    [[BROADCAST_SPLAT3:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT2]], <2 x i32> poison, <2 x i32> zeroinitializer
 ; UNROLL-NO-IC-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; UNROLL-NO-IC:       vector.body:
@@ -3241,7 +3241,7 @@ define i32 @testoverflowcheck() {
 ; CHECK-NEXT:    [[DOTCAST:%.*]] = trunc i32 [[N_VEC]] to i8
 ; CHECK-NEXT:    [[IND_END:%.*]] = add i8 [[DOTPR_I]], [[DOTCAST]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x i32> <i32 -1, i32 -1>, i32 [[C_PROMOTED_I]], i32 0
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[TMP0]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[TMP0]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
@@ -3372,9 +3372,9 @@ define i32 @testoverflowcheck() {
 ; UNROLL-NO-IC-NEXT:    [[DOTCAST:%.*]] = trunc i32 [[N_VEC]] to i8
 ; UNROLL-NO-IC-NEXT:    [[IND_END:%.*]] = add i8 [[DOTPR_I]], [[DOTCAST]]
 ; UNROLL-NO-IC-NEXT:    [[TMP4:%.*]] = insertelement <2 x i32> <i32 -1, i32 -1>, i32 [[C_PROMOTED_I]], i32 0
-; UNROLL-NO-IC-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[TMP0]], i32 0
+; UNROLL-NO-IC-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[TMP0]], i64 0
 ; UNROLL-NO-IC-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
-; UNROLL-NO-IC-NEXT:    [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <2 x i32> poison, i32 [[TMP0]], i32 0
+; UNROLL-NO-IC-NEXT:    [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <2 x i32> poison, i32 [[TMP0]], i64 0
 ; UNROLL-NO-IC-NEXT:    [[BROADCAST_SPLAT3:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT2]], <2 x i32> poison, <2 x i32> zeroinitializer
 ; UNROLL-NO-IC-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; UNROLL-NO-IC:       vector.body:
@@ -3503,7 +3503,7 @@ define void @wrappingindvars1(i8 %t, i32 %len, ptr %A) {
 ; CHECK-NEXT:    [[DOTCAST:%.*]] = trunc i32 [[N_VEC]] to i8
 ; CHECK-NEXT:    [[IND_END:%.*]] = add i8 [[T]], [[DOTCAST]]
 ; CHECK-NEXT:    [[IND_END2:%.*]] = add i32 [[EXT]], [[N_VEC]]
-; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[EXT]], i32 0
+; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[EXT]], i64 0
 ; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[INDUCTION:%.*]] = add <2 x i32> [[DOTSPLAT]], <i32 0, i32 1>
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
@@ -3708,7 +3708,7 @@ define void @wrappingindvars1(i8 %t, i32 %len, ptr %A) {
 ; UNROLL-NO-IC-NEXT:    [[DOTCAST:%.*]] = trunc i32 [[N_VEC]] to i8
 ; UNROLL-NO-IC-NEXT:    [[IND_END:%.*]] = add i8 [[T]], [[DOTCAST]]
 ; UNROLL-NO-IC-NEXT:    [[IND_END2:%.*]] = add i32 [[EXT]], [[N_VEC]]
-; UNROLL-NO-IC-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[EXT]], i32 0
+; UNROLL-NO-IC-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[EXT]], i64 0
 ; UNROLL-NO-IC-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
 ; UNROLL-NO-IC-NEXT:    [[INDUCTION:%.*]] = add <2 x i32> [[DOTSPLAT]], <i32 0, i32 1>
 ; UNROLL-NO-IC-NEXT:    br label [[VECTOR_BODY:%.*]]
@@ -3885,7 +3885,7 @@ define void @wrappingindvars2(i8 %t, i32 %len, ptr %A) {
 ; CHECK-NEXT:    [[IND_END:%.*]] = add i8 [[T]], [[DOTCAST]]
 ; CHECK-NEXT:    [[TMP12:%.*]] = mul i32 [[N_VEC]], 4
 ; CHECK-NEXT:    [[IND_END1:%.*]] = add i32 [[EXT_MUL]], [[TMP12]]
-; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[EXT_MUL]], i32 0
+; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[EXT_MUL]], i64 0
 ; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[INDUCTION:%.*]] = add <2 x i32> [[DOTSPLAT]], <i32 0, i32 4>
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
@@ -4099,7 +4099,7 @@ define void @wrappingindvars2(i8 %t, i32 %len, ptr %A) {
 ; UNROLL-NO-IC-NEXT:    [[IND_END:%.*]] = add i8 [[T]], [[DOTCAST]]
 ; UNROLL-NO-IC-NEXT:    [[TMP12:%.*]] = mul i32 [[N_VEC]], 4
 ; UNROLL-NO-IC-NEXT:    [[IND_END1:%.*]] = add i32 [[EXT_MUL]], [[TMP12]]
-; UNROLL-NO-IC-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[EXT_MUL]], i32 0
+; UNROLL-NO-IC-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[EXT_MUL]], i64 0
 ; UNROLL-NO-IC-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
 ; UNROLL-NO-IC-NEXT:    [[INDUCTION:%.*]] = add <2 x i32> [[DOTSPLAT]], <i32 0, i32 4>
 ; UNROLL-NO-IC-NEXT:    br label [[VECTOR_BODY:%.*]]
@@ -4697,7 +4697,7 @@ define void @nonprimary(ptr nocapture %a, i32 %start, i32 %i, i32 %k) {
 ; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 2
 ; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[TMP0]], [[N_MOD_VF]]
 ; CHECK-NEXT:    [[IND_END:%.*]] = add i32 [[I]], [[N_VEC]]
-; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[I]], i32 0
+; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[I]], i64 0
 ; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[INDUCTION:%.*]] = add <2 x i32> [[DOTSPLAT]], <i32 0, i32 1>
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
@@ -4821,7 +4821,7 @@ define void @nonprimary(ptr nocapture %a, i32 %start, i32 %i, i32 %k) {
 ; UNROLL-NO-IC-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 4
 ; UNROLL-NO-IC-NEXT:    [[N_VEC:%.*]] = sub i32 [[TMP0]], [[N_MOD_VF]]
 ; UNROLL-NO-IC-NEXT:    [[IND_END:%.*]] = add i32 [[I]], [[N_VEC]]
-; UNROLL-NO-IC-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[I]], i32 0
+; UNROLL-NO-IC-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[I]], i64 0
 ; UNROLL-NO-IC-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
 ; UNROLL-NO-IC-NEXT:    [[INDUCTION:%.*]] = add <2 x i32> [[DOTSPLAT]], <i32 0, i32 1>
 ; UNROLL-NO-IC-NEXT:    br label [[VECTOR_BODY:%.*]]
@@ -6013,7 +6013,7 @@ define void @pr52460_first_order_recurrence_truncated_iv(ptr noalias %src, ptr %
 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[TMP0]], 0
 ; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <2 x i32> [[VECTOR_RECUR]], <2 x i32> [[VEC_IND]], <2 x i32> <i32 1, i32 2>
 ; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[SRC:%.*]], align 4
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[TMP3]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[TMP3]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP4:%.*]] = mul nsw <2 x i32> [[BROADCAST_SPLAT]], [[TMP2]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr i32, ptr [[DST:%.*]], i32 [[TMP1]]
@@ -6137,7 +6137,7 @@ define void @pr52460_first_order_recurrence_truncated_iv(ptr noalias %src, ptr %
 ; UNROLL-NO-IC-NEXT:    [[TMP3:%.*]] = shufflevector <2 x i32> [[VECTOR_RECUR]], <2 x i32> [[VEC_IND]], <2 x i32> <i32 1, i32 2>
 ; UNROLL-NO-IC-NEXT:    [[TMP4:%.*]] = shufflevector <2 x i32> [[VEC_IND]], <2 x i32> [[STEP_ADD]], <2 x i32> <i32 1, i32 2>
 ; UNROLL-NO-IC-NEXT:    [[TMP5:%.*]] = load i32, ptr [[SRC:%.*]], align 4
-; UNROLL-NO-IC-NEXT:    [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <2 x i32> poison, i32 [[TMP5]], i32 0
+; UNROLL-NO-IC-NEXT:    [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <2 x i32> poison, i32 [[TMP5]], i64 0
 ; UNROLL-NO-IC-NEXT:    [[BROADCAST_SPLAT4:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT3]], <2 x i32> poison, <2 x i32> zeroinitializer
 ; UNROLL-NO-IC-NEXT:    [[TMP6:%.*]] = mul nsw <2 x i32> [[BROADCAST_SPLAT4]], [[TMP3]]
 ; UNROLL-NO-IC-NEXT:    [[TMP7:%.*]] = mul nsw <2 x i32> [[BROADCAST_SPLAT4]], [[TMP4]]
@@ -6295,12 +6295,12 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n
 ; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
 ; CHECK-NEXT:    [[DOTCAST:%.*]] = trunc i64 [[N_VEC]] to i32
 ; CHECK-NEXT:    [[IND_END:%.*]] = mul i32 [[DOTCAST]], [[STEP]]
-; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[STEP]], i32 0
+; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[STEP]], i64 0
 ; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP17:%.*]] = mul <2 x i32> <i32 0, i32 1>, [[DOTSPLAT]]
 ; CHECK-NEXT:    [[INDUCTION:%.*]] = add <2 x i32> zeroinitializer, [[TMP17]]
 ; CHECK-NEXT:    [[TMP18:%.*]] = mul i32 [[STEP]], 2
-; CHECK-NEXT:    [[DOTSPLATINSERT2:%.*]] = insertelement <2 x i32> poison, i32 [[TMP18]], i32 0
+; CHECK-NEXT:    [[DOTSPLATINSERT2:%.*]] = insertelement <2 x i32> poison, i32 [[TMP18]], i64 0
 ; CHECK-NEXT:    [[DOTSPLAT3:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT2]], <2 x i32> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
@@ -6519,12 +6519,12 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n
 ; UNROLL-NO-IC-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
 ; UNROLL-NO-IC-NEXT:    [[DOTCAST:%.*]] = trunc i64 [[N_VEC]] to i32
 ; UNROLL-NO-IC-NEXT:    [[IND_END:%.*]] = mul i32 [[DOTCAST]], [[STEP]]
-; UNROLL-NO-IC-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[STEP]], i32 0
+; UNROLL-NO-IC-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[STEP]], i64 0
 ; UNROLL-NO-IC-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
 ; UNROLL-NO-IC-NEXT:    [[TMP17:%.*]] = mul <2 x i32> <i32 0, i32 1>, [[DOTSPLAT]]
 ; UNROLL-NO-IC-NEXT:    [[INDUCTION:%.*]] = add <2 x i32> zeroinitializer, [[TMP17]]
 ; UNROLL-NO-IC-NEXT:    [[TMP18:%.*]] = mul i32 [[STEP]], 2
-; UNROLL-NO-IC-NEXT:    [[DOTSPLATINSERT2:%.*]] = insertelement <2 x i32> poison, i32 [[TMP18]], i32 0
+; UNROLL-NO-IC-NEXT:    [[DOTSPLATINSERT2:%.*]] = insertelement <2 x i32> poison, i32 [[TMP18]], i64 0
 ; UNROLL-NO-IC-NEXT:    [[DOTSPLAT3:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT2]], <2 x i32> poison, <2 x i32> zeroinitializer
 ; UNROLL-NO-IC-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; UNROLL-NO-IC:       vector.body:

diff  --git a/llvm/test/Transforms/LoopVectorize/instruction-only-used-outside-of-loop.ll b/llvm/test/Transforms/LoopVectorize/instruction-only-used-outside-of-loop.ll
index 86b0ca2b3650a..8f0dc81d728cb 100644
--- a/llvm/test/Transforms/LoopVectorize/instruction-only-used-outside-of-loop.ll
+++ b/llvm/test/Transforms/LoopVectorize/instruction-only-used-outside-of-loop.ll
@@ -132,7 +132,7 @@ define i32 @cond_branch(i32 %a, ptr %src) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK:       vector.ph:
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[A:%.*]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[A:%.*]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:

diff  --git a/llvm/test/Transforms/LoopVectorize/loop-form.ll b/llvm/test/Transforms/LoopVectorize/loop-form.ll
index e1816b8d4d0ff..1f06990af9650 100644
--- a/llvm/test/Transforms/LoopVectorize/loop-form.ll
+++ b/llvm/test/Transforms/LoopVectorize/loop-form.ll
@@ -52,7 +52,7 @@ define void @bottom_tested(ptr %p, i32 %n) {
 ; TAILFOLD-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], 2
 ; TAILFOLD-NEXT:    [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]]
 ; TAILFOLD-NEXT:    [[TRIP_COUNT_MINUS_1:%.*]] = sub i32 [[TMP0]], 1
-; TAILFOLD-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i32 0
+; TAILFOLD-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i64 0
 ; TAILFOLD-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
 ; TAILFOLD-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; TAILFOLD:       vector.body:

diff  --git a/llvm/test/Transforms/LoopVectorize/minmax_reduction.ll b/llvm/test/Transforms/LoopVectorize/minmax_reduction.ll
index 2e0b27aeaa9f4..65c04ca1a89ee 100644
--- a/llvm/test/Transforms/LoopVectorize/minmax_reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/minmax_reduction.ll
@@ -11,7 +11,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 ; Turn this into a max reduction. Make sure we use a splat to initialize the
 ; vector for the reduction.
 ; CHECK-LABEL: @max_red(
-; CHECK: %[[VAR:.*]] = insertelement <2 x i32> poison, i32 %max, i32 0
+; CHECK: %[[VAR:.*]] = insertelement <2 x i32> poison, i32 %max, i64 0
 ; CHECK: {{.*}} = shufflevector <2 x i32> %[[VAR]], <2 x i32> poison, <2 x i32> zeroinitializer
 ; CHECK: icmp sgt <2 x i32>
 ; CHECK: select <2 x i1>

diff  --git a/llvm/test/Transforms/LoopVectorize/multiple-strides-vectorization.ll b/llvm/test/Transforms/LoopVectorize/multiple-strides-vectorization.ll
index f5a174c2e01a1..47104645c18a6 100644
--- a/llvm/test/Transforms/LoopVectorize/multiple-strides-vectorization.ll
+++ b/llvm/test/Transforms/LoopVectorize/multiple-strides-vectorization.ll
@@ -68,7 +68,7 @@ define void @Test(ptr nocapture %obj, i64 %z) #0 {
 ; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP11]], i32 0
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP12]], align 4, !alias.scope !0
 ; CHECK-NEXT:    [[TMP13:%.*]] = load i32, ptr [[TMP9]], align 4, !alias.scope !3
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP13]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP13]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP14:%.*]] = add nsw <4 x i32> [[BROADCAST_SPLAT]], [[WIDE_LOAD]]
 ; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[OBJ]], i64 0, i32 2, i64 [[I]], i64 [[TMP10]]

diff  --git a/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll b/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll
index 952b13293ce24..8768d30f4f838 100644
--- a/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll
+++ b/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll
@@ -473,12 +473,12 @@ define void @induction_resume_value_requires_non_trivial_scev_expansion(ptr %dst
 ; CHECK:       vector.main.loop.iter.check:
 ; CHECK-NEXT:    br i1 false, label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK:       vector.ph:
-; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i8> poison, i8 [[INDUCTION_IV]], i32 0
+; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i8> poison, i8 [[INDUCTION_IV]], i64 0
 ; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x i8> [[DOTSPLATINSERT]], <4 x i8> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP2:%.*]] = mul <4 x i8> <i8 0, i8 1, i8 2, i8 3>, [[DOTSPLAT]]
 ; CHECK-NEXT:    [[INDUCTION:%.*]] = add <4 x i8> zeroinitializer, [[TMP2]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = mul i8 [[INDUCTION_IV]], 4
-; CHECK-NEXT:    [[DOTSPLATINSERT1:%.*]] = insertelement <4 x i8> poison, i8 [[TMP3]], i32 0
+; CHECK-NEXT:    [[DOTSPLATINSERT1:%.*]] = insertelement <4 x i8> poison, i8 [[TMP3]], i64 0
 ; CHECK-NEXT:    [[DOTSPLAT2:%.*]] = shufflevector <4 x i8> [[DOTSPLATINSERT1]], <4 x i8> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    [[IND_END:%.*]] = mul i8 84, [[INDUCTION_IV]]
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
@@ -504,14 +504,14 @@ define void @induction_resume_value_requires_non_trivial_scev_expansion(ptr %dst
 ; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
 ; CHECK-NEXT:    [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ 84, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
 ; CHECK-NEXT:    [[IND_END4:%.*]] = mul i8 84, [[INDUCTION_IV]]
-; CHECK-NEXT:    [[DOTSPLATINSERT9:%.*]] = insertelement <4 x i8> poison, i8 [[BC_RESUME_VAL]], i32 0
+; CHECK-NEXT:    [[DOTSPLATINSERT9:%.*]] = insertelement <4 x i8> poison, i8 [[BC_RESUME_VAL]], i64 0
 ; CHECK-NEXT:    [[DOTSPLAT10:%.*]] = shufflevector <4 x i8> [[DOTSPLATINSERT9]], <4 x i8> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT:    [[DOTSPLATINSERT11:%.*]] = insertelement <4 x i8> poison, i8 [[INDUCTION_IV]], i32 0
+; CHECK-NEXT:    [[DOTSPLATINSERT11:%.*]] = insertelement <4 x i8> poison, i8 [[INDUCTION_IV]], i64 0
 ; CHECK-NEXT:    [[DOTSPLAT12:%.*]] = shufflevector <4 x i8> [[DOTSPLATINSERT11]], <4 x i8> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP8:%.*]] = mul <4 x i8> <i8 0, i8 1, i8 2, i8 3>, [[DOTSPLAT12]]
 ; CHECK-NEXT:    [[INDUCTION13:%.*]] = add <4 x i8> [[DOTSPLAT10]], [[TMP8]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = mul i8 [[INDUCTION_IV]], 4
-; CHECK-NEXT:    [[DOTSPLATINSERT14:%.*]] = insertelement <4 x i8> poison, i8 [[TMP9]], i32 0
+; CHECK-NEXT:    [[DOTSPLATINSERT14:%.*]] = insertelement <4 x i8> poison, i8 [[TMP9]], i64 0
 ; CHECK-NEXT:    [[DOTSPLAT15:%.*]] = shufflevector <4 x i8> [[DOTSPLATINSERT14]], <4 x i8> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
 ; CHECK:       vec.epilog.vector.body:
@@ -565,12 +565,12 @@ define void @induction_resume_value_requires_non_trivial_scev_expansion(ptr %dst
 ; CHECK-PROFITABLE-BY-DEFAULT:       vector.main.loop.iter.check:
 ; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    br i1 false, label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK-PROFITABLE-BY-DEFAULT:       vector.ph:
-; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i8> poison, i8 [[INDUCTION_IV]], i32 0
+; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i8> poison, i8 [[INDUCTION_IV]], i64 0
 ; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x i8> [[DOTSPLATINSERT]], <4 x i8> poison, <4 x i32> zeroinitializer
 ; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[TMP2:%.*]] = mul <4 x i8> <i8 0, i8 1, i8 2, i8 3>, [[DOTSPLAT]]
 ; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[INDUCTION:%.*]] = add <4 x i8> zeroinitializer, [[TMP2]]
 ; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[TMP3:%.*]] = mul i8 [[INDUCTION_IV]], 4
-; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[DOTSPLATINSERT1:%.*]] = insertelement <4 x i8> poison, i8 [[TMP3]], i32 0
+; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[DOTSPLATINSERT1:%.*]] = insertelement <4 x i8> poison, i8 [[TMP3]], i64 0
 ; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[DOTSPLAT2:%.*]] = shufflevector <4 x i8> [[DOTSPLATINSERT1]], <4 x i8> poison, <4 x i32> zeroinitializer
 ; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[IND_END:%.*]] = mul i8 84, [[INDUCTION_IV]]
 ; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    br label [[VECTOR_BODY:%.*]]
@@ -596,14 +596,14 @@ define void @induction_resume_value_requires_non_trivial_scev_expansion(ptr %dst
 ; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
 ; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ 84, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
 ; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[IND_END4:%.*]] = mul i8 84, [[INDUCTION_IV]]
-; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[DOTSPLATINSERT9:%.*]] = insertelement <2 x i8> poison, i8 [[BC_RESUME_VAL]], i32 0
+; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[DOTSPLATINSERT9:%.*]] = insertelement <2 x i8> poison, i8 [[BC_RESUME_VAL]], i64 0
 ; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[DOTSPLAT10:%.*]] = shufflevector <2 x i8> [[DOTSPLATINSERT9]], <2 x i8> poison, <2 x i32> zeroinitializer
-; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[DOTSPLATINSERT11:%.*]] = insertelement <2 x i8> poison, i8 [[INDUCTION_IV]], i32 0
+; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[DOTSPLATINSERT11:%.*]] = insertelement <2 x i8> poison, i8 [[INDUCTION_IV]], i64 0
 ; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[DOTSPLAT12:%.*]] = shufflevector <2 x i8> [[DOTSPLATINSERT11]], <2 x i8> poison, <2 x i32> zeroinitializer
 ; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[TMP8:%.*]] = mul <2 x i8> <i8 0, i8 1>, [[DOTSPLAT12]]
 ; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[INDUCTION13:%.*]] = add <2 x i8> [[DOTSPLAT10]], [[TMP8]]
 ; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[TMP9:%.*]] = mul i8 [[INDUCTION_IV]], 2
-; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[DOTSPLATINSERT14:%.*]] = insertelement <2 x i8> poison, i8 [[TMP9]], i32 0
+; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[DOTSPLATINSERT14:%.*]] = insertelement <2 x i8> poison, i8 [[TMP9]], i64 0
 ; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[DOTSPLAT15:%.*]] = shufflevector <2 x i8> [[DOTSPLATINSERT14]], <2 x i8> poison, <2 x i32> zeroinitializer
 ; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
 ; CHECK-PROFITABLE-BY-DEFAULT:       vec.epilog.vector.body:
@@ -705,7 +705,7 @@ define void @f4(ptr noalias %A, i32 signext %n) {
 ; CHECK-NEXT:    [[N_MOD_VF2:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 4
 ; CHECK-NEXT:    [[N_VEC3:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF2]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = trunc i64 [[BC_RESUME_VAL]] to i32
-; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP5]], i32 0
+; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP5]], i64 0
 ; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    [[INDUCTION:%.*]] = add <4 x i32> [[DOTSPLAT]], <i32 0, i32 1, i32 2, i32 3>
 ; CHECK-NEXT:    br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
@@ -776,7 +776,7 @@ define void @f4(ptr noalias %A, i32 signext %n) {
 ; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[N_MOD_VF2:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 2
 ; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[N_VEC3:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF2]]
 ; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[TMP5:%.*]] = trunc i64 [[BC_RESUME_VAL]] to i32
-; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[TMP5]], i32 0
+; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[TMP5]], i64 0
 ; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
 ; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[INDUCTION:%.*]] = add <2 x i32> [[DOTSPLAT]], <i32 0, i32 1>
 ; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    br label [[VEC_EPILOG_VECTOR_BODY:%.*]]

diff  --git a/llvm/test/Transforms/LoopVectorize/optsize.ll b/llvm/test/Transforms/LoopVectorize/optsize.ll
index 781f510abfcd0..27e5889356b09 100644
--- a/llvm/test/Transforms/LoopVectorize/optsize.ll
+++ b/llvm/test/Transforms/LoopVectorize/optsize.ll
@@ -226,7 +226,7 @@ define void @stride1(ptr noalias %B, i32 %BStride) optsize {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK:       vector.ph:
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[BSTRIDE:%.*]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[BSTRIDE:%.*]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:

diff  --git a/llvm/test/Transforms/LoopVectorize/outer-loop-vec-phi-predecessor-order.ll b/llvm/test/Transforms/LoopVectorize/outer-loop-vec-phi-predecessor-order.ll
index 7707a0b42182b..69c59000c8a96 100644
--- a/llvm/test/Transforms/LoopVectorize/outer-loop-vec-phi-predecessor-order.ll
+++ b/llvm/test/Transforms/LoopVectorize/outer-loop-vec-phi-predecessor-order.ll
@@ -15,7 +15,7 @@ define void @test(ptr %src, i64 %n) {
 ; CHECK:       vector.ph:
 ; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
 ; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[N]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[N]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:

diff  --git a/llvm/test/Transforms/LoopVectorize/outer_loop_test1.ll b/llvm/test/Transforms/LoopVectorize/outer_loop_test1.ll
index f3cb2933dfbc4..4294212ecb747 100644
--- a/llvm/test/Transforms/LoopVectorize/outer_loop_test1.ll
+++ b/llvm/test/Transforms/LoopVectorize/outer_loop_test1.ll
@@ -15,7 +15,7 @@
 ;
 ; RUN: opt -S -passes=loop-vectorize -enable-vplan-native-path < %s | FileCheck %s
 ; CHECK-LABEL: vector.ph:
-; CHECK: %[[SplatVal:.*]] = insertelement <4 x i32> poison, i32 %n, i32 0
+; CHECK: %[[SplatVal:.*]] = insertelement <4 x i32> poison, i32 %n, i64 0
 ; CHECK: %[[Splat:.*]] = shufflevector <4 x i32> %[[SplatVal]], <4 x i32> poison, <4 x i32> zeroinitializer
 
 ; CHECK-LABEL: vector.body:

diff  --git a/llvm/test/Transforms/LoopVectorize/outer_loop_test2.ll b/llvm/test/Transforms/LoopVectorize/outer_loop_test2.ll
index 3bb85abe24b0c..92a4ea2bbda70 100644
--- a/llvm/test/Transforms/LoopVectorize/outer_loop_test2.ll
+++ b/llvm/test/Transforms/LoopVectorize/outer_loop_test2.ll
@@ -16,7 +16,7 @@
 ; RUN: opt -S -passes=loop-vectorize -enable-vplan-native-path < %s | FileCheck %s
 ; CHECK: %[[ZeroTripChk:.*]] = icmp sgt i32 %jCount, 0
 ; CHECK-LABEL: vector.ph:
-; CHECK: %[[CVal0:.*]] = insertelement <4 x i32> poison, i32 %c, i32 0
+; CHECK: %[[CVal0:.*]] = insertelement <4 x i32> poison, i32 %c, i64 0
 ; CHECK-NEXT: %[[CSplat:.*]] = shufflevector <4 x i32> %[[CVal0]], <4 x i32> poison, <4 x i32> zeroinitializer
 
 ; CHECK-LABEL: vector.body:

diff  --git a/llvm/test/Transforms/LoopVectorize/pr30654-phiscev-sext-trunc.ll b/llvm/test/Transforms/LoopVectorize/pr30654-phiscev-sext-trunc.ll
index 91feee419622d..208f3f5929eb5 100644
--- a/llvm/test/Transforms/LoopVectorize/pr30654-phiscev-sext-trunc.ll
+++ b/llvm/test/Transforms/LoopVectorize/pr30654-phiscev-sext-trunc.ll
@@ -69,12 +69,12 @@ define void @doit1(i32 %n, i32 %step) local_unnamed_addr {
 ; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]]
 ; CHECK-NEXT:    [[DOTCAST:%.*]] = trunc i64 [[N_VEC]] to i32
 ; CHECK-NEXT:    [[IND_END:%.*]] = mul i32 [[DOTCAST]], [[STEP]]
-; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[STEP]], i32 0
+; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[STEP]], i64 0
 ; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP17:%.*]] = mul <4 x i32> <i32 0, i32 1, i32 2, i32 3>, [[DOTSPLAT]]
 ; CHECK-NEXT:    [[INDUCTION:%.*]] = add <4 x i32> zeroinitializer, [[TMP17]]
 ; CHECK-NEXT:    [[TMP18:%.*]] = mul i32 [[STEP]], 4
-; CHECK-NEXT:    [[DOTSPLATINSERT2:%.*]] = insertelement <4 x i32> poison, i32 [[TMP18]], i32 0
+; CHECK-NEXT:    [[DOTSPLATINSERT2:%.*]] = insertelement <4 x i32> poison, i32 [[TMP18]], i64 0
 ; CHECK-NEXT:    [[DOTSPLAT3:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT2]], <4 x i32> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
@@ -192,12 +192,12 @@ define void @doit2(i32 %n, i32 %step) local_unnamed_addr  {
 ; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]]
 ; CHECK-NEXT:    [[DOTCAST:%.*]] = trunc i64 [[N_VEC]] to i32
 ; CHECK-NEXT:    [[IND_END:%.*]] = mul i32 [[DOTCAST]], [[STEP]]
-; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[STEP]], i32 0
+; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[STEP]], i64 0
 ; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP16:%.*]] = mul <4 x i32> <i32 0, i32 1, i32 2, i32 3>, [[DOTSPLAT]]
 ; CHECK-NEXT:    [[INDUCTION:%.*]] = add <4 x i32> zeroinitializer, [[TMP16]]
 ; CHECK-NEXT:    [[TMP17:%.*]] = mul i32 [[STEP]], 4
-; CHECK-NEXT:    [[DOTSPLATINSERT2:%.*]] = insertelement <4 x i32> poison, i32 [[TMP17]], i32 0
+; CHECK-NEXT:    [[DOTSPLATINSERT2:%.*]] = insertelement <4 x i32> poison, i32 [[TMP17]], i64 0
 ; CHECK-NEXT:    [[DOTSPLAT3:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT2]], <4 x i32> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
@@ -387,12 +387,12 @@ define void @doit4(i32 %n, i8 signext %cstep) local_unnamed_addr {
 ; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]]
 ; CHECK-NEXT:    [[DOTCAST:%.*]] = trunc i64 [[N_VEC]] to i32
 ; CHECK-NEXT:    [[IND_END:%.*]] = mul i32 [[DOTCAST]], [[CONV]]
-; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[CONV]], i32 0
+; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[CONV]], i64 0
 ; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP14:%.*]] = mul <4 x i32> <i32 0, i32 1, i32 2, i32 3>, [[DOTSPLAT]]
 ; CHECK-NEXT:    [[INDUCTION:%.*]] = add <4 x i32> zeroinitializer, [[TMP14]]
 ; CHECK-NEXT:    [[TMP15:%.*]] = mul i32 [[CONV]], 4
-; CHECK-NEXT:    [[DOTSPLATINSERT2:%.*]] = insertelement <4 x i32> poison, i32 [[TMP15]], i32 0
+; CHECK-NEXT:    [[DOTSPLATINSERT2:%.*]] = insertelement <4 x i32> poison, i32 [[TMP15]], i64 0
 ; CHECK-NEXT:    [[DOTSPLAT3:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT2]], <4 x i32> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:

diff  --git a/llvm/test/Transforms/LoopVectorize/pr39417-optsize-scevchecks.ll b/llvm/test/Transforms/LoopVectorize/pr39417-optsize-scevchecks.ll
index 8da5cb3310d78..f74972078152d 100644
--- a/llvm/test/Transforms/LoopVectorize/pr39417-optsize-scevchecks.ll
+++ b/llvm/test/Transforms/LoopVectorize/pr39417-optsize-scevchecks.ll
@@ -34,7 +34,7 @@ define void @scev4stride1(i32* noalias nocapture %a, i32* noalias nocapture read
 ; CHECK-NEXT:  for.body.preheader:
 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK:       vector.ph:
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[K:%.*]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[K:%.*]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:

diff  --git a/llvm/test/Transforms/LoopVectorize/pr44488-predication.ll b/llvm/test/Transforms/LoopVectorize/pr44488-predication.ll
index 06f70a4cccb5d..b91a96ccf14aa 100644
--- a/llvm/test/Transforms/LoopVectorize/pr44488-predication.ll
+++ b/llvm/test/Transforms/LoopVectorize/pr44488-predication.ll
@@ -17,7 +17,7 @@ define i16 @test_true_and_false_branch_equal() {
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_SREM_CONTINUE2:%.*]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i16, ptr @v_38, align 1
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i16> poison, i16 [[TMP0]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i16> poison, i16 [[TMP0]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i16> [[BROADCAST_SPLATINSERT]], <2 x i16> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <2 x i16> [[BROADCAST_SPLAT]], zeroinitializer
 ; CHECK-NEXT:    [[TMP2:%.*]] = xor <2 x i1> [[TMP1]], <i1 true, i1 true>

diff  --git a/llvm/test/Transforms/LoopVectorize/pr45259.ll b/llvm/test/Transforms/LoopVectorize/pr45259.ll
index 3a9aa6b5726cf..dcc8f3f2f9d8f 100644
--- a/llvm/test/Transforms/LoopVectorize/pr45259.ll
+++ b/llvm/test/Transforms/LoopVectorize/pr45259.ll
@@ -34,7 +34,7 @@ define i8 @widget(ptr %arr, i8 %t9) {
 ; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[TMP3]], 4
 ; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[TMP3]], [[N_MOD_VF]]
 ; CHECK-NEXT:    [[IND_END:%.*]] = trunc i32 [[N_VEC]] to i8
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i8> poison, i8 [[T9:%.*]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i8> poison, i8 [[T9:%.*]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT]], <4 x i8> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:

diff  --git a/llvm/test/Transforms/LoopVectorize/pr50686.ll b/llvm/test/Transforms/LoopVectorize/pr50686.ll
index f1be6867ad942..9cf4f9c997133 100644
--- a/llvm/test/Transforms/LoopVectorize/pr50686.ll
+++ b/llvm/test/Transforms/LoopVectorize/pr50686.ll
@@ -20,15 +20,15 @@ define void @m(ptr nocapture %p, ptr nocapture %p2, i32 %q) {
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[P2]], align 4, !alias.scope !0
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP1]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP1]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP2:%.*]] = sub nsw <4 x i32> zeroinitializer, [[BROADCAST_SPLAT]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX9_1]], align 4, !alias.scope !0
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x i32> poison, i32 [[TMP3]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x i32> poison, i32 [[TMP3]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT3:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT2]], <4 x i32> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP4:%.*]] = sub nsw <4 x i32> [[TMP2]], [[BROADCAST_SPLAT3]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX9_2]], align 4, !alias.scope !0
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <4 x i32> poison, i32 [[TMP5]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <4 x i32> poison, i32 [[TMP5]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT5:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT4]], <4 x i32> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP6:%.*]] = sub nsw <4 x i32> [[TMP4]], [[BROADCAST_SPLAT5]]
 ; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[P]], i64 [[TMP0]]

diff  --git a/llvm/test/Transforms/LoopVectorize/pr51614-fold-tail-by-masking.ll b/llvm/test/Transforms/LoopVectorize/pr51614-fold-tail-by-masking.ll
index 1e6a94eb2b2ac..56dd29e34e5e7 100644
--- a/llvm/test/Transforms/LoopVectorize/pr51614-fold-tail-by-masking.ll
+++ b/llvm/test/Transforms/LoopVectorize/pr51614-fold-tail-by-masking.ll
@@ -19,7 +19,7 @@ define dso_local i16 @reverse_interleave_load_fold_mask() optsize {
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <2 x i16> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP25:%.*]], [[PRED_LOAD_CONTINUE4]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[INDEX]] to i16
 ; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = sub i16 41, [[TMP0]]
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i32> poison, i32 [[INDEX]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i32> poison, i32 [[INDEX]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT1]], <2 x i32> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[VEC_IV:%.*]] = add <2 x i32> [[BROADCAST_SPLAT2]], <i32 0, i32 1>
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp ule <2 x i32> [[VEC_IV]], <i32 40, i32 40>

diff  --git a/llvm/test/Transforms/LoopVectorize/pr55167-fold-tail-live-out.ll b/llvm/test/Transforms/LoopVectorize/pr55167-fold-tail-live-out.ll
index 151c486b353df..b9e0eb3ada52e 100644
--- a/llvm/test/Transforms/LoopVectorize/pr55167-fold-tail-live-out.ll
+++ b/llvm/test/Transforms/LoopVectorize/pr55167-fold-tail-live-out.ll
@@ -6,11 +6,11 @@ define i32 @test(i32 %a, i1 %c.1, i1 %c.2 ) #0 {
 ; CHECK-NEXT:  bb:
 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK:       vector.ph:
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[A:%.*]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[A:%.*]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i1> poison, i1 [[C_1:%.*]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i1> poison, i1 [[C_1:%.*]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x i1> [[BROADCAST_SPLATINSERT1]], <2 x i1> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <2 x i1> poison, i1 [[C_2:%.*]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <2 x i1> poison, i1 [[C_2:%.*]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT4:%.*]] = shufflevector <2 x i1> [[BROADCAST_SPLATINSERT3]], <2 x i1> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:

diff  --git a/llvm/test/Transforms/LoopVectorize/reduction-small-size.ll b/llvm/test/Transforms/LoopVectorize/reduction-small-size.ll
index a8510ee80e7b6..837d663f4a926 100644
--- a/llvm/test/Transforms/LoopVectorize/reduction-small-size.ll
+++ b/llvm/test/Transforms/LoopVectorize/reduction-small-size.ll
@@ -11,9 +11,9 @@ define i8 @PR34687(i1 %c, i32 %x, i32 %n) {
 ; CHECK:       vector.ph:
 ; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[N]], 4
 ; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[N]], [[N_MOD_VF]]
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[C:%.*]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[C:%.*]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i32> poison, i32 [[X:%.*]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i32> poison, i32 [[X:%.*]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT1]], <4 x i32> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:

diff  --git a/llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll
index 630a17ece9f32..9b843868f6e61 100644
--- a/llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll
+++ b/llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll
@@ -170,9 +170,9 @@ for.end:
 define void @constant_folded_previous_value() {
 ; CHECK-VF4UF2-LABEL: @constant_folded_previous_value
 ; CHECK-VF4UF2: vector.body
-; CHECK-VF4UF2: %[[VECTOR_RECUR:.*]] = phi <vscale x 4 x i64> [ %vector.recur.init, %vector.ph ], [ shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i32 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer), %vector.body ]
-; CHECK-VF4UF2: %[[SPLICE1:.*]] = call <vscale x 4 x i64> @llvm.experimental.vector.splice.nxv4i64(<vscale x 4 x i64> %vector.recur, <vscale x 4 x i64> shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i32 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer), i32 -1)
-; CHECK-VF4UF2: %[[SPLICE2:.*]] = call <vscale x 4 x i64> @llvm.experimental.vector.splice.nxv4i64(<vscale x 4 x i64> shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i32 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i64> shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i32 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer), i32 -1)
+; CHECK-VF4UF2: %[[VECTOR_RECUR:.*]] = phi <vscale x 4 x i64> [ %vector.recur.init, %vector.ph ], [ shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer), %vector.body ]
+; CHECK-VF4UF2: %[[SPLICE1:.*]] = call <vscale x 4 x i64> @llvm.experimental.vector.splice.nxv4i64(<vscale x 4 x i64> %vector.recur, <vscale x 4 x i64> shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer), i32 -1)
+; CHECK-VF4UF2: %[[SPLICE2:.*]] = call <vscale x 4 x i64> @llvm.experimental.vector.splice.nxv4i64(<vscale x 4 x i64> shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i64> shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer), i32 -1)
 ; CHECK-VF4UF2: br i1 {{.*}}, label %middle.block, label %vector.body
 entry:
   br label %scalar.body
@@ -203,9 +203,9 @@ define i32 @extract_second_last_iteration(ptr %cval, i32 %x)  {
 ; CHECK-VF4UF2: %[[MUL1:.*]] = mul i32 %[[VSCALE1]], 4
 ; CHECK-VF4UF2: %[[SUB1:.*]] = sub i32 %[[MUL1]], 1
 ; CHECK-VF4UF2: %[[VEC_RECUR_INIT:.*]] = insertelement <vscale x 4 x i32> poison, i32 0, i32 %[[SUB1]]
-; CHECK-VF4UF2: %[[SPLAT_INS1:.*]] = insertelement <vscale x 4 x i32> poison, i32 %x, i32 0
+; CHECK-VF4UF2: %[[SPLAT_INS1:.*]] = insertelement <vscale x 4 x i32> poison, i32 %x, i64 0
 ; CHECK-VF4UF2: %[[SPLAT1:.*]] = shufflevector <vscale x 4 x i32> %[[SPLAT_INS1]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
-; CHECK-VF4UF2: %[[SPLAT_INS2:.*]] = insertelement <vscale x 4 x i32> poison, i32 %x, i32 0
+; CHECK-VF4UF2: %[[SPLAT_INS2:.*]] = insertelement <vscale x 4 x i32> poison, i32 %x, i64 0
 ; CHECK-VF4UF2: %[[SPLAT2:.*]] = shufflevector <vscale x 4 x i32> %[[SPLAT_INS2]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
 ; ; CHECK-VF4UF2: vector.body
 ; CHECK-VF4UF2: %[[VEC_RECUR:.*]] = phi <vscale x 4 x i32> [ %[[VEC_RECUR_INIT]], %vector.ph ], [ %[[ADD2:.*]], %vector.body ]

diff  --git a/llvm/test/Transforms/LoopVectorize/scalable-inductions.ll b/llvm/test/Transforms/LoopVectorize/scalable-inductions.ll
index f932164b9e2a9..9a76c283530cd 100644
--- a/llvm/test/Transforms/LoopVectorize/scalable-inductions.ll
+++ b/llvm/test/Transforms/LoopVectorize/scalable-inductions.ll
@@ -194,7 +194,7 @@ define void @add_unique_ind32(ptr noalias nocapture %a, i64 %n) {
 ; CHECK-NEXT:    [[CAST_CRD:%.*]] = trunc i64 [[N_VEC]] to i32
 ; CHECK-NEXT:    [[IND_END:%.*]] = shl i32 [[CAST_CRD]], 1
 ; CHECK-NEXT:    [[TMP4:%.*]] = call <vscale x 4 x i32> @llvm.experimental.stepvector.nxv4i32()
-; CHECK-NEXT:    [[TMP5:%.*]] = shl <vscale x 4 x i32> [[TMP4]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP5:%.*]] = shl <vscale x 4 x i32> [[TMP4]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
 ; CHECK-NEXT:    [[TMP6:%.*]] = call i32 @llvm.vscale.i32()
 ; CHECK-NEXT:    [[TMP7:%.*]] = shl i32 [[TMP6]], 3
 ; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TMP7]], i64 0
@@ -273,7 +273,7 @@ define void @add_unique_indf32(ptr noalias nocapture %a, i64 %n) {
 ; CHECK-NEXT:    [[IND_END:%.*]] = fadd float [[TMP4]], 0.000000e+00
 ; CHECK-NEXT:    [[TMP5:%.*]] = call <vscale x 4 x i32> @llvm.experimental.stepvector.nxv4i32()
 ; CHECK-NEXT:    [[TMP6:%.*]] = uitofp <vscale x 4 x i32> [[TMP5]] to <vscale x 4 x float>
-; CHECK-NEXT:    [[TMP7:%.*]] = fmul <vscale x 4 x float> [[TMP6]], shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 2.000000e+00, i32 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP7:%.*]] = fmul <vscale x 4 x float> [[TMP6]], shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 2.000000e+00, i64 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer)
 ; CHECK-NEXT:    [[INDUCTION:%.*]] = fadd <vscale x 4 x float> [[TMP7]], zeroinitializer
 ; CHECK-NEXT:    [[TMP8:%.*]] = call i32 @llvm.vscale.i32()
 ; CHECK-NEXT:    [[TMP9:%.*]] = shl i32 [[TMP8]], 2

diff  --git a/llvm/test/Transforms/LoopVectorize/scalable-lifetime.ll b/llvm/test/Transforms/LoopVectorize/scalable-lifetime.ll
index e1211cc9f252d..39c6ea0e418a3 100644
--- a/llvm/test/Transforms/LoopVectorize/scalable-lifetime.ll
+++ b/llvm/test/Transforms/LoopVectorize/scalable-lifetime.ll
@@ -41,7 +41,7 @@ for.end:
 ; CHECK:      entry:
 ; CHECK:        [[ALLOCA:%.*]] = alloca [1024 x i32], align 16
 ; CHECK:      vector.ph:
-; CHECK:        [[TMP1:%.*]] = insertelement <vscale x 2 x [1024 x i32]*> poison, [1024 x i32]* %arr, i32 0
+; CHECK:        [[TMP1:%.*]] = insertelement <vscale x 2 x [1024 x i32]*> poison, [1024 x i32]* %arr, i64 0
 ; CHECK-NEXT:   [[SPLAT_ALLOCA:%.*]] = shufflevector <vscale x 2 x [1024 x i32]*> [[TMP1]], <vscale x 2 x [1024 x i32]*> poison, <vscale x 2 x i32> zeroinitializer
 ; CHECK:      vector.body:
 ; CHECK:        [[BC_ALLOCA:%.*]] = bitcast <vscale x 2 x [1024 x i32]*> [[SPLAT_ALLOCA]] to <vscale x 2 x i8*>

diff  --git a/llvm/test/Transforms/LoopVectorize/scalable-loop-unpredicated-body-scalar-tail.ll b/llvm/test/Transforms/LoopVectorize/scalable-loop-unpredicated-body-scalar-tail.ll
index 2b1c32cd144ce..dc156a0be88d5 100644
--- a/llvm/test/Transforms/LoopVectorize/scalable-loop-unpredicated-body-scalar-tail.ll
+++ b/llvm/test/Transforms/LoopVectorize/scalable-loop-unpredicated-body-scalar-tail.ll
@@ -17,7 +17,7 @@
 ; CHECKUF1: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
 ; CHECKUF1: %[[IDXB:.*]] = getelementptr inbounds double, ptr %b, i64 %index
 ; CHECKUF1: %wide.load = load <vscale x 4 x double>, ptr %[[IDXB]], align 8
-; CHECKUF1: %[[FADD:.*]] = fadd <vscale x 4 x double> %wide.load, shufflevector (<vscale x 4 x double> insertelement (<vscale x 4 x double> poison, double 1.000000e+00, i32 0), <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECKUF1: %[[FADD:.*]] = fadd <vscale x 4 x double> %wide.load, shufflevector (<vscale x 4 x double> insertelement (<vscale x 4 x double> poison, double 1.000000e+00, i64 0), <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer)
 ; CHECKUF1: %[[IDXA:.*]] = getelementptr inbounds double, ptr %a, i64 %index
 ; CHECKUF1: store <vscale x 4 x double> %[[FADD]], ptr %[[IDXA]], align 8
 ; CHECKUF1: %[[VSCALE:.*]] = call i64 @llvm.vscale.i64()
@@ -51,8 +51,8 @@
 ; CHECKUF2: %[[VSCALE2_EXT:.*]] = sext i32 %[[VSCALE2]] to i64
 ; CHECKUF2: %[[IDXB_NEXT:.*]] = getelementptr inbounds double, ptr %[[IDXB]], i64 %[[VSCALE2_EXT]]
 ; CHECKUF2: %wide.load{{[0-9]+}} = load <vscale x 4 x double>, ptr %[[IDXB_NEXT]], align 8
-; CHECKUF2: %[[FADD:.*]] = fadd <vscale x 4 x double> %wide.load, shufflevector (<vscale x 4 x double> insertelement (<vscale x 4 x double> poison, double 1.000000e+00, i32 0), <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer)
-; CHECKUF2: %[[FADD_NEXT:.*]] = fadd <vscale x 4 x double> %wide.load{{[0-9]+}}, shufflevector (<vscale x 4 x double> insertelement (<vscale x 4 x double> poison, double 1.000000e+00, i32 0), <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECKUF2: %[[FADD:.*]] = fadd <vscale x 4 x double> %wide.load, shufflevector (<vscale x 4 x double> insertelement (<vscale x 4 x double> poison, double 1.000000e+00, i64 0), <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECKUF2: %[[FADD_NEXT:.*]] = fadd <vscale x 4 x double> %wide.load{{[0-9]+}}, shufflevector (<vscale x 4 x double> insertelement (<vscale x 4 x double> poison, double 1.000000e+00, i64 0), <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer)
 ; CHECKUF2: %[[IDXA:.*]] = getelementptr inbounds double, ptr %a, i64 %index
 ; CHECKUF2: store <vscale x 4 x double> %[[FADD]], ptr %[[IDXA]], align 8
 ; CHECKUF2: %[[VSCALE:.*]] = call i32 @llvm.vscale.i32()

diff  --git a/llvm/test/Transforms/LoopVectorize/scalable-reduction-inloop.ll b/llvm/test/Transforms/LoopVectorize/scalable-reduction-inloop.ll
index a1c30894e8421..3cc6e5fa7b8d5 100644
--- a/llvm/test/Transforms/LoopVectorize/scalable-reduction-inloop.ll
+++ b/llvm/test/Transforms/LoopVectorize/scalable-reduction-inloop.ll
@@ -9,8 +9,8 @@ define i8 @reduction_add_trunc(ptr noalias nocapture %A) {
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <vscale x 8 x i32> [ insertelement (<vscale x 8 x i32> zeroinitializer, i32 255, i32 0), %vector.ph ], [ [[TMP34:%.*]], %vector.body ]
 ; CHECK-NEXT:    [[VEC_PHI1:%.*]] = phi <vscale x 8 x i32> [ zeroinitializer, %vector.ph ], [ [[TMP36:%.*]], %vector.body ]
-; CHECK:         [[TMP14:%.*]] = and <vscale x 8 x i32> [[VEC_PHI]], shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 255, i32 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer)
-; CHECK-NEXT:    [[TMP15:%.*]] = and <vscale x 8 x i32> [[VEC_PHI1]], shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 255, i32 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer)
+; CHECK:         [[TMP14:%.*]] = and <vscale x 8 x i32> [[VEC_PHI]], shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 255, i64 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP15:%.*]] = and <vscale x 8 x i32> [[VEC_PHI1]], shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 255, i64 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer)
 ; CHECK:         [[WIDE_LOAD:%.*]] = load <vscale x 8 x i8>, ptr
 ; CHECK:         [[WIDE_LOAD2:%.*]] = load <vscale x 8 x i8>, ptr
 ; CHECK-NEXT:    [[TMP26:%.*]] = zext <vscale x 8 x i8> [[WIDE_LOAD]] to <vscale x 8 x i32>

diff  --git a/llvm/test/Transforms/LoopVectorize/scalable-trunc-min-bitwidth.ll b/llvm/test/Transforms/LoopVectorize/scalable-trunc-min-bitwidth.ll
index a99f875b413e7..38efdbe9fe2a3 100644
--- a/llvm/test/Transforms/LoopVectorize/scalable-trunc-min-bitwidth.ll
+++ b/llvm/test/Transforms/LoopVectorize/scalable-trunc-min-bitwidth.ll
@@ -13,7 +13,7 @@ define void @trunc_minimal_bitwidth(ptr %bptr, ptr noalias %hptr, i32 %val, i64
 ; CHECK-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 4
 ; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
 ; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[VAL:%.*]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[VAL:%.*]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT1]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
@@ -73,7 +73,7 @@ define void @trunc_minimal_bitwidths_shufflevector (ptr %p, i32 %arg1, i64 %len)
 ; CHECK-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 4
 ; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[LEN]], [[TMP3]]
 ; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[LEN]], [[N_MOD_VF]]
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[ARG1:%.*]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[ARG1:%.*]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:

diff  --git a/llvm/test/Transforms/LoopVectorize/select-cmp.ll b/llvm/test/Transforms/LoopVectorize/select-cmp.ll
index 49afe952c32db..c9f2aaef6d5c8 100644
--- a/llvm/test/Transforms/LoopVectorize/select-cmp.ll
+++ b/llvm/test/Transforms/LoopVectorize/select-cmp.ll
@@ -117,9 +117,9 @@ exit:                                     ; preds = %for.body
 define i32 @select_i32_from_icmp(ptr nocapture readonly %v, i32 %a, i32 %b, i64 %n) {
 ; CHECK-LABEL: @select_i32_from_icmp
 ; CHECK-VF4IC1:      vector.ph:
-; CHECK-VF4IC1:        [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 %a, i32 0
+; CHECK-VF4IC1:        [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 %a, i64 0
 ; CHECK-VF4IC1-NEXT:   [[SPLAT_OF_A:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> zeroinitializer
-; CHECK-VF4IC1-NEXT:   [[TMP2:%.*]] = insertelement <4 x i32> poison, i32 %b, i32 0
+; CHECK-VF4IC1-NEXT:   [[TMP2:%.*]] = insertelement <4 x i32> poison, i32 %b, i64 0
 ; CHECK-VF4IC1-NEXT:   [[SPLAT_OF_B:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> zeroinitializer
 ; CHECK-VF4IC1:      vector.body:
 ; CHECK-VF4IC1:        [[VEC_PHI:%.*]] = phi <4 x i32> [ [[SPLAT_OF_A]], %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ]
@@ -127,7 +127,7 @@ define i32 @select_i32_from_icmp(ptr nocapture readonly %v, i32 %a, i32 %b, i64
 ; CHECK-VF4IC1-NEXT:   [[VEC_ICMP:%.*]] = icmp eq <4 x i32> [[VEC_LOAD]], <i32 3, i32 3, i32 3, i32 3>
 ; CHECK-VF4IC1-NEXT:   [[VEC_SEL]] = select <4 x i1> [[VEC_ICMP]], <4 x i32> [[VEC_PHI]], <4 x i32> [[SPLAT_OF_B]]
 ; CHECK-VF4IC1:      middle.block:
-; CHECK-VF4IC1-NEXT:   [[FIN_INS:%.*]] = insertelement <4 x i32> poison, i32 %a, i32 0
+; CHECK-VF4IC1-NEXT:   [[FIN_INS:%.*]] = insertelement <4 x i32> poison, i32 %a, i64 0
 ; CHECK-VF4IC1-NEXT:   [[FIN_SPLAT:%.*]] = shufflevector <4 x i32> [[FIN_INS]], <4 x i32> poison, <4 x i32> zeroinitializer
 ; CHECK-VF4IC1-NEXT:   [[FIN_CMP:%.*]] = icmp ne <4 x i32> [[VEC_SEL]], [[FIN_SPLAT]]
 ; CHECK-VF4IC1-NEXT:   [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[FIN_CMP]])
@@ -214,16 +214,16 @@ exit:                                     ; preds = %for.body
 define i32 @select_i32_from_icmp_same_inputs(i32 %a, i32 %b, i64 %n) {
 ; CHECK-LABEL: @select_i32_from_icmp_same_inputs
 ; CHECK-VF4IC1:      vector.ph:
-; CHECK-VF4IC1:        [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 %a, i32 0
+; CHECK-VF4IC1:        [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 %a, i64 0
 ; CHECK-VF4IC1-NEXT:   [[SPLAT_OF_A:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> zeroinitializer
-; CHECK-VF4IC1-NEXT:   [[TMP2:%.*]] = insertelement <4 x i32> poison, i32 %b, i32 0
+; CHECK-VF4IC1-NEXT:   [[TMP2:%.*]] = insertelement <4 x i32> poison, i32 %b, i64 0
 ; CHECK-VF4IC1-NEXT:   [[SPLAT_OF_B:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> zeroinitializer
 ; CHECK-VF4IC1:      vector.body:
 ; CHECK-VF4IC1:        [[VEC_PHI:%.*]] = phi <4 x i32> [ [[SPLAT_OF_A]], %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ]
 ; CHECK-VF4IC1:        [[VEC_ICMP:%.*]] = icmp eq <4 x i32> [[VEC_PHI]], <i32 3, i32 3, i32 3, i32 3>
 ; CHECK-VF4IC1-NEXT:   [[VEC_SEL]] = select <4 x i1> [[VEC_ICMP]], <4 x i32> [[VEC_PHI]], <4 x i32> [[SPLAT_OF_B]]
 ; CHECK-VF4IC1:      middle.block:
-; CHECK-VF4IC1-NEXT:   [[FIN_INS:%.*]] = insertelement <4 x i32> poison, i32 %a, i32 0
+; CHECK-VF4IC1-NEXT:   [[FIN_INS:%.*]] = insertelement <4 x i32> poison, i32 %a, i64 0
 ; CHECK-VF4IC1-NEXT:   [[FIN_SPLAT:%.*]] = shufflevector <4 x i32> [[FIN_INS]], <4 x i32> poison, <4 x i32> zeroinitializer
 ; CHECK-VF4IC1-NEXT:   [[FIN_CMP:%.*]] = icmp ne <4 x i32> [[VEC_SEL]], [[FIN_SPLAT]]
 ; CHECK-VF4IC1-NEXT:   [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[FIN_CMP]])

diff  --git a/llvm/test/Transforms/LoopVectorize/select-reduction.ll b/llvm/test/Transforms/LoopVectorize/select-reduction.ll
index c76ffdf892734..8a4d5a78d159c 100644
--- a/llvm/test/Transforms/LoopVectorize/select-reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/select-reduction.ll
@@ -19,13 +19,13 @@ define i32 @test(i64 %N, i32 %x) {
 ; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
 ; CHECK-NEXT:    [[IND_END:%.*]] = sub i64 [[EXTRA_ITER]], [[N_VEC]]
 ; CHECK-NEXT:    [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[EXTRA_ITER]], 1
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <4 x i64> poison, i64 [[INDEX]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <4 x i64> poison, i64 [[INDEX]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT4:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT3]], <4 x i64> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    [[VEC_IV:%.*]] = add <4 x i64> [[BROADCAST_SPLAT4]], <i64 0, i64 1, i64 2, i64 3>
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp ule <4 x i64> [[VEC_IV]], [[BROADCAST_SPLAT]]

diff  --git a/llvm/test/Transforms/LoopVectorize/single-value-blend-phis.ll b/llvm/test/Transforms/LoopVectorize/single-value-blend-phis.ll
index 8b0aaf2284878..250d0b7fed456 100644
--- a/llvm/test/Transforms/LoopVectorize/single-value-blend-phis.ll
+++ b/llvm/test/Transforms/LoopVectorize/single-value-blend-phis.ll
@@ -11,7 +11,7 @@ define void @single_incoming_phi_no_blend_mask(i64 %a, i64 %b) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK:       vector.ph:
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[A:%.*]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[A:%.*]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
@@ -98,7 +98,7 @@ define void @single_incoming_phi_with_blend_mask(i64 %a, i64 %b) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK:       vector.ph:
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[A:%.*]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[A:%.*]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
@@ -190,7 +190,7 @@ define void @multiple_incoming_phi_with_blend_mask(i64 %a, ptr noalias %dst) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK:       vector.ph:
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[A:%.*]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[A:%.*]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
@@ -279,7 +279,7 @@ define void @single_incoming_needs_predication(i64 %a, i64 %b) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK:       vector.ph:
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[A:%.*]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[A:%.*]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:

diff  --git a/llvm/test/Transforms/LoopVectorize/uniform-blend.ll b/llvm/test/Transforms/LoopVectorize/uniform-blend.ll
index 14e4a84894bce..51e400d569cf6 100644
--- a/llvm/test/Transforms/LoopVectorize/uniform-blend.ll
+++ b/llvm/test/Transforms/LoopVectorize/uniform-blend.ll
@@ -5,14 +5,14 @@
 define void @blend_uniform_iv_trunc(i1 %c) {
 ; CHECK-LABEL: @blend_uniform_iv_trunc(
 ; CHECK:       vector.ph:
-; CHECK-NEXT:    [[MASK0:%.*]] = insertelement <4 x i1> poison, i1 %c, i32 0
+; CHECK-NEXT:    [[MASK0:%.*]] = insertelement <4 x i1> poison, i1 %c, i64 0
 ; CHECK-NEXT:    [[MASK1:%.*]] = shufflevector <4 x i1> [[MASK0]], <4 x i1> poison, <4 x i32> zeroinitializer
 
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ]
 ; CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[INDEX]] to i16
 ; CHECK-NEXT:    [[TMP2:%.*]] = add i16 [[TMP1]], 0
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i16> poison, i16 [[TMP2]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i16> poison, i16 [[TMP2]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i16> [[BROADCAST_SPLATINSERT1]], <4 x i16> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP3:%.*]] = xor <4 x i1> [[MASK1]], <i1 true, i1 true, i1 true, i1 true>
 ; CHECK-NEXT:    [[PREDPHI:%.*]] = select <4 x i1> [[MASK1]], <4 x i16> [[BROADCAST_SPLAT2]], <4 x i16> undef
@@ -51,13 +51,13 @@ exit:                                             ; preds = %loop.latch
 define void @blend_uniform_iv(i1 %c) {
 ; CHECK-LABEL: @blend_uniform_iv(
 ; CHECK:       vector.ph:
-; CHECK-NEXT:    [[MASK0:%.*]] = insertelement <4 x i1> poison, i1 %c, i32 0
+; CHECK-NEXT:    [[MASK0:%.*]] = insertelement <4 x i1> poison, i1 %c, i64 0
 ; CHECK-NEXT:    [[MASK1:%.*]] = shufflevector <4 x i1> [[MASK0]], <4 x i1> poison, <4 x i32> zeroinitializer
 
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i64> poison, i64 [[TMP0]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i64> poison, i64 [[TMP0]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT1]], <4 x i64> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP1:%.*]] = xor <4 x i1> [[MASK1]], <i1 true, i1 true, i1 true, i1 true>
 ; CHECK-NEXT:    [[PREDPHI:%.*]] = select <4 x i1> [[MASK1]], <4 x i64> [[BROADCAST_SPLAT2]], <4 x i64> undef
@@ -95,7 +95,7 @@ exit:                                             ; preds = %loop.latch
 define void @blend_chain_iv(i1 %c) {
 ; CHECK-LABEL: @blend_chain_iv(
 ; CHECK:       vector.ph:
-; CHECK-NEXT:    [[MASK0:%.*]] = insertelement <4 x i1> poison, i1 %c, i32 0
+; CHECK-NEXT:    [[MASK0:%.*]] = insertelement <4 x i1> poison, i1 %c, i64 0
 ; CHECK-NEXT:    [[MASK1:%.*]] = shufflevector <4 x i1> [[MASK0]], <4 x i1> poison, <4 x i32> zeroinitializer
 
 ; CHECK:       vector.body:

diff  --git a/llvm/test/Transforms/LoopVectorize/vplan-widen-select-instruction.ll b/llvm/test/Transforms/LoopVectorize/vplan-widen-select-instruction.ll
index 797a50290be8b..7b65d0257a1dc 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-widen-select-instruction.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-widen-select-instruction.ll
@@ -14,9 +14,9 @@ define void @loop_invariant_select(ptr noalias nocapture %out, i1 %select, doubl
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK:       vector.ph:
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x double> poison, double [[A:%.*]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x double> poison, double [[A:%.*]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x double> [[BROADCAST_SPLATINSERT]], <4 x double> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x double> poison, double [[B:%.*]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x double> poison, double [[B:%.*]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT3:%.*]] = shufflevector <4 x double> [[BROADCAST_SPLATINSERT2]], <4 x double> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
@@ -59,9 +59,9 @@ define void @outer_loop_dependant_select(ptr noalias nocapture %out, double %a,
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK:       vector.ph:
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x double> poison, double [[A:%.*]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x double> poison, double [[A:%.*]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x double> [[BROADCAST_SPLATINSERT]], <4 x double> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x double> poison, double [[B:%.*]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x double> poison, double [[B:%.*]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT3:%.*]] = shufflevector <4 x double> [[BROADCAST_SPLATINSERT2]], <4 x double> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
@@ -106,9 +106,9 @@ define void @inner_loop_dependant_select(ptr noalias nocapture %out, double %a,
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK:       vector.ph:
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x double> poison, double [[A:%.*]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x double> poison, double [[A:%.*]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x double> [[BROADCAST_SPLATINSERT]], <4 x double> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x double> poison, double [[B:%.*]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x double> poison, double [[B:%.*]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT3:%.*]] = shufflevector <4 x double> [[BROADCAST_SPLATINSERT2]], <4 x double> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
@@ -153,9 +153,9 @@ define void @outer_and_inner_loop_dependant_select(ptr noalias nocapture %out, d
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK:       vector.ph:
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x double> poison, double [[A:%.*]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x double> poison, double [[A:%.*]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x double> [[BROADCAST_SPLATINSERT]], <4 x double> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x double> poison, double [[B:%.*]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x double> poison, double [[B:%.*]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT3:%.*]] = shufflevector <4 x double> [[BROADCAST_SPLATINSERT2]], <4 x double> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:

diff  --git a/llvm/test/Transforms/LowerMatrixIntrinsics/bigger-expressions-double.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/bigger-expressions-double.ll
index 66e3f5b34705b..54fc716c5b7d9 100644
--- a/llvm/test/Transforms/LowerMatrixIntrinsics/bigger-expressions-double.ll
+++ b/llvm/test/Transforms/LowerMatrixIntrinsics/bigger-expressions-double.ll
@@ -34,18 +34,18 @@ define void @transpose_multiply(ptr %A.Ptr, ptr %B.Ptr, ptr %C.Ptr) {
 ; CHECK-NEXT:    [[TMP17:%.*]] = insertelement <3 x double> [[TMP15]], double [[TMP16]], i64 2
 ; CHECK-NEXT:    [[BLOCK:%.*]] = shufflevector <3 x double> [[TMP5]], <3 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP18:%.*]] = extractelement <3 x double> [[COL_LOAD4]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x double> poison, double [[TMP18]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x double> poison, double [[TMP18]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP19:%.*]] = fmul <1 x double> [[BLOCK]], [[SPLAT_SPLAT]]
 ; CHECK-NEXT:    [[BLOCK9:%.*]] = shufflevector <3 x double> [[TMP11]], <3 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP20:%.*]] = extractelement <3 x double> [[COL_LOAD4]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT10:%.*]] = insertelement <1 x double> poison, double [[TMP20]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT10:%.*]] = insertelement <1 x double> poison, double [[TMP20]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT11:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT10]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP21:%.*]] = fmul <1 x double> [[BLOCK9]], [[SPLAT_SPLAT11]]
 ; CHECK-NEXT:    [[TMP22:%.*]] = fadd <1 x double> [[TMP19]], [[TMP21]]
 ; CHECK-NEXT:    [[BLOCK12:%.*]] = shufflevector <3 x double> [[TMP17]], <3 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP23:%.*]] = extractelement <3 x double> [[COL_LOAD4]], i64 2
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT13:%.*]] = insertelement <1 x double> poison, double [[TMP23]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT13:%.*]] = insertelement <1 x double> poison, double [[TMP23]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT14:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT13]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP24:%.*]] = fmul <1 x double> [[BLOCK12]], [[SPLAT_SPLAT14]]
 ; CHECK-NEXT:    [[TMP25:%.*]] = fadd <1 x double> [[TMP22]], [[TMP24]]
@@ -53,18 +53,18 @@ define void @transpose_multiply(ptr %A.Ptr, ptr %B.Ptr, ptr %C.Ptr) {
 ; CHECK-NEXT:    [[TMP27:%.*]] = shufflevector <3 x double> undef, <3 x double> [[TMP26]], <3 x i32> <i32 3, i32 1, i32 2>
 ; CHECK-NEXT:    [[BLOCK15:%.*]] = shufflevector <3 x double> [[TMP5]], <3 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP28:%.*]] = extractelement <3 x double> [[COL_LOAD4]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT16:%.*]] = insertelement <1 x double> poison, double [[TMP28]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT16:%.*]] = insertelement <1 x double> poison, double [[TMP28]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT17:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT16]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP29:%.*]] = fmul <1 x double> [[BLOCK15]], [[SPLAT_SPLAT17]]
 ; CHECK-NEXT:    [[BLOCK18:%.*]] = shufflevector <3 x double> [[TMP11]], <3 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP30:%.*]] = extractelement <3 x double> [[COL_LOAD4]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT19:%.*]] = insertelement <1 x double> poison, double [[TMP30]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT19:%.*]] = insertelement <1 x double> poison, double [[TMP30]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT20:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT19]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP31:%.*]] = fmul <1 x double> [[BLOCK18]], [[SPLAT_SPLAT20]]
 ; CHECK-NEXT:    [[TMP32:%.*]] = fadd <1 x double> [[TMP29]], [[TMP31]]
 ; CHECK-NEXT:    [[BLOCK21:%.*]] = shufflevector <3 x double> [[TMP17]], <3 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP33:%.*]] = extractelement <3 x double> [[COL_LOAD4]], i64 2
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT22:%.*]] = insertelement <1 x double> poison, double [[TMP33]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT22:%.*]] = insertelement <1 x double> poison, double [[TMP33]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT23:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT22]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP34:%.*]] = fmul <1 x double> [[BLOCK21]], [[SPLAT_SPLAT23]]
 ; CHECK-NEXT:    [[TMP35:%.*]] = fadd <1 x double> [[TMP32]], [[TMP34]]
@@ -72,18 +72,18 @@ define void @transpose_multiply(ptr %A.Ptr, ptr %B.Ptr, ptr %C.Ptr) {
 ; CHECK-NEXT:    [[TMP37:%.*]] = shufflevector <3 x double> [[TMP27]], <3 x double> [[TMP36]], <3 x i32> <i32 0, i32 3, i32 2>
 ; CHECK-NEXT:    [[BLOCK24:%.*]] = shufflevector <3 x double> [[TMP5]], <3 x double> poison, <1 x i32> <i32 2>
 ; CHECK-NEXT:    [[TMP38:%.*]] = extractelement <3 x double> [[COL_LOAD4]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT25:%.*]] = insertelement <1 x double> poison, double [[TMP38]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT25:%.*]] = insertelement <1 x double> poison, double [[TMP38]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT26:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT25]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP39:%.*]] = fmul <1 x double> [[BLOCK24]], [[SPLAT_SPLAT26]]
 ; CHECK-NEXT:    [[BLOCK27:%.*]] = shufflevector <3 x double> [[TMP11]], <3 x double> poison, <1 x i32> <i32 2>
 ; CHECK-NEXT:    [[TMP40:%.*]] = extractelement <3 x double> [[COL_LOAD4]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT28:%.*]] = insertelement <1 x double> poison, double [[TMP40]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT28:%.*]] = insertelement <1 x double> poison, double [[TMP40]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT29:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT28]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP41:%.*]] = fmul <1 x double> [[BLOCK27]], [[SPLAT_SPLAT29]]
 ; CHECK-NEXT:    [[TMP42:%.*]] = fadd <1 x double> [[TMP39]], [[TMP41]]
 ; CHECK-NEXT:    [[BLOCK30:%.*]] = shufflevector <3 x double> [[TMP17]], <3 x double> poison, <1 x i32> <i32 2>
 ; CHECK-NEXT:    [[TMP43:%.*]] = extractelement <3 x double> [[COL_LOAD4]], i64 2
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT31:%.*]] = insertelement <1 x double> poison, double [[TMP43]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT31:%.*]] = insertelement <1 x double> poison, double [[TMP43]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT32:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT31]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP44:%.*]] = fmul <1 x double> [[BLOCK30]], [[SPLAT_SPLAT32]]
 ; CHECK-NEXT:    [[TMP45:%.*]] = fadd <1 x double> [[TMP42]], [[TMP44]]
@@ -91,18 +91,18 @@ define void @transpose_multiply(ptr %A.Ptr, ptr %B.Ptr, ptr %C.Ptr) {
 ; CHECK-NEXT:    [[TMP47:%.*]] = shufflevector <3 x double> [[TMP37]], <3 x double> [[TMP46]], <3 x i32> <i32 0, i32 1, i32 3>
 ; CHECK-NEXT:    [[BLOCK33:%.*]] = shufflevector <3 x double> [[TMP5]], <3 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP48:%.*]] = extractelement <3 x double> [[COL_LOAD6]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT34:%.*]] = insertelement <1 x double> poison, double [[TMP48]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT34:%.*]] = insertelement <1 x double> poison, double [[TMP48]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT35:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT34]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP49:%.*]] = fmul <1 x double> [[BLOCK33]], [[SPLAT_SPLAT35]]
 ; CHECK-NEXT:    [[BLOCK36:%.*]] = shufflevector <3 x double> [[TMP11]], <3 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP50:%.*]] = extractelement <3 x double> [[COL_LOAD6]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT37:%.*]] = insertelement <1 x double> poison, double [[TMP50]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT37:%.*]] = insertelement <1 x double> poison, double [[TMP50]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT38:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT37]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP51:%.*]] = fmul <1 x double> [[BLOCK36]], [[SPLAT_SPLAT38]]
 ; CHECK-NEXT:    [[TMP52:%.*]] = fadd <1 x double> [[TMP49]], [[TMP51]]
 ; CHECK-NEXT:    [[BLOCK39:%.*]] = shufflevector <3 x double> [[TMP17]], <3 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP53:%.*]] = extractelement <3 x double> [[COL_LOAD6]], i64 2
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT40:%.*]] = insertelement <1 x double> poison, double [[TMP53]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT40:%.*]] = insertelement <1 x double> poison, double [[TMP53]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT41:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT40]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP54:%.*]] = fmul <1 x double> [[BLOCK39]], [[SPLAT_SPLAT41]]
 ; CHECK-NEXT:    [[TMP55:%.*]] = fadd <1 x double> [[TMP52]], [[TMP54]]
@@ -110,18 +110,18 @@ define void @transpose_multiply(ptr %A.Ptr, ptr %B.Ptr, ptr %C.Ptr) {
 ; CHECK-NEXT:    [[TMP57:%.*]] = shufflevector <3 x double> undef, <3 x double> [[TMP56]], <3 x i32> <i32 3, i32 1, i32 2>
 ; CHECK-NEXT:    [[BLOCK42:%.*]] = shufflevector <3 x double> [[TMP5]], <3 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP58:%.*]] = extractelement <3 x double> [[COL_LOAD6]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT43:%.*]] = insertelement <1 x double> poison, double [[TMP58]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT43:%.*]] = insertelement <1 x double> poison, double [[TMP58]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT44:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT43]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP59:%.*]] = fmul <1 x double> [[BLOCK42]], [[SPLAT_SPLAT44]]
 ; CHECK-NEXT:    [[BLOCK45:%.*]] = shufflevector <3 x double> [[TMP11]], <3 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP60:%.*]] = extractelement <3 x double> [[COL_LOAD6]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT46:%.*]] = insertelement <1 x double> poison, double [[TMP60]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT46:%.*]] = insertelement <1 x double> poison, double [[TMP60]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT47:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT46]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP61:%.*]] = fmul <1 x double> [[BLOCK45]], [[SPLAT_SPLAT47]]
 ; CHECK-NEXT:    [[TMP62:%.*]] = fadd <1 x double> [[TMP59]], [[TMP61]]
 ; CHECK-NEXT:    [[BLOCK48:%.*]] = shufflevector <3 x double> [[TMP17]], <3 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP63:%.*]] = extractelement <3 x double> [[COL_LOAD6]], i64 2
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT49:%.*]] = insertelement <1 x double> poison, double [[TMP63]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT49:%.*]] = insertelement <1 x double> poison, double [[TMP63]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT50:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT49]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP64:%.*]] = fmul <1 x double> [[BLOCK48]], [[SPLAT_SPLAT50]]
 ; CHECK-NEXT:    [[TMP65:%.*]] = fadd <1 x double> [[TMP62]], [[TMP64]]
@@ -129,18 +129,18 @@ define void @transpose_multiply(ptr %A.Ptr, ptr %B.Ptr, ptr %C.Ptr) {
 ; CHECK-NEXT:    [[TMP67:%.*]] = shufflevector <3 x double> [[TMP57]], <3 x double> [[TMP66]], <3 x i32> <i32 0, i32 3, i32 2>
 ; CHECK-NEXT:    [[BLOCK51:%.*]] = shufflevector <3 x double> [[TMP5]], <3 x double> poison, <1 x i32> <i32 2>
 ; CHECK-NEXT:    [[TMP68:%.*]] = extractelement <3 x double> [[COL_LOAD6]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT52:%.*]] = insertelement <1 x double> poison, double [[TMP68]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT52:%.*]] = insertelement <1 x double> poison, double [[TMP68]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT53:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT52]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP69:%.*]] = fmul <1 x double> [[BLOCK51]], [[SPLAT_SPLAT53]]
 ; CHECK-NEXT:    [[BLOCK54:%.*]] = shufflevector <3 x double> [[TMP11]], <3 x double> poison, <1 x i32> <i32 2>
 ; CHECK-NEXT:    [[TMP70:%.*]] = extractelement <3 x double> [[COL_LOAD6]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT55:%.*]] = insertelement <1 x double> poison, double [[TMP70]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT55:%.*]] = insertelement <1 x double> poison, double [[TMP70]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT56:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT55]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP71:%.*]] = fmul <1 x double> [[BLOCK54]], [[SPLAT_SPLAT56]]
 ; CHECK-NEXT:    [[TMP72:%.*]] = fadd <1 x double> [[TMP69]], [[TMP71]]
 ; CHECK-NEXT:    [[BLOCK57:%.*]] = shufflevector <3 x double> [[TMP17]], <3 x double> poison, <1 x i32> <i32 2>
 ; CHECK-NEXT:    [[TMP73:%.*]] = extractelement <3 x double> [[COL_LOAD6]], i64 2
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT58:%.*]] = insertelement <1 x double> poison, double [[TMP73]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT58:%.*]] = insertelement <1 x double> poison, double [[TMP73]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT59:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT58]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP74:%.*]] = fmul <1 x double> [[BLOCK57]], [[SPLAT_SPLAT59]]
 ; CHECK-NEXT:    [[TMP75:%.*]] = fadd <1 x double> [[TMP72]], [[TMP74]]
@@ -148,18 +148,18 @@ define void @transpose_multiply(ptr %A.Ptr, ptr %B.Ptr, ptr %C.Ptr) {
 ; CHECK-NEXT:    [[TMP77:%.*]] = shufflevector <3 x double> [[TMP67]], <3 x double> [[TMP76]], <3 x i32> <i32 0, i32 1, i32 3>
 ; CHECK-NEXT:    [[BLOCK60:%.*]] = shufflevector <3 x double> [[TMP5]], <3 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP78:%.*]] = extractelement <3 x double> [[COL_LOAD8]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT61:%.*]] = insertelement <1 x double> poison, double [[TMP78]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT61:%.*]] = insertelement <1 x double> poison, double [[TMP78]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT62:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT61]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP79:%.*]] = fmul <1 x double> [[BLOCK60]], [[SPLAT_SPLAT62]]
 ; CHECK-NEXT:    [[BLOCK63:%.*]] = shufflevector <3 x double> [[TMP11]], <3 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP80:%.*]] = extractelement <3 x double> [[COL_LOAD8]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT64:%.*]] = insertelement <1 x double> poison, double [[TMP80]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT64:%.*]] = insertelement <1 x double> poison, double [[TMP80]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT65:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT64]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP81:%.*]] = fmul <1 x double> [[BLOCK63]], [[SPLAT_SPLAT65]]
 ; CHECK-NEXT:    [[TMP82:%.*]] = fadd <1 x double> [[TMP79]], [[TMP81]]
 ; CHECK-NEXT:    [[BLOCK66:%.*]] = shufflevector <3 x double> [[TMP17]], <3 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP83:%.*]] = extractelement <3 x double> [[COL_LOAD8]], i64 2
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT67:%.*]] = insertelement <1 x double> poison, double [[TMP83]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT67:%.*]] = insertelement <1 x double> poison, double [[TMP83]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT68:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT67]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP84:%.*]] = fmul <1 x double> [[BLOCK66]], [[SPLAT_SPLAT68]]
 ; CHECK-NEXT:    [[TMP85:%.*]] = fadd <1 x double> [[TMP82]], [[TMP84]]
@@ -167,18 +167,18 @@ define void @transpose_multiply(ptr %A.Ptr, ptr %B.Ptr, ptr %C.Ptr) {
 ; CHECK-NEXT:    [[TMP87:%.*]] = shufflevector <3 x double> undef, <3 x double> [[TMP86]], <3 x i32> <i32 3, i32 1, i32 2>
 ; CHECK-NEXT:    [[BLOCK69:%.*]] = shufflevector <3 x double> [[TMP5]], <3 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP88:%.*]] = extractelement <3 x double> [[COL_LOAD8]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT70:%.*]] = insertelement <1 x double> poison, double [[TMP88]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT70:%.*]] = insertelement <1 x double> poison, double [[TMP88]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT71:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT70]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP89:%.*]] = fmul <1 x double> [[BLOCK69]], [[SPLAT_SPLAT71]]
 ; CHECK-NEXT:    [[BLOCK72:%.*]] = shufflevector <3 x double> [[TMP11]], <3 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP90:%.*]] = extractelement <3 x double> [[COL_LOAD8]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT73:%.*]] = insertelement <1 x double> poison, double [[TMP90]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT73:%.*]] = insertelement <1 x double> poison, double [[TMP90]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT74:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT73]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP91:%.*]] = fmul <1 x double> [[BLOCK72]], [[SPLAT_SPLAT74]]
 ; CHECK-NEXT:    [[TMP92:%.*]] = fadd <1 x double> [[TMP89]], [[TMP91]]
 ; CHECK-NEXT:    [[BLOCK75:%.*]] = shufflevector <3 x double> [[TMP17]], <3 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP93:%.*]] = extractelement <3 x double> [[COL_LOAD8]], i64 2
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT76:%.*]] = insertelement <1 x double> poison, double [[TMP93]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT76:%.*]] = insertelement <1 x double> poison, double [[TMP93]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT77:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT76]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP94:%.*]] = fmul <1 x double> [[BLOCK75]], [[SPLAT_SPLAT77]]
 ; CHECK-NEXT:    [[TMP95:%.*]] = fadd <1 x double> [[TMP92]], [[TMP94]]
@@ -186,18 +186,18 @@ define void @transpose_multiply(ptr %A.Ptr, ptr %B.Ptr, ptr %C.Ptr) {
 ; CHECK-NEXT:    [[TMP97:%.*]] = shufflevector <3 x double> [[TMP87]], <3 x double> [[TMP96]], <3 x i32> <i32 0, i32 3, i32 2>
 ; CHECK-NEXT:    [[BLOCK78:%.*]] = shufflevector <3 x double> [[TMP5]], <3 x double> poison, <1 x i32> <i32 2>
 ; CHECK-NEXT:    [[TMP98:%.*]] = extractelement <3 x double> [[COL_LOAD8]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT79:%.*]] = insertelement <1 x double> poison, double [[TMP98]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT79:%.*]] = insertelement <1 x double> poison, double [[TMP98]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT80:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT79]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP99:%.*]] = fmul <1 x double> [[BLOCK78]], [[SPLAT_SPLAT80]]
 ; CHECK-NEXT:    [[BLOCK81:%.*]] = shufflevector <3 x double> [[TMP11]], <3 x double> poison, <1 x i32> <i32 2>
 ; CHECK-NEXT:    [[TMP100:%.*]] = extractelement <3 x double> [[COL_LOAD8]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT82:%.*]] = insertelement <1 x double> poison, double [[TMP100]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT82:%.*]] = insertelement <1 x double> poison, double [[TMP100]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT83:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT82]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP101:%.*]] = fmul <1 x double> [[BLOCK81]], [[SPLAT_SPLAT83]]
 ; CHECK-NEXT:    [[TMP102:%.*]] = fadd <1 x double> [[TMP99]], [[TMP101]]
 ; CHECK-NEXT:    [[BLOCK84:%.*]] = shufflevector <3 x double> [[TMP17]], <3 x double> poison, <1 x i32> <i32 2>
 ; CHECK-NEXT:    [[TMP103:%.*]] = extractelement <3 x double> [[COL_LOAD8]], i64 2
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT85:%.*]] = insertelement <1 x double> poison, double [[TMP103]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT85:%.*]] = insertelement <1 x double> poison, double [[TMP103]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT86:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT85]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP104:%.*]] = fmul <1 x double> [[BLOCK84]], [[SPLAT_SPLAT86]]
 ; CHECK-NEXT:    [[TMP105:%.*]] = fadd <1 x double> [[TMP102]], [[TMP104]]
@@ -268,18 +268,18 @@ define void @transpose_multiply_add(ptr %A.Ptr, ptr %B.Ptr, ptr %C.Ptr) {
 ; CHECK-NEXT:    [[TMP17:%.*]] = insertelement <3 x double> [[TMP15]], double [[TMP16]], i64 2
 ; CHECK-NEXT:    [[BLOCK:%.*]] = shufflevector <3 x double> [[TMP5]], <3 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP18:%.*]] = extractelement <3 x double> [[COL_LOAD4]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x double> poison, double [[TMP18]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x double> poison, double [[TMP18]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP19:%.*]] = fmul <1 x double> [[BLOCK]], [[SPLAT_SPLAT]]
 ; CHECK-NEXT:    [[BLOCK9:%.*]] = shufflevector <3 x double> [[TMP11]], <3 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP20:%.*]] = extractelement <3 x double> [[COL_LOAD4]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT10:%.*]] = insertelement <1 x double> poison, double [[TMP20]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT10:%.*]] = insertelement <1 x double> poison, double [[TMP20]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT11:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT10]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP21:%.*]] = fmul <1 x double> [[BLOCK9]], [[SPLAT_SPLAT11]]
 ; CHECK-NEXT:    [[TMP22:%.*]] = fadd <1 x double> [[TMP19]], [[TMP21]]
 ; CHECK-NEXT:    [[BLOCK12:%.*]] = shufflevector <3 x double> [[TMP17]], <3 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP23:%.*]] = extractelement <3 x double> [[COL_LOAD4]], i64 2
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT13:%.*]] = insertelement <1 x double> poison, double [[TMP23]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT13:%.*]] = insertelement <1 x double> poison, double [[TMP23]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT14:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT13]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP24:%.*]] = fmul <1 x double> [[BLOCK12]], [[SPLAT_SPLAT14]]
 ; CHECK-NEXT:    [[TMP25:%.*]] = fadd <1 x double> [[TMP22]], [[TMP24]]
@@ -287,18 +287,18 @@ define void @transpose_multiply_add(ptr %A.Ptr, ptr %B.Ptr, ptr %C.Ptr) {
 ; CHECK-NEXT:    [[TMP27:%.*]] = shufflevector <3 x double> undef, <3 x double> [[TMP26]], <3 x i32> <i32 3, i32 1, i32 2>
 ; CHECK-NEXT:    [[BLOCK15:%.*]] = shufflevector <3 x double> [[TMP5]], <3 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP28:%.*]] = extractelement <3 x double> [[COL_LOAD4]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT16:%.*]] = insertelement <1 x double> poison, double [[TMP28]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT16:%.*]] = insertelement <1 x double> poison, double [[TMP28]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT17:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT16]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP29:%.*]] = fmul <1 x double> [[BLOCK15]], [[SPLAT_SPLAT17]]
 ; CHECK-NEXT:    [[BLOCK18:%.*]] = shufflevector <3 x double> [[TMP11]], <3 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP30:%.*]] = extractelement <3 x double> [[COL_LOAD4]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT19:%.*]] = insertelement <1 x double> poison, double [[TMP30]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT19:%.*]] = insertelement <1 x double> poison, double [[TMP30]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT20:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT19]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP31:%.*]] = fmul <1 x double> [[BLOCK18]], [[SPLAT_SPLAT20]]
 ; CHECK-NEXT:    [[TMP32:%.*]] = fadd <1 x double> [[TMP29]], [[TMP31]]
 ; CHECK-NEXT:    [[BLOCK21:%.*]] = shufflevector <3 x double> [[TMP17]], <3 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP33:%.*]] = extractelement <3 x double> [[COL_LOAD4]], i64 2
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT22:%.*]] = insertelement <1 x double> poison, double [[TMP33]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT22:%.*]] = insertelement <1 x double> poison, double [[TMP33]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT23:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT22]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP34:%.*]] = fmul <1 x double> [[BLOCK21]], [[SPLAT_SPLAT23]]
 ; CHECK-NEXT:    [[TMP35:%.*]] = fadd <1 x double> [[TMP32]], [[TMP34]]
@@ -306,18 +306,18 @@ define void @transpose_multiply_add(ptr %A.Ptr, ptr %B.Ptr, ptr %C.Ptr) {
 ; CHECK-NEXT:    [[TMP37:%.*]] = shufflevector <3 x double> [[TMP27]], <3 x double> [[TMP36]], <3 x i32> <i32 0, i32 3, i32 2>
 ; CHECK-NEXT:    [[BLOCK24:%.*]] = shufflevector <3 x double> [[TMP5]], <3 x double> poison, <1 x i32> <i32 2>
 ; CHECK-NEXT:    [[TMP38:%.*]] = extractelement <3 x double> [[COL_LOAD4]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT25:%.*]] = insertelement <1 x double> poison, double [[TMP38]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT25:%.*]] = insertelement <1 x double> poison, double [[TMP38]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT26:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT25]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP39:%.*]] = fmul <1 x double> [[BLOCK24]], [[SPLAT_SPLAT26]]
 ; CHECK-NEXT:    [[BLOCK27:%.*]] = shufflevector <3 x double> [[TMP11]], <3 x double> poison, <1 x i32> <i32 2>
 ; CHECK-NEXT:    [[TMP40:%.*]] = extractelement <3 x double> [[COL_LOAD4]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT28:%.*]] = insertelement <1 x double> poison, double [[TMP40]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT28:%.*]] = insertelement <1 x double> poison, double [[TMP40]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT29:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT28]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP41:%.*]] = fmul <1 x double> [[BLOCK27]], [[SPLAT_SPLAT29]]
 ; CHECK-NEXT:    [[TMP42:%.*]] = fadd <1 x double> [[TMP39]], [[TMP41]]
 ; CHECK-NEXT:    [[BLOCK30:%.*]] = shufflevector <3 x double> [[TMP17]], <3 x double> poison, <1 x i32> <i32 2>
 ; CHECK-NEXT:    [[TMP43:%.*]] = extractelement <3 x double> [[COL_LOAD4]], i64 2
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT31:%.*]] = insertelement <1 x double> poison, double [[TMP43]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT31:%.*]] = insertelement <1 x double> poison, double [[TMP43]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT32:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT31]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP44:%.*]] = fmul <1 x double> [[BLOCK30]], [[SPLAT_SPLAT32]]
 ; CHECK-NEXT:    [[TMP45:%.*]] = fadd <1 x double> [[TMP42]], [[TMP44]]
@@ -325,18 +325,18 @@ define void @transpose_multiply_add(ptr %A.Ptr, ptr %B.Ptr, ptr %C.Ptr) {
 ; CHECK-NEXT:    [[TMP47:%.*]] = shufflevector <3 x double> [[TMP37]], <3 x double> [[TMP46]], <3 x i32> <i32 0, i32 1, i32 3>
 ; CHECK-NEXT:    [[BLOCK33:%.*]] = shufflevector <3 x double> [[TMP5]], <3 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP48:%.*]] = extractelement <3 x double> [[COL_LOAD6]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT34:%.*]] = insertelement <1 x double> poison, double [[TMP48]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT34:%.*]] = insertelement <1 x double> poison, double [[TMP48]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT35:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT34]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP49:%.*]] = fmul <1 x double> [[BLOCK33]], [[SPLAT_SPLAT35]]
 ; CHECK-NEXT:    [[BLOCK36:%.*]] = shufflevector <3 x double> [[TMP11]], <3 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP50:%.*]] = extractelement <3 x double> [[COL_LOAD6]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT37:%.*]] = insertelement <1 x double> poison, double [[TMP50]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT37:%.*]] = insertelement <1 x double> poison, double [[TMP50]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT38:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT37]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP51:%.*]] = fmul <1 x double> [[BLOCK36]], [[SPLAT_SPLAT38]]
 ; CHECK-NEXT:    [[TMP52:%.*]] = fadd <1 x double> [[TMP49]], [[TMP51]]
 ; CHECK-NEXT:    [[BLOCK39:%.*]] = shufflevector <3 x double> [[TMP17]], <3 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP53:%.*]] = extractelement <3 x double> [[COL_LOAD6]], i64 2
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT40:%.*]] = insertelement <1 x double> poison, double [[TMP53]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT40:%.*]] = insertelement <1 x double> poison, double [[TMP53]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT41:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT40]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP54:%.*]] = fmul <1 x double> [[BLOCK39]], [[SPLAT_SPLAT41]]
 ; CHECK-NEXT:    [[TMP55:%.*]] = fadd <1 x double> [[TMP52]], [[TMP54]]
@@ -344,18 +344,18 @@ define void @transpose_multiply_add(ptr %A.Ptr, ptr %B.Ptr, ptr %C.Ptr) {
 ; CHECK-NEXT:    [[TMP57:%.*]] = shufflevector <3 x double> undef, <3 x double> [[TMP56]], <3 x i32> <i32 3, i32 1, i32 2>
 ; CHECK-NEXT:    [[BLOCK42:%.*]] = shufflevector <3 x double> [[TMP5]], <3 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP58:%.*]] = extractelement <3 x double> [[COL_LOAD6]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT43:%.*]] = insertelement <1 x double> poison, double [[TMP58]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT43:%.*]] = insertelement <1 x double> poison, double [[TMP58]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT44:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT43]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP59:%.*]] = fmul <1 x double> [[BLOCK42]], [[SPLAT_SPLAT44]]
 ; CHECK-NEXT:    [[BLOCK45:%.*]] = shufflevector <3 x double> [[TMP11]], <3 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP60:%.*]] = extractelement <3 x double> [[COL_LOAD6]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT46:%.*]] = insertelement <1 x double> poison, double [[TMP60]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT46:%.*]] = insertelement <1 x double> poison, double [[TMP60]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT47:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT46]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP61:%.*]] = fmul <1 x double> [[BLOCK45]], [[SPLAT_SPLAT47]]
 ; CHECK-NEXT:    [[TMP62:%.*]] = fadd <1 x double> [[TMP59]], [[TMP61]]
 ; CHECK-NEXT:    [[BLOCK48:%.*]] = shufflevector <3 x double> [[TMP17]], <3 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP63:%.*]] = extractelement <3 x double> [[COL_LOAD6]], i64 2
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT49:%.*]] = insertelement <1 x double> poison, double [[TMP63]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT49:%.*]] = insertelement <1 x double> poison, double [[TMP63]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT50:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT49]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP64:%.*]] = fmul <1 x double> [[BLOCK48]], [[SPLAT_SPLAT50]]
 ; CHECK-NEXT:    [[TMP65:%.*]] = fadd <1 x double> [[TMP62]], [[TMP64]]
@@ -363,18 +363,18 @@ define void @transpose_multiply_add(ptr %A.Ptr, ptr %B.Ptr, ptr %C.Ptr) {
 ; CHECK-NEXT:    [[TMP67:%.*]] = shufflevector <3 x double> [[TMP57]], <3 x double> [[TMP66]], <3 x i32> <i32 0, i32 3, i32 2>
 ; CHECK-NEXT:    [[BLOCK51:%.*]] = shufflevector <3 x double> [[TMP5]], <3 x double> poison, <1 x i32> <i32 2>
 ; CHECK-NEXT:    [[TMP68:%.*]] = extractelement <3 x double> [[COL_LOAD6]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT52:%.*]] = insertelement <1 x double> poison, double [[TMP68]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT52:%.*]] = insertelement <1 x double> poison, double [[TMP68]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT53:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT52]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP69:%.*]] = fmul <1 x double> [[BLOCK51]], [[SPLAT_SPLAT53]]
 ; CHECK-NEXT:    [[BLOCK54:%.*]] = shufflevector <3 x double> [[TMP11]], <3 x double> poison, <1 x i32> <i32 2>
 ; CHECK-NEXT:    [[TMP70:%.*]] = extractelement <3 x double> [[COL_LOAD6]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT55:%.*]] = insertelement <1 x double> poison, double [[TMP70]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT55:%.*]] = insertelement <1 x double> poison, double [[TMP70]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT56:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT55]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP71:%.*]] = fmul <1 x double> [[BLOCK54]], [[SPLAT_SPLAT56]]
 ; CHECK-NEXT:    [[TMP72:%.*]] = fadd <1 x double> [[TMP69]], [[TMP71]]
 ; CHECK-NEXT:    [[BLOCK57:%.*]] = shufflevector <3 x double> [[TMP17]], <3 x double> poison, <1 x i32> <i32 2>
 ; CHECK-NEXT:    [[TMP73:%.*]] = extractelement <3 x double> [[COL_LOAD6]], i64 2
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT58:%.*]] = insertelement <1 x double> poison, double [[TMP73]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT58:%.*]] = insertelement <1 x double> poison, double [[TMP73]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT59:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT58]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP74:%.*]] = fmul <1 x double> [[BLOCK57]], [[SPLAT_SPLAT59]]
 ; CHECK-NEXT:    [[TMP75:%.*]] = fadd <1 x double> [[TMP72]], [[TMP74]]
@@ -382,18 +382,18 @@ define void @transpose_multiply_add(ptr %A.Ptr, ptr %B.Ptr, ptr %C.Ptr) {
 ; CHECK-NEXT:    [[TMP77:%.*]] = shufflevector <3 x double> [[TMP67]], <3 x double> [[TMP76]], <3 x i32> <i32 0, i32 1, i32 3>
 ; CHECK-NEXT:    [[BLOCK60:%.*]] = shufflevector <3 x double> [[TMP5]], <3 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP78:%.*]] = extractelement <3 x double> [[COL_LOAD8]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT61:%.*]] = insertelement <1 x double> poison, double [[TMP78]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT61:%.*]] = insertelement <1 x double> poison, double [[TMP78]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT62:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT61]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP79:%.*]] = fmul <1 x double> [[BLOCK60]], [[SPLAT_SPLAT62]]
 ; CHECK-NEXT:    [[BLOCK63:%.*]] = shufflevector <3 x double> [[TMP11]], <3 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP80:%.*]] = extractelement <3 x double> [[COL_LOAD8]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT64:%.*]] = insertelement <1 x double> poison, double [[TMP80]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT64:%.*]] = insertelement <1 x double> poison, double [[TMP80]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT65:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT64]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP81:%.*]] = fmul <1 x double> [[BLOCK63]], [[SPLAT_SPLAT65]]
 ; CHECK-NEXT:    [[TMP82:%.*]] = fadd <1 x double> [[TMP79]], [[TMP81]]
 ; CHECK-NEXT:    [[BLOCK66:%.*]] = shufflevector <3 x double> [[TMP17]], <3 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP83:%.*]] = extractelement <3 x double> [[COL_LOAD8]], i64 2
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT67:%.*]] = insertelement <1 x double> poison, double [[TMP83]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT67:%.*]] = insertelement <1 x double> poison, double [[TMP83]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT68:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT67]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP84:%.*]] = fmul <1 x double> [[BLOCK66]], [[SPLAT_SPLAT68]]
 ; CHECK-NEXT:    [[TMP85:%.*]] = fadd <1 x double> [[TMP82]], [[TMP84]]
@@ -401,18 +401,18 @@ define void @transpose_multiply_add(ptr %A.Ptr, ptr %B.Ptr, ptr %C.Ptr) {
 ; CHECK-NEXT:    [[TMP87:%.*]] = shufflevector <3 x double> undef, <3 x double> [[TMP86]], <3 x i32> <i32 3, i32 1, i32 2>
 ; CHECK-NEXT:    [[BLOCK69:%.*]] = shufflevector <3 x double> [[TMP5]], <3 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP88:%.*]] = extractelement <3 x double> [[COL_LOAD8]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT70:%.*]] = insertelement <1 x double> poison, double [[TMP88]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT70:%.*]] = insertelement <1 x double> poison, double [[TMP88]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT71:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT70]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP89:%.*]] = fmul <1 x double> [[BLOCK69]], [[SPLAT_SPLAT71]]
 ; CHECK-NEXT:    [[BLOCK72:%.*]] = shufflevector <3 x double> [[TMP11]], <3 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP90:%.*]] = extractelement <3 x double> [[COL_LOAD8]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT73:%.*]] = insertelement <1 x double> poison, double [[TMP90]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT73:%.*]] = insertelement <1 x double> poison, double [[TMP90]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT74:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT73]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP91:%.*]] = fmul <1 x double> [[BLOCK72]], [[SPLAT_SPLAT74]]
 ; CHECK-NEXT:    [[TMP92:%.*]] = fadd <1 x double> [[TMP89]], [[TMP91]]
 ; CHECK-NEXT:    [[BLOCK75:%.*]] = shufflevector <3 x double> [[TMP17]], <3 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP93:%.*]] = extractelement <3 x double> [[COL_LOAD8]], i64 2
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT76:%.*]] = insertelement <1 x double> poison, double [[TMP93]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT76:%.*]] = insertelement <1 x double> poison, double [[TMP93]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT77:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT76]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP94:%.*]] = fmul <1 x double> [[BLOCK75]], [[SPLAT_SPLAT77]]
 ; CHECK-NEXT:    [[TMP95:%.*]] = fadd <1 x double> [[TMP92]], [[TMP94]]
@@ -420,18 +420,18 @@ define void @transpose_multiply_add(ptr %A.Ptr, ptr %B.Ptr, ptr %C.Ptr) {
 ; CHECK-NEXT:    [[TMP97:%.*]] = shufflevector <3 x double> [[TMP87]], <3 x double> [[TMP96]], <3 x i32> <i32 0, i32 3, i32 2>
 ; CHECK-NEXT:    [[BLOCK78:%.*]] = shufflevector <3 x double> [[TMP5]], <3 x double> poison, <1 x i32> <i32 2>
 ; CHECK-NEXT:    [[TMP98:%.*]] = extractelement <3 x double> [[COL_LOAD8]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT79:%.*]] = insertelement <1 x double> poison, double [[TMP98]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT79:%.*]] = insertelement <1 x double> poison, double [[TMP98]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT80:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT79]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP99:%.*]] = fmul <1 x double> [[BLOCK78]], [[SPLAT_SPLAT80]]
 ; CHECK-NEXT:    [[BLOCK81:%.*]] = shufflevector <3 x double> [[TMP11]], <3 x double> poison, <1 x i32> <i32 2>
 ; CHECK-NEXT:    [[TMP100:%.*]] = extractelement <3 x double> [[COL_LOAD8]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT82:%.*]] = insertelement <1 x double> poison, double [[TMP100]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT82:%.*]] = insertelement <1 x double> poison, double [[TMP100]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT83:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT82]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP101:%.*]] = fmul <1 x double> [[BLOCK81]], [[SPLAT_SPLAT83]]
 ; CHECK-NEXT:    [[TMP102:%.*]] = fadd <1 x double> [[TMP99]], [[TMP101]]
 ; CHECK-NEXT:    [[BLOCK84:%.*]] = shufflevector <3 x double> [[TMP17]], <3 x double> poison, <1 x i32> <i32 2>
 ; CHECK-NEXT:    [[TMP103:%.*]] = extractelement <3 x double> [[COL_LOAD8]], i64 2
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT85:%.*]] = insertelement <1 x double> poison, double [[TMP103]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT85:%.*]] = insertelement <1 x double> poison, double [[TMP103]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT86:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT85]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP104:%.*]] = fmul <1 x double> [[BLOCK84]], [[SPLAT_SPLAT86]]
 ; CHECK-NEXT:    [[TMP105:%.*]] = fadd <1 x double> [[TMP102]], [[TMP104]]

diff  --git a/llvm/test/Transforms/LowerMatrixIntrinsics/const-gep.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/const-gep.ll
index 6671fa64ef9fa..dc97410bdd22a 100644
--- a/llvm/test/Transforms/LowerMatrixIntrinsics/const-gep.ll
+++ b/llvm/test/Transforms/LowerMatrixIntrinsics/const-gep.ll
@@ -17,12 +17,12 @@ define void @test(i32 %r, i32 %c) {
 ; CHECK-NEXT:    [[COL_LOAD1:%.*]] = load <2 x double>, ptr getelementptr (double, ptr @foo, i64 2), align 8
 ; CHECK-NEXT:    [[BLOCK:%.*]] = shufflevector <2 x double> [[COL_LOAD]], <2 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x double> [[COL_LOAD]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x double> poison, double [[TMP0]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x double> poison, double [[TMP0]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP1:%.*]] = fmul <1 x double> [[BLOCK]], [[SPLAT_SPLAT]]
 ; CHECK-NEXT:    [[BLOCK2:%.*]] = shufflevector <2 x double> [[COL_LOAD1]], <2 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x double> [[COL_LOAD]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT3:%.*]] = insertelement <1 x double> poison, double [[TMP2]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT3:%.*]] = insertelement <1 x double> poison, double [[TMP2]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT4:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT3]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP3:%.*]] = fmul <1 x double> [[BLOCK2]], [[SPLAT_SPLAT4]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = fadd <1 x double> [[TMP1]], [[TMP3]]
@@ -30,12 +30,12 @@ define void @test(i32 %r, i32 %c) {
 ; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <2 x double> undef, <2 x double> [[TMP5]], <2 x i32> <i32 2, i32 1>
 ; CHECK-NEXT:    [[BLOCK5:%.*]] = shufflevector <2 x double> [[COL_LOAD]], <2 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <2 x double> [[COL_LOAD]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT6:%.*]] = insertelement <1 x double> poison, double [[TMP7]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT6:%.*]] = insertelement <1 x double> poison, double [[TMP7]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT7:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT6]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP8:%.*]] = fmul <1 x double> [[BLOCK5]], [[SPLAT_SPLAT7]]
 ; CHECK-NEXT:    [[BLOCK8:%.*]] = shufflevector <2 x double> [[COL_LOAD1]], <2 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <2 x double> [[COL_LOAD]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT9:%.*]] = insertelement <1 x double> poison, double [[TMP9]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT9:%.*]] = insertelement <1 x double> poison, double [[TMP9]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT10:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT9]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP10:%.*]] = fmul <1 x double> [[BLOCK8]], [[SPLAT_SPLAT10]]
 ; CHECK-NEXT:    [[TMP11:%.*]] = fadd <1 x double> [[TMP8]], [[TMP10]]
@@ -43,12 +43,12 @@ define void @test(i32 %r, i32 %c) {
 ; CHECK-NEXT:    [[TMP13:%.*]] = shufflevector <2 x double> [[TMP6]], <2 x double> [[TMP12]], <2 x i32> <i32 0, i32 2>
 ; CHECK-NEXT:    [[BLOCK11:%.*]] = shufflevector <2 x double> [[COL_LOAD]], <2 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <2 x double> [[COL_LOAD1]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT12:%.*]] = insertelement <1 x double> poison, double [[TMP14]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT12:%.*]] = insertelement <1 x double> poison, double [[TMP14]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT13:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT12]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP15:%.*]] = fmul <1 x double> [[BLOCK11]], [[SPLAT_SPLAT13]]
 ; CHECK-NEXT:    [[BLOCK14:%.*]] = shufflevector <2 x double> [[COL_LOAD1]], <2 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <2 x double> [[COL_LOAD1]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT15:%.*]] = insertelement <1 x double> poison, double [[TMP16]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT15:%.*]] = insertelement <1 x double> poison, double [[TMP16]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT16:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT15]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP17:%.*]] = fmul <1 x double> [[BLOCK14]], [[SPLAT_SPLAT16]]
 ; CHECK-NEXT:    [[TMP18:%.*]] = fadd <1 x double> [[TMP15]], [[TMP17]]
@@ -56,12 +56,12 @@ define void @test(i32 %r, i32 %c) {
 ; CHECK-NEXT:    [[TMP20:%.*]] = shufflevector <2 x double> undef, <2 x double> [[TMP19]], <2 x i32> <i32 2, i32 1>
 ; CHECK-NEXT:    [[BLOCK17:%.*]] = shufflevector <2 x double> [[COL_LOAD]], <2 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP21:%.*]] = extractelement <2 x double> [[COL_LOAD1]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT18:%.*]] = insertelement <1 x double> poison, double [[TMP21]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT18:%.*]] = insertelement <1 x double> poison, double [[TMP21]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT19:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT18]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP22:%.*]] = fmul <1 x double> [[BLOCK17]], [[SPLAT_SPLAT19]]
 ; CHECK-NEXT:    [[BLOCK20:%.*]] = shufflevector <2 x double> [[COL_LOAD1]], <2 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP23:%.*]] = extractelement <2 x double> [[COL_LOAD1]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT21:%.*]] = insertelement <1 x double> poison, double [[TMP23]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT21:%.*]] = insertelement <1 x double> poison, double [[TMP23]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT22:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT21]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP24:%.*]] = fmul <1 x double> [[BLOCK20]], [[SPLAT_SPLAT22]]
 ; CHECK-NEXT:    [[TMP25:%.*]] = fadd <1 x double> [[TMP22]], [[TMP24]]

diff  --git a/llvm/test/Transforms/LowerMatrixIntrinsics/dot-product.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/dot-product.ll
index 110802769c102..b88ad16c0c3c1 100644
--- a/llvm/test/Transforms/LowerMatrixIntrinsics/dot-product.ll
+++ b/llvm/test/Transforms/LowerMatrixIntrinsics/dot-product.ll
@@ -14,32 +14,32 @@ define <1 x float> @dotproduct_float_v6(<6 x float> %a, <6 x float> %b) {
 ; CHECK-NEXT:    [[SPLIT6:%.*]] = shufflevector <6 x float> [[B:%.*]], <6 x float> poison, <6 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5>
 ; CHECK-NEXT:    [[BLOCK:%.*]] = shufflevector <1 x float> [[SPLIT]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <6 x float> [[SPLIT6]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x float> poison, float [[TMP0]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x float> poison, float [[TMP0]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP1:%.*]] = fmul fast <1 x float> [[BLOCK]], [[SPLAT_SPLAT]]
 ; CHECK-NEXT:    [[BLOCK7:%.*]] = shufflevector <1 x float> [[SPLIT1]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <6 x float> [[SPLIT6]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT8:%.*]] = insertelement <1 x float> poison, float [[TMP2]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT8:%.*]] = insertelement <1 x float> poison, float [[TMP2]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT9:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT8]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP3:%.*]] = call fast <1 x float> @llvm.fmuladd.v1f32(<1 x float> [[BLOCK7]], <1 x float> [[SPLAT_SPLAT9]], <1 x float> [[TMP1]])
 ; CHECK-NEXT:    [[BLOCK10:%.*]] = shufflevector <1 x float> [[SPLIT2]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <6 x float> [[SPLIT6]], i64 2
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT11:%.*]] = insertelement <1 x float> poison, float [[TMP4]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT11:%.*]] = insertelement <1 x float> poison, float [[TMP4]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT12:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT11]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP5:%.*]] = call fast <1 x float> @llvm.fmuladd.v1f32(<1 x float> [[BLOCK10]], <1 x float> [[SPLAT_SPLAT12]], <1 x float> [[TMP3]])
 ; CHECK-NEXT:    [[BLOCK13:%.*]] = shufflevector <1 x float> [[SPLIT3]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <6 x float> [[SPLIT6]], i64 3
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT14:%.*]] = insertelement <1 x float> poison, float [[TMP6]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT14:%.*]] = insertelement <1 x float> poison, float [[TMP6]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT15:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT14]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP7:%.*]] = call fast <1 x float> @llvm.fmuladd.v1f32(<1 x float> [[BLOCK13]], <1 x float> [[SPLAT_SPLAT15]], <1 x float> [[TMP5]])
 ; CHECK-NEXT:    [[BLOCK16:%.*]] = shufflevector <1 x float> [[SPLIT4]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <6 x float> [[SPLIT6]], i64 4
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT17:%.*]] = insertelement <1 x float> poison, float [[TMP8]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT17:%.*]] = insertelement <1 x float> poison, float [[TMP8]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT18:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT17]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP9:%.*]] = call fast <1 x float> @llvm.fmuladd.v1f32(<1 x float> [[BLOCK16]], <1 x float> [[SPLAT_SPLAT18]], <1 x float> [[TMP7]])
 ; CHECK-NEXT:    [[BLOCK19:%.*]] = shufflevector <1 x float> [[SPLIT5]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <6 x float> [[SPLIT6]], i64 5
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT20:%.*]] = insertelement <1 x float> poison, float [[TMP10]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT20:%.*]] = insertelement <1 x float> poison, float [[TMP10]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT21:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT20]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP11:%.*]] = call fast <1 x float> @llvm.fmuladd.v1f32(<1 x float> [[BLOCK19]], <1 x float> [[SPLAT_SPLAT21]], <1 x float> [[TMP9]])
 ; CHECK-NEXT:    [[TMP12:%.*]] = shufflevector <1 x float> [[TMP11]], <1 x float> poison, <1 x i32> zeroinitializer
@@ -60,7 +60,7 @@ define <1 x float> @dotproduct_float_v1(<1 x float> %a, <1 x float> %b) {
 ; CHECK-NEXT:    [[SPLIT1:%.*]] = shufflevector <1 x float> [[B:%.*]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[BLOCK:%.*]] = shufflevector <1 x float> [[SPLIT]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <1 x float> [[SPLIT1]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x float> poison, float [[TMP0]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x float> poison, float [[TMP0]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP1:%.*]] = fmul fast <1 x float> [[BLOCK]], [[SPLAT_SPLAT]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <1 x float> [[TMP1]], <1 x float> poison, <1 x i32> zeroinitializer
@@ -83,17 +83,17 @@ define <1 x float> @dotproduct_float_v3(<3 x float> %a, <3 x float> %b) {
 ; CHECK-NEXT:    [[SPLIT3:%.*]] = shufflevector <3 x float> [[B:%.*]], <3 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
 ; CHECK-NEXT:    [[BLOCK:%.*]] = shufflevector <1 x float> [[SPLIT]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <3 x float> [[SPLIT3]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x float> poison, float [[TMP0]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x float> poison, float [[TMP0]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP1:%.*]] = fmul fast <1 x float> [[BLOCK]], [[SPLAT_SPLAT]]
 ; CHECK-NEXT:    [[BLOCK4:%.*]] = shufflevector <1 x float> [[SPLIT1]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <3 x float> [[SPLIT3]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT5:%.*]] = insertelement <1 x float> poison, float [[TMP2]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT5:%.*]] = insertelement <1 x float> poison, float [[TMP2]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT6:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT5]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP3:%.*]] = call fast <1 x float> @llvm.fmuladd.v1f32(<1 x float> [[BLOCK4]], <1 x float> [[SPLAT_SPLAT6]], <1 x float> [[TMP1]])
 ; CHECK-NEXT:    [[BLOCK7:%.*]] = shufflevector <1 x float> [[SPLIT2]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <3 x float> [[SPLIT3]], i64 2
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT8:%.*]] = insertelement <1 x float> poison, float [[TMP4]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT8:%.*]] = insertelement <1 x float> poison, float [[TMP4]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT9:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT8]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP5:%.*]] = call fast <1 x float> @llvm.fmuladd.v1f32(<1 x float> [[BLOCK7]], <1 x float> [[SPLAT_SPLAT9]], <1 x float> [[TMP3]])
 ; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <1 x float> [[TMP5]], <1 x float> poison, <1 x i32> zeroinitializer
@@ -137,32 +137,32 @@ define <1 x float> @intrinsic_column_major_load_dot_product_float_v6(ptr %lhs_ad
 ; CHECK-NEXT:    [[SPLIT16:%.*]] = shufflevector <6 x float> [[TMP5]], <6 x float> poison, <6 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5>
 ; CHECK-NEXT:    [[BLOCK:%.*]] = shufflevector <1 x float> [[SPLIT]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <6 x float> [[SPLIT16]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x float> poison, float [[TMP6]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x float> poison, float [[TMP6]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP7:%.*]] = fmul fast <1 x float> [[BLOCK]], [[SPLAT_SPLAT]]
 ; CHECK-NEXT:    [[BLOCK17:%.*]] = shufflevector <1 x float> [[SPLIT11]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <6 x float> [[SPLIT16]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT18:%.*]] = insertelement <1 x float> poison, float [[TMP8]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT18:%.*]] = insertelement <1 x float> poison, float [[TMP8]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT19:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT18]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP9:%.*]] = call fast <1 x float> @llvm.fmuladd.v1f32(<1 x float> [[BLOCK17]], <1 x float> [[SPLAT_SPLAT19]], <1 x float> [[TMP7]])
 ; CHECK-NEXT:    [[BLOCK20:%.*]] = shufflevector <1 x float> [[SPLIT12]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <6 x float> [[SPLIT16]], i64 2
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT21:%.*]] = insertelement <1 x float> poison, float [[TMP10]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT21:%.*]] = insertelement <1 x float> poison, float [[TMP10]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT22:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT21]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP11:%.*]] = call fast <1 x float> @llvm.fmuladd.v1f32(<1 x float> [[BLOCK20]], <1 x float> [[SPLAT_SPLAT22]], <1 x float> [[TMP9]])
 ; CHECK-NEXT:    [[BLOCK23:%.*]] = shufflevector <1 x float> [[SPLIT13]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <6 x float> [[SPLIT16]], i64 3
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT24:%.*]] = insertelement <1 x float> poison, float [[TMP12]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT24:%.*]] = insertelement <1 x float> poison, float [[TMP12]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT25:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT24]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP13:%.*]] = call fast <1 x float> @llvm.fmuladd.v1f32(<1 x float> [[BLOCK23]], <1 x float> [[SPLAT_SPLAT25]], <1 x float> [[TMP11]])
 ; CHECK-NEXT:    [[BLOCK26:%.*]] = shufflevector <1 x float> [[SPLIT14]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <6 x float> [[SPLIT16]], i64 4
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT27:%.*]] = insertelement <1 x float> poison, float [[TMP14]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT27:%.*]] = insertelement <1 x float> poison, float [[TMP14]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT28:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT27]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP15:%.*]] = call fast <1 x float> @llvm.fmuladd.v1f32(<1 x float> [[BLOCK26]], <1 x float> [[SPLAT_SPLAT28]], <1 x float> [[TMP13]])
 ; CHECK-NEXT:    [[BLOCK29:%.*]] = shufflevector <1 x float> [[SPLIT15]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <6 x float> [[SPLIT16]], i64 5
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT30:%.*]] = insertelement <1 x float> poison, float [[TMP16]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT30:%.*]] = insertelement <1 x float> poison, float [[TMP16]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT31:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT30]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP17:%.*]] = call fast <1 x float> @llvm.fmuladd.v1f32(<1 x float> [[BLOCK29]], <1 x float> [[SPLAT_SPLAT31]], <1 x float> [[TMP15]])
 ; CHECK-NEXT:    [[TMP18:%.*]] = shufflevector <1 x float> [[TMP17]], <1 x float> poison, <1 x i32> zeroinitializer
@@ -197,37 +197,37 @@ define <1 x float> @LoadInst_dot_product_float_v7(ptr %lhs_address, ptr %rhs_add
 ; CHECK-NEXT:    [[COL_LOAD12:%.*]] = load <7 x float>, ptr [[RHS_ADDRESS:%.*]], align 32
 ; CHECK-NEXT:    [[BLOCK:%.*]] = shufflevector <1 x float> [[COL_LOAD]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <7 x float> [[COL_LOAD12]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x float> poison, float [[TMP0]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x float> poison, float [[TMP0]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP1:%.*]] = fmul fast <1 x float> [[BLOCK]], [[SPLAT_SPLAT]]
 ; CHECK-NEXT:    [[BLOCK13:%.*]] = shufflevector <1 x float> [[COL_LOAD1]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <7 x float> [[COL_LOAD12]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT14:%.*]] = insertelement <1 x float> poison, float [[TMP2]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT14:%.*]] = insertelement <1 x float> poison, float [[TMP2]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT15:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT14]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP3:%.*]] = call fast <1 x float> @llvm.fmuladd.v1f32(<1 x float> [[BLOCK13]], <1 x float> [[SPLAT_SPLAT15]], <1 x float> [[TMP1]])
 ; CHECK-NEXT:    [[BLOCK16:%.*]] = shufflevector <1 x float> [[COL_LOAD3]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <7 x float> [[COL_LOAD12]], i64 2
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT17:%.*]] = insertelement <1 x float> poison, float [[TMP4]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT17:%.*]] = insertelement <1 x float> poison, float [[TMP4]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT18:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT17]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP5:%.*]] = call fast <1 x float> @llvm.fmuladd.v1f32(<1 x float> [[BLOCK16]], <1 x float> [[SPLAT_SPLAT18]], <1 x float> [[TMP3]])
 ; CHECK-NEXT:    [[BLOCK19:%.*]] = shufflevector <1 x float> [[COL_LOAD5]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <7 x float> [[COL_LOAD12]], i64 3
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT20:%.*]] = insertelement <1 x float> poison, float [[TMP6]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT20:%.*]] = insertelement <1 x float> poison, float [[TMP6]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT21:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT20]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP7:%.*]] = call fast <1 x float> @llvm.fmuladd.v1f32(<1 x float> [[BLOCK19]], <1 x float> [[SPLAT_SPLAT21]], <1 x float> [[TMP5]])
 ; CHECK-NEXT:    [[BLOCK22:%.*]] = shufflevector <1 x float> [[COL_LOAD7]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <7 x float> [[COL_LOAD12]], i64 4
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT23:%.*]] = insertelement <1 x float> poison, float [[TMP8]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT23:%.*]] = insertelement <1 x float> poison, float [[TMP8]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT24:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT23]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP9:%.*]] = call fast <1 x float> @llvm.fmuladd.v1f32(<1 x float> [[BLOCK22]], <1 x float> [[SPLAT_SPLAT24]], <1 x float> [[TMP7]])
 ; CHECK-NEXT:    [[BLOCK25:%.*]] = shufflevector <1 x float> [[COL_LOAD9]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <7 x float> [[COL_LOAD12]], i64 5
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT26:%.*]] = insertelement <1 x float> poison, float [[TMP10]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT26:%.*]] = insertelement <1 x float> poison, float [[TMP10]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT27:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT26]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP11:%.*]] = call fast <1 x float> @llvm.fmuladd.v1f32(<1 x float> [[BLOCK25]], <1 x float> [[SPLAT_SPLAT27]], <1 x float> [[TMP9]])
 ; CHECK-NEXT:    [[BLOCK28:%.*]] = shufflevector <1 x float> [[COL_LOAD11]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <7 x float> [[COL_LOAD12]], i64 6
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT29:%.*]] = insertelement <1 x float> poison, float [[TMP12]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT29:%.*]] = insertelement <1 x float> poison, float [[TMP12]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT30:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT29]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP13:%.*]] = call fast <1 x float> @llvm.fmuladd.v1f32(<1 x float> [[BLOCK28]], <1 x float> [[SPLAT_SPLAT30]], <1 x float> [[TMP11]])
 ; CHECK-NEXT:    [[TMP14:%.*]] = shufflevector <1 x float> [[TMP13]], <1 x float> poison, <1 x i32> zeroinitializer
@@ -255,32 +255,32 @@ define <1 x double> @dotproduct_double_v6(<6 x double> %a, <6 x double> %b) {
 ; CHECK-NEXT:    [[SPLIT6:%.*]] = shufflevector <6 x double> [[B:%.*]], <6 x double> poison, <6 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5>
 ; CHECK-NEXT:    [[BLOCK:%.*]] = shufflevector <1 x double> [[SPLIT]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <6 x double> [[SPLIT6]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x double> poison, double [[TMP0]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x double> poison, double [[TMP0]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP1:%.*]] = fmul fast <1 x double> [[BLOCK]], [[SPLAT_SPLAT]]
 ; CHECK-NEXT:    [[BLOCK7:%.*]] = shufflevector <1 x double> [[SPLIT1]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <6 x double> [[SPLIT6]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT8:%.*]] = insertelement <1 x double> poison, double [[TMP2]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT8:%.*]] = insertelement <1 x double> poison, double [[TMP2]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT9:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT8]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP3:%.*]] = call fast <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK7]], <1 x double> [[SPLAT_SPLAT9]], <1 x double> [[TMP1]])
 ; CHECK-NEXT:    [[BLOCK10:%.*]] = shufflevector <1 x double> [[SPLIT2]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <6 x double> [[SPLIT6]], i64 2
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT11:%.*]] = insertelement <1 x double> poison, double [[TMP4]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT11:%.*]] = insertelement <1 x double> poison, double [[TMP4]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT12:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT11]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP5:%.*]] = call fast <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK10]], <1 x double> [[SPLAT_SPLAT12]], <1 x double> [[TMP3]])
 ; CHECK-NEXT:    [[BLOCK13:%.*]] = shufflevector <1 x double> [[SPLIT3]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <6 x double> [[SPLIT6]], i64 3
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT14:%.*]] = insertelement <1 x double> poison, double [[TMP6]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT14:%.*]] = insertelement <1 x double> poison, double [[TMP6]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT15:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT14]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP7:%.*]] = call fast <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK13]], <1 x double> [[SPLAT_SPLAT15]], <1 x double> [[TMP5]])
 ; CHECK-NEXT:    [[BLOCK16:%.*]] = shufflevector <1 x double> [[SPLIT4]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <6 x double> [[SPLIT6]], i64 4
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT17:%.*]] = insertelement <1 x double> poison, double [[TMP8]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT17:%.*]] = insertelement <1 x double> poison, double [[TMP8]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT18:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT17]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP9:%.*]] = call fast <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK16]], <1 x double> [[SPLAT_SPLAT18]], <1 x double> [[TMP7]])
 ; CHECK-NEXT:    [[BLOCK19:%.*]] = shufflevector <1 x double> [[SPLIT5]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <6 x double> [[SPLIT6]], i64 5
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT20:%.*]] = insertelement <1 x double> poison, double [[TMP10]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT20:%.*]] = insertelement <1 x double> poison, double [[TMP10]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT21:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT20]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP11:%.*]] = call fast <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK19]], <1 x double> [[SPLAT_SPLAT21]], <1 x double> [[TMP9]])
 ; CHECK-NEXT:    [[TMP12:%.*]] = shufflevector <1 x double> [[TMP11]], <1 x double> poison, <1 x i32> zeroinitializer
@@ -324,32 +324,32 @@ define <1 x double> @intrinsic_column_major_load_dot_product_double_v6(ptr %lhs_
 ; CHECK-NEXT:    [[SPLIT16:%.*]] = shufflevector <6 x double> [[TMP5]], <6 x double> poison, <6 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5>
 ; CHECK-NEXT:    [[BLOCK:%.*]] = shufflevector <1 x double> [[SPLIT]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <6 x double> [[SPLIT16]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x double> poison, double [[TMP6]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x double> poison, double [[TMP6]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP7:%.*]] = fmul fast <1 x double> [[BLOCK]], [[SPLAT_SPLAT]]
 ; CHECK-NEXT:    [[BLOCK17:%.*]] = shufflevector <1 x double> [[SPLIT11]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <6 x double> [[SPLIT16]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT18:%.*]] = insertelement <1 x double> poison, double [[TMP8]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT18:%.*]] = insertelement <1 x double> poison, double [[TMP8]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT19:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT18]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP9:%.*]] = call fast <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK17]], <1 x double> [[SPLAT_SPLAT19]], <1 x double> [[TMP7]])
 ; CHECK-NEXT:    [[BLOCK20:%.*]] = shufflevector <1 x double> [[SPLIT12]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <6 x double> [[SPLIT16]], i64 2
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT21:%.*]] = insertelement <1 x double> poison, double [[TMP10]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT21:%.*]] = insertelement <1 x double> poison, double [[TMP10]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT22:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT21]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP11:%.*]] = call fast <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK20]], <1 x double> [[SPLAT_SPLAT22]], <1 x double> [[TMP9]])
 ; CHECK-NEXT:    [[BLOCK23:%.*]] = shufflevector <1 x double> [[SPLIT13]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <6 x double> [[SPLIT16]], i64 3
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT24:%.*]] = insertelement <1 x double> poison, double [[TMP12]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT24:%.*]] = insertelement <1 x double> poison, double [[TMP12]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT25:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT24]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP13:%.*]] = call fast <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK23]], <1 x double> [[SPLAT_SPLAT25]], <1 x double> [[TMP11]])
 ; CHECK-NEXT:    [[BLOCK26:%.*]] = shufflevector <1 x double> [[SPLIT14]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <6 x double> [[SPLIT16]], i64 4
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT27:%.*]] = insertelement <1 x double> poison, double [[TMP14]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT27:%.*]] = insertelement <1 x double> poison, double [[TMP14]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT28:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT27]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP15:%.*]] = call fast <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK26]], <1 x double> [[SPLAT_SPLAT28]], <1 x double> [[TMP13]])
 ; CHECK-NEXT:    [[BLOCK29:%.*]] = shufflevector <1 x double> [[SPLIT15]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <6 x double> [[SPLIT16]], i64 5
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT30:%.*]] = insertelement <1 x double> poison, double [[TMP16]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT30:%.*]] = insertelement <1 x double> poison, double [[TMP16]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT31:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT30]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP17:%.*]] = call fast <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK29]], <1 x double> [[SPLAT_SPLAT31]], <1 x double> [[TMP15]])
 ; CHECK-NEXT:    [[TMP18:%.*]] = shufflevector <1 x double> [[TMP17]], <1 x double> poison, <1 x i32> zeroinitializer
@@ -384,37 +384,37 @@ define <1 x double> @LoadInst_dot_product_double_v7(ptr %lhs_address, ptr %rhs_a
 ; CHECK-NEXT:    [[COL_LOAD12:%.*]] = load <7 x double>, ptr [[RHS_ADDRESS:%.*]], align 64
 ; CHECK-NEXT:    [[BLOCK:%.*]] = shufflevector <1 x double> [[COL_LOAD]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <7 x double> [[COL_LOAD12]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x double> poison, double [[TMP0]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x double> poison, double [[TMP0]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP1:%.*]] = fmul fast <1 x double> [[BLOCK]], [[SPLAT_SPLAT]]
 ; CHECK-NEXT:    [[BLOCK13:%.*]] = shufflevector <1 x double> [[COL_LOAD1]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <7 x double> [[COL_LOAD12]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT14:%.*]] = insertelement <1 x double> poison, double [[TMP2]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT14:%.*]] = insertelement <1 x double> poison, double [[TMP2]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT15:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT14]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP3:%.*]] = call fast <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK13]], <1 x double> [[SPLAT_SPLAT15]], <1 x double> [[TMP1]])
 ; CHECK-NEXT:    [[BLOCK16:%.*]] = shufflevector <1 x double> [[COL_LOAD3]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <7 x double> [[COL_LOAD12]], i64 2
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT17:%.*]] = insertelement <1 x double> poison, double [[TMP4]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT17:%.*]] = insertelement <1 x double> poison, double [[TMP4]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT18:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT17]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP5:%.*]] = call fast <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK16]], <1 x double> [[SPLAT_SPLAT18]], <1 x double> [[TMP3]])
 ; CHECK-NEXT:    [[BLOCK19:%.*]] = shufflevector <1 x double> [[COL_LOAD5]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <7 x double> [[COL_LOAD12]], i64 3
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT20:%.*]] = insertelement <1 x double> poison, double [[TMP6]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT20:%.*]] = insertelement <1 x double> poison, double [[TMP6]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT21:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT20]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP7:%.*]] = call fast <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK19]], <1 x double> [[SPLAT_SPLAT21]], <1 x double> [[TMP5]])
 ; CHECK-NEXT:    [[BLOCK22:%.*]] = shufflevector <1 x double> [[COL_LOAD7]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <7 x double> [[COL_LOAD12]], i64 4
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT23:%.*]] = insertelement <1 x double> poison, double [[TMP8]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT23:%.*]] = insertelement <1 x double> poison, double [[TMP8]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT24:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT23]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP9:%.*]] = call fast <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK22]], <1 x double> [[SPLAT_SPLAT24]], <1 x double> [[TMP7]])
 ; CHECK-NEXT:    [[BLOCK25:%.*]] = shufflevector <1 x double> [[COL_LOAD9]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <7 x double> [[COL_LOAD12]], i64 5
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT26:%.*]] = insertelement <1 x double> poison, double [[TMP10]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT26:%.*]] = insertelement <1 x double> poison, double [[TMP10]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT27:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT26]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP11:%.*]] = call fast <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK25]], <1 x double> [[SPLAT_SPLAT27]], <1 x double> [[TMP9]])
 ; CHECK-NEXT:    [[BLOCK28:%.*]] = shufflevector <1 x double> [[COL_LOAD11]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <7 x double> [[COL_LOAD12]], i64 6
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT29:%.*]] = insertelement <1 x double> poison, double [[TMP12]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT29:%.*]] = insertelement <1 x double> poison, double [[TMP12]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT30:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT29]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP13:%.*]] = call fast <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK28]], <1 x double> [[SPLAT_SPLAT30]], <1 x double> [[TMP11]])
 ; CHECK-NEXT:    [[TMP14:%.*]] = shufflevector <1 x double> [[TMP13]], <1 x double> poison, <1 x i32> zeroinitializer
@@ -444,48 +444,48 @@ define <1 x i64> @dotproduct_i64_v8(<8 x i64> %a, <8 x i64> %b) {
 ; CHECK-NEXT:    [[SPLIT8:%.*]] = shufflevector <8 x i64> [[B:%.*]], <8 x i64> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
 ; CHECK-NEXT:    [[BLOCK:%.*]] = shufflevector <1 x i64> [[SPLIT]], <1 x i64> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <8 x i64> [[SPLIT8]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x i64> poison, i64 [[TMP0]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x i64> poison, i64 [[TMP0]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x i64> [[SPLAT_SPLATINSERT]], <1 x i64> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP1:%.*]] = mul <1 x i64> [[BLOCK]], [[SPLAT_SPLAT]]
 ; CHECK-NEXT:    [[BLOCK9:%.*]] = shufflevector <1 x i64> [[SPLIT1]], <1 x i64> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <8 x i64> [[SPLIT8]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT10:%.*]] = insertelement <1 x i64> poison, i64 [[TMP2]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT10:%.*]] = insertelement <1 x i64> poison, i64 [[TMP2]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT11:%.*]] = shufflevector <1 x i64> [[SPLAT_SPLATINSERT10]], <1 x i64> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP3:%.*]] = mul <1 x i64> [[BLOCK9]], [[SPLAT_SPLAT11]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = add <1 x i64> [[TMP1]], [[TMP3]]
 ; CHECK-NEXT:    [[BLOCK12:%.*]] = shufflevector <1 x i64> [[SPLIT2]], <1 x i64> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <8 x i64> [[SPLIT8]], i64 2
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT13:%.*]] = insertelement <1 x i64> poison, i64 [[TMP5]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT13:%.*]] = insertelement <1 x i64> poison, i64 [[TMP5]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT14:%.*]] = shufflevector <1 x i64> [[SPLAT_SPLATINSERT13]], <1 x i64> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP6:%.*]] = mul <1 x i64> [[BLOCK12]], [[SPLAT_SPLAT14]]
 ; CHECK-NEXT:    [[TMP7:%.*]] = add <1 x i64> [[TMP4]], [[TMP6]]
 ; CHECK-NEXT:    [[BLOCK15:%.*]] = shufflevector <1 x i64> [[SPLIT3]], <1 x i64> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <8 x i64> [[SPLIT8]], i64 3
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT16:%.*]] = insertelement <1 x i64> poison, i64 [[TMP8]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT16:%.*]] = insertelement <1 x i64> poison, i64 [[TMP8]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT17:%.*]] = shufflevector <1 x i64> [[SPLAT_SPLATINSERT16]], <1 x i64> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP9:%.*]] = mul <1 x i64> [[BLOCK15]], [[SPLAT_SPLAT17]]
 ; CHECK-NEXT:    [[TMP10:%.*]] = add <1 x i64> [[TMP7]], [[TMP9]]
 ; CHECK-NEXT:    [[BLOCK18:%.*]] = shufflevector <1 x i64> [[SPLIT4]], <1 x i64> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <8 x i64> [[SPLIT8]], i64 4
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT19:%.*]] = insertelement <1 x i64> poison, i64 [[TMP11]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT19:%.*]] = insertelement <1 x i64> poison, i64 [[TMP11]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT20:%.*]] = shufflevector <1 x i64> [[SPLAT_SPLATINSERT19]], <1 x i64> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP12:%.*]] = mul <1 x i64> [[BLOCK18]], [[SPLAT_SPLAT20]]
 ; CHECK-NEXT:    [[TMP13:%.*]] = add <1 x i64> [[TMP10]], [[TMP12]]
 ; CHECK-NEXT:    [[BLOCK21:%.*]] = shufflevector <1 x i64> [[SPLIT5]], <1 x i64> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <8 x i64> [[SPLIT8]], i64 5
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT22:%.*]] = insertelement <1 x i64> poison, i64 [[TMP14]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT22:%.*]] = insertelement <1 x i64> poison, i64 [[TMP14]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT23:%.*]] = shufflevector <1 x i64> [[SPLAT_SPLATINSERT22]], <1 x i64> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP15:%.*]] = mul <1 x i64> [[BLOCK21]], [[SPLAT_SPLAT23]]
 ; CHECK-NEXT:    [[TMP16:%.*]] = add <1 x i64> [[TMP13]], [[TMP15]]
 ; CHECK-NEXT:    [[BLOCK24:%.*]] = shufflevector <1 x i64> [[SPLIT6]], <1 x i64> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP17:%.*]] = extractelement <8 x i64> [[SPLIT8]], i64 6
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT25:%.*]] = insertelement <1 x i64> poison, i64 [[TMP17]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT25:%.*]] = insertelement <1 x i64> poison, i64 [[TMP17]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT26:%.*]] = shufflevector <1 x i64> [[SPLAT_SPLATINSERT25]], <1 x i64> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP18:%.*]] = mul <1 x i64> [[BLOCK24]], [[SPLAT_SPLAT26]]
 ; CHECK-NEXT:    [[TMP19:%.*]] = add <1 x i64> [[TMP16]], [[TMP18]]
 ; CHECK-NEXT:    [[BLOCK27:%.*]] = shufflevector <1 x i64> [[SPLIT7]], <1 x i64> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP20:%.*]] = extractelement <8 x i64> [[SPLIT8]], i64 7
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT28:%.*]] = insertelement <1 x i64> poison, i64 [[TMP20]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT28:%.*]] = insertelement <1 x i64> poison, i64 [[TMP20]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT29:%.*]] = shufflevector <1 x i64> [[SPLAT_SPLATINSERT28]], <1 x i64> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP21:%.*]] = mul <1 x i64> [[BLOCK27]], [[SPLAT_SPLAT29]]
 ; CHECK-NEXT:    [[TMP22:%.*]] = add <1 x i64> [[TMP19]], [[TMP21]]
@@ -537,48 +537,48 @@ define <1 x i32> @intrinsic_column_major_load_dot_product_i32_v8(ptr %lhs_addres
 ; CHECK-NEXT:    [[SPLIT22:%.*]] = shufflevector <8 x i32> [[TMP6]], <8 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
 ; CHECK-NEXT:    [[BLOCK:%.*]] = shufflevector <1 x i32> [[SPLIT]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <8 x i32> [[SPLIT22]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x i32> poison, i32 [[TMP7]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x i32> poison, i32 [[TMP7]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP8:%.*]] = mul <1 x i32> [[BLOCK]], [[SPLAT_SPLAT]]
 ; CHECK-NEXT:    [[BLOCK23:%.*]] = shufflevector <1 x i32> [[SPLIT15]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <8 x i32> [[SPLIT22]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT24:%.*]] = insertelement <1 x i32> poison, i32 [[TMP9]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT24:%.*]] = insertelement <1 x i32> poison, i32 [[TMP9]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT25:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT24]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP10:%.*]] = mul <1 x i32> [[BLOCK23]], [[SPLAT_SPLAT25]]
 ; CHECK-NEXT:    [[TMP11:%.*]] = add <1 x i32> [[TMP8]], [[TMP10]]
 ; CHECK-NEXT:    [[BLOCK26:%.*]] = shufflevector <1 x i32> [[SPLIT16]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <8 x i32> [[SPLIT22]], i64 2
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT27:%.*]] = insertelement <1 x i32> poison, i32 [[TMP12]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT27:%.*]] = insertelement <1 x i32> poison, i32 [[TMP12]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT28:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT27]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP13:%.*]] = mul <1 x i32> [[BLOCK26]], [[SPLAT_SPLAT28]]
 ; CHECK-NEXT:    [[TMP14:%.*]] = add <1 x i32> [[TMP11]], [[TMP13]]
 ; CHECK-NEXT:    [[BLOCK29:%.*]] = shufflevector <1 x i32> [[SPLIT17]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <8 x i32> [[SPLIT22]], i64 3
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT30:%.*]] = insertelement <1 x i32> poison, i32 [[TMP15]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT30:%.*]] = insertelement <1 x i32> poison, i32 [[TMP15]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT31:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT30]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP16:%.*]] = mul <1 x i32> [[BLOCK29]], [[SPLAT_SPLAT31]]
 ; CHECK-NEXT:    [[TMP17:%.*]] = add <1 x i32> [[TMP14]], [[TMP16]]
 ; CHECK-NEXT:    [[BLOCK32:%.*]] = shufflevector <1 x i32> [[SPLIT18]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP18:%.*]] = extractelement <8 x i32> [[SPLIT22]], i64 4
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT33:%.*]] = insertelement <1 x i32> poison, i32 [[TMP18]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT33:%.*]] = insertelement <1 x i32> poison, i32 [[TMP18]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT34:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT33]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP19:%.*]] = mul <1 x i32> [[BLOCK32]], [[SPLAT_SPLAT34]]
 ; CHECK-NEXT:    [[TMP20:%.*]] = add <1 x i32> [[TMP17]], [[TMP19]]
 ; CHECK-NEXT:    [[BLOCK35:%.*]] = shufflevector <1 x i32> [[SPLIT19]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP21:%.*]] = extractelement <8 x i32> [[SPLIT22]], i64 5
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT36:%.*]] = insertelement <1 x i32> poison, i32 [[TMP21]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT36:%.*]] = insertelement <1 x i32> poison, i32 [[TMP21]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT37:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT36]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP22:%.*]] = mul <1 x i32> [[BLOCK35]], [[SPLAT_SPLAT37]]
 ; CHECK-NEXT:    [[TMP23:%.*]] = add <1 x i32> [[TMP20]], [[TMP22]]
 ; CHECK-NEXT:    [[BLOCK38:%.*]] = shufflevector <1 x i32> [[SPLIT20]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP24:%.*]] = extractelement <8 x i32> [[SPLIT22]], i64 6
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT39:%.*]] = insertelement <1 x i32> poison, i32 [[TMP24]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT39:%.*]] = insertelement <1 x i32> poison, i32 [[TMP24]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT40:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT39]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP25:%.*]] = mul <1 x i32> [[BLOCK38]], [[SPLAT_SPLAT40]]
 ; CHECK-NEXT:    [[TMP26:%.*]] = add <1 x i32> [[TMP23]], [[TMP25]]
 ; CHECK-NEXT:    [[BLOCK41:%.*]] = shufflevector <1 x i32> [[SPLIT21]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP27:%.*]] = extractelement <8 x i32> [[SPLIT22]], i64 7
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT42:%.*]] = insertelement <1 x i32> poison, i32 [[TMP27]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT42:%.*]] = insertelement <1 x i32> poison, i32 [[TMP27]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT43:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT42]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP28:%.*]] = mul <1 x i32> [[BLOCK41]], [[SPLAT_SPLAT43]]
 ; CHECK-NEXT:    [[TMP29:%.*]] = add <1 x i32> [[TMP26]], [[TMP28]]
@@ -619,24 +619,24 @@ define <1 x i32> @intrinsic_column_major_load_dot_product_i32_v4_strided(ptr %lh
 ; CHECK-NEXT:    [[SPLIT10:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 ; CHECK-NEXT:    [[BLOCK:%.*]] = shufflevector <1 x i32> [[SPLIT]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x i32> [[SPLIT10]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x i32> poison, i32 [[TMP3]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x i32> poison, i32 [[TMP3]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP4:%.*]] = mul <1 x i32> [[BLOCK]], [[SPLAT_SPLAT]]
 ; CHECK-NEXT:    [[BLOCK11:%.*]] = shufflevector <1 x i32> [[SPLIT7]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x i32> [[SPLIT10]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT12:%.*]] = insertelement <1 x i32> poison, i32 [[TMP5]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT12:%.*]] = insertelement <1 x i32> poison, i32 [[TMP5]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT13:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT12]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP6:%.*]] = mul <1 x i32> [[BLOCK11]], [[SPLAT_SPLAT13]]
 ; CHECK-NEXT:    [[TMP7:%.*]] = add <1 x i32> [[TMP4]], [[TMP6]]
 ; CHECK-NEXT:    [[BLOCK14:%.*]] = shufflevector <1 x i32> [[SPLIT8]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <4 x i32> [[SPLIT10]], i64 2
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT15:%.*]] = insertelement <1 x i32> poison, i32 [[TMP8]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT15:%.*]] = insertelement <1 x i32> poison, i32 [[TMP8]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT16:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT15]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP9:%.*]] = mul <1 x i32> [[BLOCK14]], [[SPLAT_SPLAT16]]
 ; CHECK-NEXT:    [[TMP10:%.*]] = add <1 x i32> [[TMP7]], [[TMP9]]
 ; CHECK-NEXT:    [[BLOCK17:%.*]] = shufflevector <1 x i32> [[SPLIT9]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <4 x i32> [[SPLIT10]], i64 3
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT18:%.*]] = insertelement <1 x i32> poison, i32 [[TMP11]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT18:%.*]] = insertelement <1 x i32> poison, i32 [[TMP11]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT19:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT18]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP12:%.*]] = mul <1 x i32> [[BLOCK17]], [[SPLAT_SPLAT19]]
 ; CHECK-NEXT:    [[TMP13:%.*]] = add <1 x i32> [[TMP10]], [[TMP12]]
@@ -671,36 +671,36 @@ define <1 x i16> @LoadInst_dot_product_i16_v6(ptr %lhs_address, ptr %rhs_address
 ; CHECK-NEXT:    [[COL_LOAD10:%.*]] = load <6 x i16>, ptr [[RHS_ADDRESS:%.*]], align 16
 ; CHECK-NEXT:    [[BLOCK:%.*]] = shufflevector <1 x i16> [[COL_LOAD]], <1 x i16> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <6 x i16> [[COL_LOAD10]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x i16> poison, i16 [[TMP0]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x i16> poison, i16 [[TMP0]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x i16> [[SPLAT_SPLATINSERT]], <1 x i16> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP1:%.*]] = mul <1 x i16> [[BLOCK]], [[SPLAT_SPLAT]]
 ; CHECK-NEXT:    [[BLOCK11:%.*]] = shufflevector <1 x i16> [[COL_LOAD1]], <1 x i16> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <6 x i16> [[COL_LOAD10]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT12:%.*]] = insertelement <1 x i16> poison, i16 [[TMP2]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT12:%.*]] = insertelement <1 x i16> poison, i16 [[TMP2]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT13:%.*]] = shufflevector <1 x i16> [[SPLAT_SPLATINSERT12]], <1 x i16> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP3:%.*]] = mul <1 x i16> [[BLOCK11]], [[SPLAT_SPLAT13]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = add <1 x i16> [[TMP1]], [[TMP3]]
 ; CHECK-NEXT:    [[BLOCK14:%.*]] = shufflevector <1 x i16> [[COL_LOAD3]], <1 x i16> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <6 x i16> [[COL_LOAD10]], i64 2
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT15:%.*]] = insertelement <1 x i16> poison, i16 [[TMP5]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT15:%.*]] = insertelement <1 x i16> poison, i16 [[TMP5]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT16:%.*]] = shufflevector <1 x i16> [[SPLAT_SPLATINSERT15]], <1 x i16> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP6:%.*]] = mul <1 x i16> [[BLOCK14]], [[SPLAT_SPLAT16]]
 ; CHECK-NEXT:    [[TMP7:%.*]] = add <1 x i16> [[TMP4]], [[TMP6]]
 ; CHECK-NEXT:    [[BLOCK17:%.*]] = shufflevector <1 x i16> [[COL_LOAD5]], <1 x i16> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <6 x i16> [[COL_LOAD10]], i64 3
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT18:%.*]] = insertelement <1 x i16> poison, i16 [[TMP8]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT18:%.*]] = insertelement <1 x i16> poison, i16 [[TMP8]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT19:%.*]] = shufflevector <1 x i16> [[SPLAT_SPLATINSERT18]], <1 x i16> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP9:%.*]] = mul <1 x i16> [[BLOCK17]], [[SPLAT_SPLAT19]]
 ; CHECK-NEXT:    [[TMP10:%.*]] = add <1 x i16> [[TMP7]], [[TMP9]]
 ; CHECK-NEXT:    [[BLOCK20:%.*]] = shufflevector <1 x i16> [[COL_LOAD7]], <1 x i16> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <6 x i16> [[COL_LOAD10]], i64 4
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT21:%.*]] = insertelement <1 x i16> poison, i16 [[TMP11]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT21:%.*]] = insertelement <1 x i16> poison, i16 [[TMP11]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT22:%.*]] = shufflevector <1 x i16> [[SPLAT_SPLATINSERT21]], <1 x i16> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP12:%.*]] = mul <1 x i16> [[BLOCK20]], [[SPLAT_SPLAT22]]
 ; CHECK-NEXT:    [[TMP13:%.*]] = add <1 x i16> [[TMP10]], [[TMP12]]
 ; CHECK-NEXT:    [[BLOCK23:%.*]] = shufflevector <1 x i16> [[COL_LOAD9]], <1 x i16> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <6 x i16> [[COL_LOAD10]], i64 5
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT24:%.*]] = insertelement <1 x i16> poison, i16 [[TMP14]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT24:%.*]] = insertelement <1 x i16> poison, i16 [[TMP14]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT25:%.*]] = shufflevector <1 x i16> [[SPLAT_SPLATINSERT24]], <1 x i16> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP15:%.*]] = mul <1 x i16> [[BLOCK23]], [[SPLAT_SPLAT25]]
 ; CHECK-NEXT:    [[TMP16:%.*]] = add <1 x i16> [[TMP13]], [[TMP15]]

diff  --git a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-add-sub-double-row-major.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-add-sub-double-row-major.ll
index caacff2e84895..3eed0d357bcb2 100644
--- a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-add-sub-double-row-major.ll
+++ b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-add-sub-double-row-major.ll
@@ -31,18 +31,18 @@ define void @multiply_sub_add_2x3_3x2(ptr %a.ptr, ptr %b.ptr, ptr %c.ptr) {
 ; RM-NEXT:    store <2 x double> [[TMP4]], ptr [[VEC_GEP9]], align 8
 ; RM-NEXT:    [[BLOCK:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP5:%.*]] = extractelement <3 x double> [[TMP0]], i64 0
-; RM-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x double> poison, double [[TMP5]], i32 0
+; RM-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x double> poison, double [[TMP5]], i64 0
 ; RM-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT]], <1 x double> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP6:%.*]] = fmul <1 x double> [[SPLAT_SPLAT]], [[BLOCK]]
 ; RM-NEXT:    [[BLOCK10:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP7:%.*]] = extractelement <3 x double> [[TMP0]], i64 1
-; RM-NEXT:    [[SPLAT_SPLATINSERT11:%.*]] = insertelement <1 x double> poison, double [[TMP7]], i32 0
+; RM-NEXT:    [[SPLAT_SPLATINSERT11:%.*]] = insertelement <1 x double> poison, double [[TMP7]], i64 0
 ; RM-NEXT:    [[SPLAT_SPLAT12:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT11]], <1 x double> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP8:%.*]] = fmul <1 x double> [[SPLAT_SPLAT12]], [[BLOCK10]]
 ; RM-NEXT:    [[TMP9:%.*]] = fadd <1 x double> [[TMP6]], [[TMP8]]
 ; RM-NEXT:    [[BLOCK13:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP10:%.*]] = extractelement <3 x double> [[TMP0]], i64 2
-; RM-NEXT:    [[SPLAT_SPLATINSERT14:%.*]] = insertelement <1 x double> poison, double [[TMP10]], i32 0
+; RM-NEXT:    [[SPLAT_SPLATINSERT14:%.*]] = insertelement <1 x double> poison, double [[TMP10]], i64 0
 ; RM-NEXT:    [[SPLAT_SPLAT15:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT14]], <1 x double> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP11:%.*]] = fmul <1 x double> [[SPLAT_SPLAT15]], [[BLOCK13]]
 ; RM-NEXT:    [[TMP12:%.*]] = fadd <1 x double> [[TMP9]], [[TMP11]]
@@ -50,18 +50,18 @@ define void @multiply_sub_add_2x3_3x2(ptr %a.ptr, ptr %b.ptr, ptr %c.ptr) {
 ; RM-NEXT:    [[TMP14:%.*]] = shufflevector <2 x double> undef, <2 x double> [[TMP13]], <2 x i32> <i32 2, i32 1>
 ; RM-NEXT:    [[BLOCK16:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> poison, <1 x i32> <i32 1>
 ; RM-NEXT:    [[TMP15:%.*]] = extractelement <3 x double> [[TMP0]], i64 0
-; RM-NEXT:    [[SPLAT_SPLATINSERT17:%.*]] = insertelement <1 x double> poison, double [[TMP15]], i32 0
+; RM-NEXT:    [[SPLAT_SPLATINSERT17:%.*]] = insertelement <1 x double> poison, double [[TMP15]], i64 0
 ; RM-NEXT:    [[SPLAT_SPLAT18:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT17]], <1 x double> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP16:%.*]] = fmul <1 x double> [[SPLAT_SPLAT18]], [[BLOCK16]]
 ; RM-NEXT:    [[BLOCK19:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <1 x i32> <i32 1>
 ; RM-NEXT:    [[TMP17:%.*]] = extractelement <3 x double> [[TMP0]], i64 1
-; RM-NEXT:    [[SPLAT_SPLATINSERT20:%.*]] = insertelement <1 x double> poison, double [[TMP17]], i32 0
+; RM-NEXT:    [[SPLAT_SPLATINSERT20:%.*]] = insertelement <1 x double> poison, double [[TMP17]], i64 0
 ; RM-NEXT:    [[SPLAT_SPLAT21:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT20]], <1 x double> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP18:%.*]] = fmul <1 x double> [[SPLAT_SPLAT21]], [[BLOCK19]]
 ; RM-NEXT:    [[TMP19:%.*]] = fadd <1 x double> [[TMP16]], [[TMP18]]
 ; RM-NEXT:    [[BLOCK22:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> poison, <1 x i32> <i32 1>
 ; RM-NEXT:    [[TMP20:%.*]] = extractelement <3 x double> [[TMP0]], i64 2
-; RM-NEXT:    [[SPLAT_SPLATINSERT23:%.*]] = insertelement <1 x double> poison, double [[TMP20]], i32 0
+; RM-NEXT:    [[SPLAT_SPLATINSERT23:%.*]] = insertelement <1 x double> poison, double [[TMP20]], i64 0
 ; RM-NEXT:    [[SPLAT_SPLAT24:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT23]], <1 x double> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP21:%.*]] = fmul <1 x double> [[SPLAT_SPLAT24]], [[BLOCK22]]
 ; RM-NEXT:    [[TMP22:%.*]] = fadd <1 x double> [[TMP19]], [[TMP21]]
@@ -69,18 +69,18 @@ define void @multiply_sub_add_2x3_3x2(ptr %a.ptr, ptr %b.ptr, ptr %c.ptr) {
 ; RM-NEXT:    [[TMP24:%.*]] = shufflevector <2 x double> [[TMP14]], <2 x double> [[TMP23]], <2 x i32> <i32 0, i32 2>
 ; RM-NEXT:    [[BLOCK25:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP25:%.*]] = extractelement <3 x double> [[TMP1]], i64 0
-; RM-NEXT:    [[SPLAT_SPLATINSERT26:%.*]] = insertelement <1 x double> poison, double [[TMP25]], i32 0
+; RM-NEXT:    [[SPLAT_SPLATINSERT26:%.*]] = insertelement <1 x double> poison, double [[TMP25]], i64 0
 ; RM-NEXT:    [[SPLAT_SPLAT27:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT26]], <1 x double> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP26:%.*]] = fmul <1 x double> [[SPLAT_SPLAT27]], [[BLOCK25]]
 ; RM-NEXT:    [[BLOCK28:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP27:%.*]] = extractelement <3 x double> [[TMP1]], i64 1
-; RM-NEXT:    [[SPLAT_SPLATINSERT29:%.*]] = insertelement <1 x double> poison, double [[TMP27]], i32 0
+; RM-NEXT:    [[SPLAT_SPLATINSERT29:%.*]] = insertelement <1 x double> poison, double [[TMP27]], i64 0
 ; RM-NEXT:    [[SPLAT_SPLAT30:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT29]], <1 x double> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP28:%.*]] = fmul <1 x double> [[SPLAT_SPLAT30]], [[BLOCK28]]
 ; RM-NEXT:    [[TMP29:%.*]] = fadd <1 x double> [[TMP26]], [[TMP28]]
 ; RM-NEXT:    [[BLOCK31:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP30:%.*]] = extractelement <3 x double> [[TMP1]], i64 2
-; RM-NEXT:    [[SPLAT_SPLATINSERT32:%.*]] = insertelement <1 x double> poison, double [[TMP30]], i32 0
+; RM-NEXT:    [[SPLAT_SPLATINSERT32:%.*]] = insertelement <1 x double> poison, double [[TMP30]], i64 0
 ; RM-NEXT:    [[SPLAT_SPLAT33:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT32]], <1 x double> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP31:%.*]] = fmul <1 x double> [[SPLAT_SPLAT33]], [[BLOCK31]]
 ; RM-NEXT:    [[TMP32:%.*]] = fadd <1 x double> [[TMP29]], [[TMP31]]
@@ -88,18 +88,18 @@ define void @multiply_sub_add_2x3_3x2(ptr %a.ptr, ptr %b.ptr, ptr %c.ptr) {
 ; RM-NEXT:    [[TMP34:%.*]] = shufflevector <2 x double> undef, <2 x double> [[TMP33]], <2 x i32> <i32 2, i32 1>
 ; RM-NEXT:    [[BLOCK34:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> poison, <1 x i32> <i32 1>
 ; RM-NEXT:    [[TMP35:%.*]] = extractelement <3 x double> [[TMP1]], i64 0
-; RM-NEXT:    [[SPLAT_SPLATINSERT35:%.*]] = insertelement <1 x double> poison, double [[TMP35]], i32 0
+; RM-NEXT:    [[SPLAT_SPLATINSERT35:%.*]] = insertelement <1 x double> poison, double [[TMP35]], i64 0
 ; RM-NEXT:    [[SPLAT_SPLAT36:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT35]], <1 x double> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP36:%.*]] = fmul <1 x double> [[SPLAT_SPLAT36]], [[BLOCK34]]
 ; RM-NEXT:    [[BLOCK37:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <1 x i32> <i32 1>
 ; RM-NEXT:    [[TMP37:%.*]] = extractelement <3 x double> [[TMP1]], i64 1
-; RM-NEXT:    [[SPLAT_SPLATINSERT38:%.*]] = insertelement <1 x double> poison, double [[TMP37]], i32 0
+; RM-NEXT:    [[SPLAT_SPLATINSERT38:%.*]] = insertelement <1 x double> poison, double [[TMP37]], i64 0
 ; RM-NEXT:    [[SPLAT_SPLAT39:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT38]], <1 x double> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP38:%.*]] = fmul <1 x double> [[SPLAT_SPLAT39]], [[BLOCK37]]
 ; RM-NEXT:    [[TMP39:%.*]] = fadd <1 x double> [[TMP36]], [[TMP38]]
 ; RM-NEXT:    [[BLOCK40:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> poison, <1 x i32> <i32 1>
 ; RM-NEXT:    [[TMP40:%.*]] = extractelement <3 x double> [[TMP1]], i64 2
-; RM-NEXT:    [[SPLAT_SPLATINSERT41:%.*]] = insertelement <1 x double> poison, double [[TMP40]], i32 0
+; RM-NEXT:    [[SPLAT_SPLATINSERT41:%.*]] = insertelement <1 x double> poison, double [[TMP40]], i64 0
 ; RM-NEXT:    [[SPLAT_SPLAT42:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT41]], <1 x double> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP41:%.*]] = fmul <1 x double> [[SPLAT_SPLAT42]], [[BLOCK40]]
 ; RM-NEXT:    [[TMP42:%.*]] = fadd <1 x double> [[TMP39]], [[TMP41]]

diff  --git a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-double-contraction-fmf.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-double-contraction-fmf.ll
index aff7cda13c15f..b3b1f9352bf4f 100644
--- a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-double-contraction-fmf.ll
+++ b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-double-contraction-fmf.ll
@@ -11,48 +11,48 @@ define <4 x double> @multiply_2x2(<4 x double> %a, <4 x double> %b) {
 ; CHECK-NEXT:    [[SPLIT3:%.*]] = shufflevector <4 x double> [[B]], <4 x double> poison, <2 x i32> <i32 2, i32 3>
 ; CHECK-NEXT:    [[BLOCK:%.*]] = shufflevector <2 x double> [[SPLIT]], <2 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x double> poison, double [[TMP0]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x double> poison, double [[TMP0]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP1:%.*]] = fmul contract <1 x double> [[BLOCK]], [[SPLAT_SPLAT]]
 ; CHECK-NEXT:    [[BLOCK4:%.*]] = shufflevector <2 x double> [[SPLIT1]], <2 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT5:%.*]] = insertelement <1 x double> poison, double [[TMP2]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT5:%.*]] = insertelement <1 x double> poison, double [[TMP2]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT6:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT5]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP3:%.*]] = call contract <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK4]], <1 x double> [[SPLAT_SPLAT6]], <1 x double> [[TMP1]])
 ; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <1 x double> [[TMP3]], <1 x double> poison, <2 x i32> <i32 0, i32 undef>
 ; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <2 x double> undef, <2 x double> [[TMP4]], <2 x i32> <i32 2, i32 1>
 ; CHECK-NEXT:    [[BLOCK7:%.*]] = shufflevector <2 x double> [[SPLIT]], <2 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT8:%.*]] = insertelement <1 x double> poison, double [[TMP6]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT8:%.*]] = insertelement <1 x double> poison, double [[TMP6]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT9:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT8]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP7:%.*]] = fmul contract <1 x double> [[BLOCK7]], [[SPLAT_SPLAT9]]
 ; CHECK-NEXT:    [[BLOCK10:%.*]] = shufflevector <2 x double> [[SPLIT1]], <2 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT11:%.*]] = insertelement <1 x double> poison, double [[TMP8]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT11:%.*]] = insertelement <1 x double> poison, double [[TMP8]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT12:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT11]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP9:%.*]] = call contract <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK10]], <1 x double> [[SPLAT_SPLAT12]], <1 x double> [[TMP7]])
 ; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <1 x double> [[TMP9]], <1 x double> poison, <2 x i32> <i32 0, i32 undef>
 ; CHECK-NEXT:    [[TMP11:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> [[TMP10]], <2 x i32> <i32 0, i32 2>
 ; CHECK-NEXT:    [[BLOCK13:%.*]] = shufflevector <2 x double> [[SPLIT]], <2 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <2 x double> [[SPLIT3]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT14:%.*]] = insertelement <1 x double> poison, double [[TMP12]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT14:%.*]] = insertelement <1 x double> poison, double [[TMP12]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT15:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT14]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP13:%.*]] = fmul contract <1 x double> [[BLOCK13]], [[SPLAT_SPLAT15]]
 ; CHECK-NEXT:    [[BLOCK16:%.*]] = shufflevector <2 x double> [[SPLIT1]], <2 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <2 x double> [[SPLIT3]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT17:%.*]] = insertelement <1 x double> poison, double [[TMP14]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT17:%.*]] = insertelement <1 x double> poison, double [[TMP14]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT18:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT17]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP15:%.*]] = call contract <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK16]], <1 x double> [[SPLAT_SPLAT18]], <1 x double> [[TMP13]])
 ; CHECK-NEXT:    [[TMP16:%.*]] = shufflevector <1 x double> [[TMP15]], <1 x double> poison, <2 x i32> <i32 0, i32 undef>
 ; CHECK-NEXT:    [[TMP17:%.*]] = shufflevector <2 x double> undef, <2 x double> [[TMP16]], <2 x i32> <i32 2, i32 1>
 ; CHECK-NEXT:    [[BLOCK19:%.*]] = shufflevector <2 x double> [[SPLIT]], <2 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP18:%.*]] = extractelement <2 x double> [[SPLIT3]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT20:%.*]] = insertelement <1 x double> poison, double [[TMP18]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT20:%.*]] = insertelement <1 x double> poison, double [[TMP18]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT21:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT20]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP19:%.*]] = fmul contract <1 x double> [[BLOCK19]], [[SPLAT_SPLAT21]]
 ; CHECK-NEXT:    [[BLOCK22:%.*]] = shufflevector <2 x double> [[SPLIT1]], <2 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP20:%.*]] = extractelement <2 x double> [[SPLIT3]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT23:%.*]] = insertelement <1 x double> poison, double [[TMP20]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT23:%.*]] = insertelement <1 x double> poison, double [[TMP20]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT24:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT23]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP21:%.*]] = call contract <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK22]], <1 x double> [[SPLAT_SPLAT24]], <1 x double> [[TMP19]])
 ; CHECK-NEXT:    [[TMP22:%.*]] = shufflevector <1 x double> [[TMP21]], <1 x double> poison, <2 x i32> <i32 0, i32 undef>

diff  --git a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-double-contraction.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-double-contraction.ll
index f307bf8fe999b..5879e9d0abc9a 100644
--- a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-double-contraction.ll
+++ b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-double-contraction.ll
@@ -11,48 +11,48 @@ define <4 x double> @multiply_2x2(<4 x double> %a, <4 x double> %b) {
 ; CHECK-NEXT:    [[SPLIT3:%.*]] = shufflevector <4 x double> [[B]], <4 x double> poison, <2 x i32> <i32 2, i32 3>
 ; CHECK-NEXT:    [[BLOCK:%.*]] = shufflevector <2 x double> [[SPLIT]], <2 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x double> poison, double [[TMP0]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x double> poison, double [[TMP0]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP1:%.*]] = fmul contract <1 x double> [[BLOCK]], [[SPLAT_SPLAT]]
 ; CHECK-NEXT:    [[BLOCK4:%.*]] = shufflevector <2 x double> [[SPLIT1]], <2 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT5:%.*]] = insertelement <1 x double> poison, double [[TMP2]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT5:%.*]] = insertelement <1 x double> poison, double [[TMP2]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT6:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT5]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP3:%.*]] = call contract <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK4]], <1 x double> [[SPLAT_SPLAT6]], <1 x double> [[TMP1]])
 ; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <1 x double> [[TMP3]], <1 x double> poison, <2 x i32> <i32 0, i32 undef>
 ; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <2 x double> undef, <2 x double> [[TMP4]], <2 x i32> <i32 2, i32 1>
 ; CHECK-NEXT:    [[BLOCK7:%.*]] = shufflevector <2 x double> [[SPLIT]], <2 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT8:%.*]] = insertelement <1 x double> poison, double [[TMP6]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT8:%.*]] = insertelement <1 x double> poison, double [[TMP6]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT9:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT8]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP7:%.*]] = fmul contract <1 x double> [[BLOCK7]], [[SPLAT_SPLAT9]]
 ; CHECK-NEXT:    [[BLOCK10:%.*]] = shufflevector <2 x double> [[SPLIT1]], <2 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT11:%.*]] = insertelement <1 x double> poison, double [[TMP8]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT11:%.*]] = insertelement <1 x double> poison, double [[TMP8]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT12:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT11]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP9:%.*]] = call contract <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK10]], <1 x double> [[SPLAT_SPLAT12]], <1 x double> [[TMP7]])
 ; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <1 x double> [[TMP9]], <1 x double> poison, <2 x i32> <i32 0, i32 undef>
 ; CHECK-NEXT:    [[TMP11:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> [[TMP10]], <2 x i32> <i32 0, i32 2>
 ; CHECK-NEXT:    [[BLOCK13:%.*]] = shufflevector <2 x double> [[SPLIT]], <2 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <2 x double> [[SPLIT3]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT14:%.*]] = insertelement <1 x double> poison, double [[TMP12]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT14:%.*]] = insertelement <1 x double> poison, double [[TMP12]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT15:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT14]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP13:%.*]] = fmul contract <1 x double> [[BLOCK13]], [[SPLAT_SPLAT15]]
 ; CHECK-NEXT:    [[BLOCK16:%.*]] = shufflevector <2 x double> [[SPLIT1]], <2 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <2 x double> [[SPLIT3]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT17:%.*]] = insertelement <1 x double> poison, double [[TMP14]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT17:%.*]] = insertelement <1 x double> poison, double [[TMP14]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT18:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT17]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP15:%.*]] = call contract <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK16]], <1 x double> [[SPLAT_SPLAT18]], <1 x double> [[TMP13]])
 ; CHECK-NEXT:    [[TMP16:%.*]] = shufflevector <1 x double> [[TMP15]], <1 x double> poison, <2 x i32> <i32 0, i32 undef>
 ; CHECK-NEXT:    [[TMP17:%.*]] = shufflevector <2 x double> undef, <2 x double> [[TMP16]], <2 x i32> <i32 2, i32 1>
 ; CHECK-NEXT:    [[BLOCK19:%.*]] = shufflevector <2 x double> [[SPLIT]], <2 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP18:%.*]] = extractelement <2 x double> [[SPLIT3]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT20:%.*]] = insertelement <1 x double> poison, double [[TMP18]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT20:%.*]] = insertelement <1 x double> poison, double [[TMP18]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT21:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT20]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP19:%.*]] = fmul contract <1 x double> [[BLOCK19]], [[SPLAT_SPLAT21]]
 ; CHECK-NEXT:    [[BLOCK22:%.*]] = shufflevector <2 x double> [[SPLIT1]], <2 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP20:%.*]] = extractelement <2 x double> [[SPLIT3]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT23:%.*]] = insertelement <1 x double> poison, double [[TMP20]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT23:%.*]] = insertelement <1 x double> poison, double [[TMP20]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT24:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT23]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP21:%.*]] = call contract <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK22]], <1 x double> [[SPLAT_SPLAT24]], <1 x double> [[TMP19]])
 ; CHECK-NEXT:    [[TMP22:%.*]] = shufflevector <1 x double> [[TMP21]], <1 x double> poison, <2 x i32> <i32 0, i32 undef>

diff  --git a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-double-row-major.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-double-row-major.ll
index 6b0c52191e1e9..af7cdc8af4720 100644
--- a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-double-row-major.ll
+++ b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-double-row-major.ll
@@ -11,12 +11,12 @@ define <4 x double> @multiply_2x2(<4 x double> %a, <4 x double> %b) {
 ; CHECK-NEXT:    [[SPLIT3:%.*]] = shufflevector <4 x double> [[B]], <4 x double> poison, <2 x i32> <i32 2, i32 3>
 ; CHECK-NEXT:    [[BLOCK:%.*]] = shufflevector <2 x double> [[SPLIT2]], <2 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x double> [[SPLIT]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x double> poison, double [[TMP0]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x double> poison, double [[TMP0]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP1:%.*]] = fmul <1 x double> [[SPLAT_SPLAT]], [[BLOCK]]
 ; CHECK-NEXT:    [[BLOCK4:%.*]] = shufflevector <2 x double> [[SPLIT3]], <2 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x double> [[SPLIT]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT5:%.*]] = insertelement <1 x double> poison, double [[TMP2]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT5:%.*]] = insertelement <1 x double> poison, double [[TMP2]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT6:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT5]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP3:%.*]] = fmul <1 x double> [[SPLAT_SPLAT6]], [[BLOCK4]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = fadd <1 x double> [[TMP1]], [[TMP3]]
@@ -24,12 +24,12 @@ define <4 x double> @multiply_2x2(<4 x double> %a, <4 x double> %b) {
 ; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <2 x double> undef, <2 x double> [[TMP5]], <2 x i32> <i32 2, i32 1>
 ; CHECK-NEXT:    [[BLOCK7:%.*]] = shufflevector <2 x double> [[SPLIT2]], <2 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <2 x double> [[SPLIT]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT8:%.*]] = insertelement <1 x double> poison, double [[TMP7]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT8:%.*]] = insertelement <1 x double> poison, double [[TMP7]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT9:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT8]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP8:%.*]] = fmul <1 x double> [[SPLAT_SPLAT9]], [[BLOCK7]]
 ; CHECK-NEXT:    [[BLOCK10:%.*]] = shufflevector <2 x double> [[SPLIT3]], <2 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <2 x double> [[SPLIT]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT11:%.*]] = insertelement <1 x double> poison, double [[TMP9]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT11:%.*]] = insertelement <1 x double> poison, double [[TMP9]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT12:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT11]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP10:%.*]] = fmul <1 x double> [[SPLAT_SPLAT12]], [[BLOCK10]]
 ; CHECK-NEXT:    [[TMP11:%.*]] = fadd <1 x double> [[TMP8]], [[TMP10]]
@@ -37,12 +37,12 @@ define <4 x double> @multiply_2x2(<4 x double> %a, <4 x double> %b) {
 ; CHECK-NEXT:    [[TMP13:%.*]] = shufflevector <2 x double> [[TMP6]], <2 x double> [[TMP12]], <2 x i32> <i32 0, i32 2>
 ; CHECK-NEXT:    [[BLOCK13:%.*]] = shufflevector <2 x double> [[SPLIT2]], <2 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <2 x double> [[SPLIT1]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT14:%.*]] = insertelement <1 x double> poison, double [[TMP14]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT14:%.*]] = insertelement <1 x double> poison, double [[TMP14]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT15:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT14]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP15:%.*]] = fmul <1 x double> [[SPLAT_SPLAT15]], [[BLOCK13]]
 ; CHECK-NEXT:    [[BLOCK16:%.*]] = shufflevector <2 x double> [[SPLIT3]], <2 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <2 x double> [[SPLIT1]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT17:%.*]] = insertelement <1 x double> poison, double [[TMP16]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT17:%.*]] = insertelement <1 x double> poison, double [[TMP16]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT18:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT17]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP17:%.*]] = fmul <1 x double> [[SPLAT_SPLAT18]], [[BLOCK16]]
 ; CHECK-NEXT:    [[TMP18:%.*]] = fadd <1 x double> [[TMP15]], [[TMP17]]
@@ -50,12 +50,12 @@ define <4 x double> @multiply_2x2(<4 x double> %a, <4 x double> %b) {
 ; CHECK-NEXT:    [[TMP20:%.*]] = shufflevector <2 x double> undef, <2 x double> [[TMP19]], <2 x i32> <i32 2, i32 1>
 ; CHECK-NEXT:    [[BLOCK19:%.*]] = shufflevector <2 x double> [[SPLIT2]], <2 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP21:%.*]] = extractelement <2 x double> [[SPLIT1]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT20:%.*]] = insertelement <1 x double> poison, double [[TMP21]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT20:%.*]] = insertelement <1 x double> poison, double [[TMP21]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT21:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT20]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP22:%.*]] = fmul <1 x double> [[SPLAT_SPLAT21]], [[BLOCK19]]
 ; CHECK-NEXT:    [[BLOCK22:%.*]] = shufflevector <2 x double> [[SPLIT3]], <2 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP23:%.*]] = extractelement <2 x double> [[SPLIT1]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT23:%.*]] = insertelement <1 x double> poison, double [[TMP23]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT23:%.*]] = insertelement <1 x double> poison, double [[TMP23]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT24:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT23]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP24:%.*]] = fmul <1 x double> [[SPLAT_SPLAT24]], [[BLOCK22]]
 ; CHECK-NEXT:    [[TMP25:%.*]] = fadd <1 x double> [[TMP22]], [[TMP24]]
@@ -79,28 +79,28 @@ define <4 x double> @multiply_1x2(<2 x double> %a, <2 x double> %b) {
 ; CHECK-NEXT:    [[SPLIT2:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <2 x i32> <i32 0, i32 1>
 ; CHECK-NEXT:    [[BLOCK:%.*]] = shufflevector <2 x double> [[SPLIT2]], <2 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <1 x double> [[SPLIT]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x double> poison, double [[TMP0]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x double> poison, double [[TMP0]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP1:%.*]] = fmul <1 x double> [[SPLAT_SPLAT]], [[BLOCK]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <1 x double> [[TMP1]], <1 x double> poison, <2 x i32> <i32 0, i32 undef>
 ; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <2 x double> undef, <2 x double> [[TMP2]], <2 x i32> <i32 2, i32 1>
 ; CHECK-NEXT:    [[BLOCK3:%.*]] = shufflevector <2 x double> [[SPLIT2]], <2 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <1 x double> [[SPLIT]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT4:%.*]] = insertelement <1 x double> poison, double [[TMP4]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT4:%.*]] = insertelement <1 x double> poison, double [[TMP4]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT5:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT4]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP5:%.*]] = fmul <1 x double> [[SPLAT_SPLAT5]], [[BLOCK3]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <1 x double> [[TMP5]], <1 x double> poison, <2 x i32> <i32 0, i32 undef>
 ; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> [[TMP6]], <2 x i32> <i32 0, i32 2>
 ; CHECK-NEXT:    [[BLOCK6:%.*]] = shufflevector <2 x double> [[SPLIT2]], <2 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <1 x double> [[SPLIT1]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT7:%.*]] = insertelement <1 x double> poison, double [[TMP8]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT7:%.*]] = insertelement <1 x double> poison, double [[TMP8]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT8:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT7]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP9:%.*]] = fmul <1 x double> [[SPLAT_SPLAT8]], [[BLOCK6]]
 ; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <1 x double> [[TMP9]], <1 x double> poison, <2 x i32> <i32 0, i32 undef>
 ; CHECK-NEXT:    [[TMP11:%.*]] = shufflevector <2 x double> undef, <2 x double> [[TMP10]], <2 x i32> <i32 2, i32 1>
 ; CHECK-NEXT:    [[BLOCK9:%.*]] = shufflevector <2 x double> [[SPLIT2]], <2 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <1 x double> [[SPLIT1]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT10:%.*]] = insertelement <1 x double> poison, double [[TMP12]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT10:%.*]] = insertelement <1 x double> poison, double [[TMP12]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT11:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT10]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP13:%.*]] = fmul <1 x double> [[SPLAT_SPLAT11]], [[BLOCK9]]
 ; CHECK-NEXT:    [[TMP14:%.*]] = shufflevector <1 x double> [[TMP13]], <1 x double> poison, <2 x i32> <i32 0, i32 undef>
@@ -125,12 +125,12 @@ define <9 x double> @multiply_2x3(<6 x double> %a, <6 x double> %b) {
 ; CHECK-NEXT:    [[SPLIT4:%.*]] = shufflevector <6 x double> [[B]], <6 x double> poison, <3 x i32> <i32 3, i32 4, i32 5>
 ; CHECK-NEXT:    [[BLOCK:%.*]] = shufflevector <3 x double> [[SPLIT3]], <3 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x double> [[SPLIT]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x double> poison, double [[TMP0]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x double> poison, double [[TMP0]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP1:%.*]] = fmul <1 x double> [[SPLAT_SPLAT]], [[BLOCK]]
 ; CHECK-NEXT:    [[BLOCK5:%.*]] = shufflevector <3 x double> [[SPLIT4]], <3 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x double> [[SPLIT]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT6:%.*]] = insertelement <1 x double> poison, double [[TMP2]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT6:%.*]] = insertelement <1 x double> poison, double [[TMP2]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT7:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT6]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP3:%.*]] = fmul <1 x double> [[SPLAT_SPLAT7]], [[BLOCK5]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = fadd <1 x double> [[TMP1]], [[TMP3]]
@@ -138,12 +138,12 @@ define <9 x double> @multiply_2x3(<6 x double> %a, <6 x double> %b) {
 ; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <3 x double> undef, <3 x double> [[TMP5]], <3 x i32> <i32 3, i32 1, i32 2>
 ; CHECK-NEXT:    [[BLOCK8:%.*]] = shufflevector <3 x double> [[SPLIT3]], <3 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <2 x double> [[SPLIT]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT9:%.*]] = insertelement <1 x double> poison, double [[TMP7]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT9:%.*]] = insertelement <1 x double> poison, double [[TMP7]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT10:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT9]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP8:%.*]] = fmul <1 x double> [[SPLAT_SPLAT10]], [[BLOCK8]]
 ; CHECK-NEXT:    [[BLOCK11:%.*]] = shufflevector <3 x double> [[SPLIT4]], <3 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <2 x double> [[SPLIT]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT12:%.*]] = insertelement <1 x double> poison, double [[TMP9]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT12:%.*]] = insertelement <1 x double> poison, double [[TMP9]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT13:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT12]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP10:%.*]] = fmul <1 x double> [[SPLAT_SPLAT13]], [[BLOCK11]]
 ; CHECK-NEXT:    [[TMP11:%.*]] = fadd <1 x double> [[TMP8]], [[TMP10]]
@@ -151,12 +151,12 @@ define <9 x double> @multiply_2x3(<6 x double> %a, <6 x double> %b) {
 ; CHECK-NEXT:    [[TMP13:%.*]] = shufflevector <3 x double> [[TMP6]], <3 x double> [[TMP12]], <3 x i32> <i32 0, i32 3, i32 2>
 ; CHECK-NEXT:    [[BLOCK14:%.*]] = shufflevector <3 x double> [[SPLIT3]], <3 x double> poison, <1 x i32> <i32 2>
 ; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <2 x double> [[SPLIT]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT15:%.*]] = insertelement <1 x double> poison, double [[TMP14]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT15:%.*]] = insertelement <1 x double> poison, double [[TMP14]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT16:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT15]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP15:%.*]] = fmul <1 x double> [[SPLAT_SPLAT16]], [[BLOCK14]]
 ; CHECK-NEXT:    [[BLOCK17:%.*]] = shufflevector <3 x double> [[SPLIT4]], <3 x double> poison, <1 x i32> <i32 2>
 ; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <2 x double> [[SPLIT]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT18:%.*]] = insertelement <1 x double> poison, double [[TMP16]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT18:%.*]] = insertelement <1 x double> poison, double [[TMP16]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT19:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT18]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP17:%.*]] = fmul <1 x double> [[SPLAT_SPLAT19]], [[BLOCK17]]
 ; CHECK-NEXT:    [[TMP18:%.*]] = fadd <1 x double> [[TMP15]], [[TMP17]]
@@ -164,12 +164,12 @@ define <9 x double> @multiply_2x3(<6 x double> %a, <6 x double> %b) {
 ; CHECK-NEXT:    [[TMP20:%.*]] = shufflevector <3 x double> [[TMP13]], <3 x double> [[TMP19]], <3 x i32> <i32 0, i32 1, i32 3>
 ; CHECK-NEXT:    [[BLOCK20:%.*]] = shufflevector <3 x double> [[SPLIT3]], <3 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP21:%.*]] = extractelement <2 x double> [[SPLIT1]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT21:%.*]] = insertelement <1 x double> poison, double [[TMP21]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT21:%.*]] = insertelement <1 x double> poison, double [[TMP21]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT22:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT21]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP22:%.*]] = fmul <1 x double> [[SPLAT_SPLAT22]], [[BLOCK20]]
 ; CHECK-NEXT:    [[BLOCK23:%.*]] = shufflevector <3 x double> [[SPLIT4]], <3 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP23:%.*]] = extractelement <2 x double> [[SPLIT1]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT24:%.*]] = insertelement <1 x double> poison, double [[TMP23]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT24:%.*]] = insertelement <1 x double> poison, double [[TMP23]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT25:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT24]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP24:%.*]] = fmul <1 x double> [[SPLAT_SPLAT25]], [[BLOCK23]]
 ; CHECK-NEXT:    [[TMP25:%.*]] = fadd <1 x double> [[TMP22]], [[TMP24]]
@@ -177,12 +177,12 @@ define <9 x double> @multiply_2x3(<6 x double> %a, <6 x double> %b) {
 ; CHECK-NEXT:    [[TMP27:%.*]] = shufflevector <3 x double> undef, <3 x double> [[TMP26]], <3 x i32> <i32 3, i32 1, i32 2>
 ; CHECK-NEXT:    [[BLOCK26:%.*]] = shufflevector <3 x double> [[SPLIT3]], <3 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP28:%.*]] = extractelement <2 x double> [[SPLIT1]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT27:%.*]] = insertelement <1 x double> poison, double [[TMP28]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT27:%.*]] = insertelement <1 x double> poison, double [[TMP28]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT28:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT27]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP29:%.*]] = fmul <1 x double> [[SPLAT_SPLAT28]], [[BLOCK26]]
 ; CHECK-NEXT:    [[BLOCK29:%.*]] = shufflevector <3 x double> [[SPLIT4]], <3 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP30:%.*]] = extractelement <2 x double> [[SPLIT1]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT30:%.*]] = insertelement <1 x double> poison, double [[TMP30]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT30:%.*]] = insertelement <1 x double> poison, double [[TMP30]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT31:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT30]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP31:%.*]] = fmul <1 x double> [[SPLAT_SPLAT31]], [[BLOCK29]]
 ; CHECK-NEXT:    [[TMP32:%.*]] = fadd <1 x double> [[TMP29]], [[TMP31]]
@@ -190,12 +190,12 @@ define <9 x double> @multiply_2x3(<6 x double> %a, <6 x double> %b) {
 ; CHECK-NEXT:    [[TMP34:%.*]] = shufflevector <3 x double> [[TMP27]], <3 x double> [[TMP33]], <3 x i32> <i32 0, i32 3, i32 2>
 ; CHECK-NEXT:    [[BLOCK32:%.*]] = shufflevector <3 x double> [[SPLIT3]], <3 x double> poison, <1 x i32> <i32 2>
 ; CHECK-NEXT:    [[TMP35:%.*]] = extractelement <2 x double> [[SPLIT1]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT33:%.*]] = insertelement <1 x double> poison, double [[TMP35]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT33:%.*]] = insertelement <1 x double> poison, double [[TMP35]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT34:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT33]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP36:%.*]] = fmul <1 x double> [[SPLAT_SPLAT34]], [[BLOCK32]]
 ; CHECK-NEXT:    [[BLOCK35:%.*]] = shufflevector <3 x double> [[SPLIT4]], <3 x double> poison, <1 x i32> <i32 2>
 ; CHECK-NEXT:    [[TMP37:%.*]] = extractelement <2 x double> [[SPLIT1]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT36:%.*]] = insertelement <1 x double> poison, double [[TMP37]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT36:%.*]] = insertelement <1 x double> poison, double [[TMP37]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT37:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT36]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP38:%.*]] = fmul <1 x double> [[SPLAT_SPLAT37]], [[BLOCK35]]
 ; CHECK-NEXT:    [[TMP39:%.*]] = fadd <1 x double> [[TMP36]], [[TMP38]]
@@ -203,12 +203,12 @@ define <9 x double> @multiply_2x3(<6 x double> %a, <6 x double> %b) {
 ; CHECK-NEXT:    [[TMP41:%.*]] = shufflevector <3 x double> [[TMP34]], <3 x double> [[TMP40]], <3 x i32> <i32 0, i32 1, i32 3>
 ; CHECK-NEXT:    [[BLOCK38:%.*]] = shufflevector <3 x double> [[SPLIT3]], <3 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP42:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT39:%.*]] = insertelement <1 x double> poison, double [[TMP42]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT39:%.*]] = insertelement <1 x double> poison, double [[TMP42]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT40:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT39]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP43:%.*]] = fmul <1 x double> [[SPLAT_SPLAT40]], [[BLOCK38]]
 ; CHECK-NEXT:    [[BLOCK41:%.*]] = shufflevector <3 x double> [[SPLIT4]], <3 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP44:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT42:%.*]] = insertelement <1 x double> poison, double [[TMP44]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT42:%.*]] = insertelement <1 x double> poison, double [[TMP44]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT43:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT42]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP45:%.*]] = fmul <1 x double> [[SPLAT_SPLAT43]], [[BLOCK41]]
 ; CHECK-NEXT:    [[TMP46:%.*]] = fadd <1 x double> [[TMP43]], [[TMP45]]
@@ -216,12 +216,12 @@ define <9 x double> @multiply_2x3(<6 x double> %a, <6 x double> %b) {
 ; CHECK-NEXT:    [[TMP48:%.*]] = shufflevector <3 x double> undef, <3 x double> [[TMP47]], <3 x i32> <i32 3, i32 1, i32 2>
 ; CHECK-NEXT:    [[BLOCK44:%.*]] = shufflevector <3 x double> [[SPLIT3]], <3 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP49:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT45:%.*]] = insertelement <1 x double> poison, double [[TMP49]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT45:%.*]] = insertelement <1 x double> poison, double [[TMP49]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT46:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT45]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP50:%.*]] = fmul <1 x double> [[SPLAT_SPLAT46]], [[BLOCK44]]
 ; CHECK-NEXT:    [[BLOCK47:%.*]] = shufflevector <3 x double> [[SPLIT4]], <3 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP51:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT48:%.*]] = insertelement <1 x double> poison, double [[TMP51]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT48:%.*]] = insertelement <1 x double> poison, double [[TMP51]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT49:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT48]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP52:%.*]] = fmul <1 x double> [[SPLAT_SPLAT49]], [[BLOCK47]]
 ; CHECK-NEXT:    [[TMP53:%.*]] = fadd <1 x double> [[TMP50]], [[TMP52]]
@@ -229,12 +229,12 @@ define <9 x double> @multiply_2x3(<6 x double> %a, <6 x double> %b) {
 ; CHECK-NEXT:    [[TMP55:%.*]] = shufflevector <3 x double> [[TMP48]], <3 x double> [[TMP54]], <3 x i32> <i32 0, i32 3, i32 2>
 ; CHECK-NEXT:    [[BLOCK50:%.*]] = shufflevector <3 x double> [[SPLIT3]], <3 x double> poison, <1 x i32> <i32 2>
 ; CHECK-NEXT:    [[TMP56:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT51:%.*]] = insertelement <1 x double> poison, double [[TMP56]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT51:%.*]] = insertelement <1 x double> poison, double [[TMP56]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT52:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT51]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP57:%.*]] = fmul <1 x double> [[SPLAT_SPLAT52]], [[BLOCK50]]
 ; CHECK-NEXT:    [[BLOCK53:%.*]] = shufflevector <3 x double> [[SPLIT4]], <3 x double> poison, <1 x i32> <i32 2>
 ; CHECK-NEXT:    [[TMP58:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT54:%.*]] = insertelement <1 x double> poison, double [[TMP58]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT54:%.*]] = insertelement <1 x double> poison, double [[TMP58]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT55:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT54]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP59:%.*]] = fmul <1 x double> [[SPLAT_SPLAT55]], [[BLOCK53]]
 ; CHECK-NEXT:    [[TMP60:%.*]] = fadd <1 x double> [[TMP57]], [[TMP59]]

diff  --git a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-double.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-double.ll
index b8d12b7f881b0..af82d1ee0ba73 100644
--- a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-double.ll
+++ b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-double.ll
@@ -11,12 +11,12 @@ define <4 x double> @multiply_2x2(<4 x double> %a, <4 x double> %b) {
 ; CHECK-NEXT:    [[SPLIT3:%.*]] = shufflevector <4 x double> [[B]], <4 x double> poison, <2 x i32> <i32 2, i32 3>
 ; CHECK-NEXT:    [[BLOCK:%.*]] = shufflevector <2 x double> [[SPLIT]], <2 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x double> poison, double [[TMP0]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x double> poison, double [[TMP0]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP1:%.*]] = fmul <1 x double> [[BLOCK]], [[SPLAT_SPLAT]]
 ; CHECK-NEXT:    [[BLOCK4:%.*]] = shufflevector <2 x double> [[SPLIT1]], <2 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT5:%.*]] = insertelement <1 x double> poison, double [[TMP2]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT5:%.*]] = insertelement <1 x double> poison, double [[TMP2]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT6:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT5]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP3:%.*]] = fmul <1 x double> [[BLOCK4]], [[SPLAT_SPLAT6]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = fadd <1 x double> [[TMP1]], [[TMP3]]
@@ -24,12 +24,12 @@ define <4 x double> @multiply_2x2(<4 x double> %a, <4 x double> %b) {
 ; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <2 x double> undef, <2 x double> [[TMP5]], <2 x i32> <i32 2, i32 1>
 ; CHECK-NEXT:    [[BLOCK7:%.*]] = shufflevector <2 x double> [[SPLIT]], <2 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT8:%.*]] = insertelement <1 x double> poison, double [[TMP7]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT8:%.*]] = insertelement <1 x double> poison, double [[TMP7]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT9:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT8]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP8:%.*]] = fmul <1 x double> [[BLOCK7]], [[SPLAT_SPLAT9]]
 ; CHECK-NEXT:    [[BLOCK10:%.*]] = shufflevector <2 x double> [[SPLIT1]], <2 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT11:%.*]] = insertelement <1 x double> poison, double [[TMP9]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT11:%.*]] = insertelement <1 x double> poison, double [[TMP9]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT12:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT11]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP10:%.*]] = fmul <1 x double> [[BLOCK10]], [[SPLAT_SPLAT12]]
 ; CHECK-NEXT:    [[TMP11:%.*]] = fadd <1 x double> [[TMP8]], [[TMP10]]
@@ -37,12 +37,12 @@ define <4 x double> @multiply_2x2(<4 x double> %a, <4 x double> %b) {
 ; CHECK-NEXT:    [[TMP13:%.*]] = shufflevector <2 x double> [[TMP6]], <2 x double> [[TMP12]], <2 x i32> <i32 0, i32 2>
 ; CHECK-NEXT:    [[BLOCK13:%.*]] = shufflevector <2 x double> [[SPLIT]], <2 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <2 x double> [[SPLIT3]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT14:%.*]] = insertelement <1 x double> poison, double [[TMP14]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT14:%.*]] = insertelement <1 x double> poison, double [[TMP14]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT15:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT14]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP15:%.*]] = fmul <1 x double> [[BLOCK13]], [[SPLAT_SPLAT15]]
 ; CHECK-NEXT:    [[BLOCK16:%.*]] = shufflevector <2 x double> [[SPLIT1]], <2 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <2 x double> [[SPLIT3]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT17:%.*]] = insertelement <1 x double> poison, double [[TMP16]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT17:%.*]] = insertelement <1 x double> poison, double [[TMP16]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT18:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT17]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP17:%.*]] = fmul <1 x double> [[BLOCK16]], [[SPLAT_SPLAT18]]
 ; CHECK-NEXT:    [[TMP18:%.*]] = fadd <1 x double> [[TMP15]], [[TMP17]]
@@ -50,12 +50,12 @@ define <4 x double> @multiply_2x2(<4 x double> %a, <4 x double> %b) {
 ; CHECK-NEXT:    [[TMP20:%.*]] = shufflevector <2 x double> undef, <2 x double> [[TMP19]], <2 x i32> <i32 2, i32 1>
 ; CHECK-NEXT:    [[BLOCK19:%.*]] = shufflevector <2 x double> [[SPLIT]], <2 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP21:%.*]] = extractelement <2 x double> [[SPLIT3]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT20:%.*]] = insertelement <1 x double> poison, double [[TMP21]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT20:%.*]] = insertelement <1 x double> poison, double [[TMP21]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT21:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT20]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP22:%.*]] = fmul <1 x double> [[BLOCK19]], [[SPLAT_SPLAT21]]
 ; CHECK-NEXT:    [[BLOCK22:%.*]] = shufflevector <2 x double> [[SPLIT1]], <2 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP23:%.*]] = extractelement <2 x double> [[SPLIT3]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT23:%.*]] = insertelement <1 x double> poison, double [[TMP23]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT23:%.*]] = insertelement <1 x double> poison, double [[TMP23]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT24:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT23]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP24:%.*]] = fmul <1 x double> [[BLOCK22]], [[SPLAT_SPLAT24]]
 ; CHECK-NEXT:    [[TMP25:%.*]] = fadd <1 x double> [[TMP22]], [[TMP24]]
@@ -79,28 +79,28 @@ define <4 x double> @multiply_1x2(<2 x double> %a, <2 x double> %b) {
 ; CHECK-NEXT:    [[SPLIT2:%.*]] = shufflevector <2 x double> [[B]], <2 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[BLOCK:%.*]] = shufflevector <2 x double> [[SPLIT]], <2 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <1 x double> [[SPLIT1]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x double> poison, double [[TMP0]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x double> poison, double [[TMP0]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP1:%.*]] = fmul <1 x double> [[BLOCK]], [[SPLAT_SPLAT]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <1 x double> [[TMP1]], <1 x double> poison, <2 x i32> <i32 0, i32 undef>
 ; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <2 x double> undef, <2 x double> [[TMP2]], <2 x i32> <i32 2, i32 1>
 ; CHECK-NEXT:    [[BLOCK3:%.*]] = shufflevector <2 x double> [[SPLIT]], <2 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <1 x double> [[SPLIT1]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT4:%.*]] = insertelement <1 x double> poison, double [[TMP4]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT4:%.*]] = insertelement <1 x double> poison, double [[TMP4]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT5:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT4]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP5:%.*]] = fmul <1 x double> [[BLOCK3]], [[SPLAT_SPLAT5]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <1 x double> [[TMP5]], <1 x double> poison, <2 x i32> <i32 0, i32 undef>
 ; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> [[TMP6]], <2 x i32> <i32 0, i32 2>
 ; CHECK-NEXT:    [[BLOCK6:%.*]] = shufflevector <2 x double> [[SPLIT]], <2 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <1 x double> [[SPLIT2]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT7:%.*]] = insertelement <1 x double> poison, double [[TMP8]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT7:%.*]] = insertelement <1 x double> poison, double [[TMP8]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT8:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT7]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP9:%.*]] = fmul <1 x double> [[BLOCK6]], [[SPLAT_SPLAT8]]
 ; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <1 x double> [[TMP9]], <1 x double> poison, <2 x i32> <i32 0, i32 undef>
 ; CHECK-NEXT:    [[TMP11:%.*]] = shufflevector <2 x double> undef, <2 x double> [[TMP10]], <2 x i32> <i32 2, i32 1>
 ; CHECK-NEXT:    [[BLOCK9:%.*]] = shufflevector <2 x double> [[SPLIT]], <2 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <1 x double> [[SPLIT2]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT10:%.*]] = insertelement <1 x double> poison, double [[TMP12]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT10:%.*]] = insertelement <1 x double> poison, double [[TMP12]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT11:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT10]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP13:%.*]] = fmul <1 x double> [[BLOCK9]], [[SPLAT_SPLAT11]]
 ; CHECK-NEXT:    [[TMP14:%.*]] = shufflevector <1 x double> [[TMP13]], <1 x double> poison, <2 x i32> <i32 0, i32 undef>
@@ -125,12 +125,12 @@ define <9 x double> @multiply_2x3(<6 x double> %a, <6 x double> %b) {
 ; CHECK-NEXT:    [[SPLIT4:%.*]] = shufflevector <6 x double> [[B]], <6 x double> poison, <2 x i32> <i32 4, i32 5>
 ; CHECK-NEXT:    [[BLOCK:%.*]] = shufflevector <3 x double> [[SPLIT]], <3 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x double> poison, double [[TMP0]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x double> poison, double [[TMP0]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP1:%.*]] = fmul <1 x double> [[BLOCK]], [[SPLAT_SPLAT]]
 ; CHECK-NEXT:    [[BLOCK5:%.*]] = shufflevector <3 x double> [[SPLIT1]], <3 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT6:%.*]] = insertelement <1 x double> poison, double [[TMP2]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT6:%.*]] = insertelement <1 x double> poison, double [[TMP2]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT7:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT6]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP3:%.*]] = fmul <1 x double> [[BLOCK5]], [[SPLAT_SPLAT7]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = fadd <1 x double> [[TMP1]], [[TMP3]]
@@ -138,12 +138,12 @@ define <9 x double> @multiply_2x3(<6 x double> %a, <6 x double> %b) {
 ; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <3 x double> undef, <3 x double> [[TMP5]], <3 x i32> <i32 3, i32 1, i32 2>
 ; CHECK-NEXT:    [[BLOCK8:%.*]] = shufflevector <3 x double> [[SPLIT]], <3 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT9:%.*]] = insertelement <1 x double> poison, double [[TMP7]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT9:%.*]] = insertelement <1 x double> poison, double [[TMP7]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT10:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT9]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP8:%.*]] = fmul <1 x double> [[BLOCK8]], [[SPLAT_SPLAT10]]
 ; CHECK-NEXT:    [[BLOCK11:%.*]] = shufflevector <3 x double> [[SPLIT1]], <3 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT12:%.*]] = insertelement <1 x double> poison, double [[TMP9]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT12:%.*]] = insertelement <1 x double> poison, double [[TMP9]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT13:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT12]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP10:%.*]] = fmul <1 x double> [[BLOCK11]], [[SPLAT_SPLAT13]]
 ; CHECK-NEXT:    [[TMP11:%.*]] = fadd <1 x double> [[TMP8]], [[TMP10]]
@@ -151,12 +151,12 @@ define <9 x double> @multiply_2x3(<6 x double> %a, <6 x double> %b) {
 ; CHECK-NEXT:    [[TMP13:%.*]] = shufflevector <3 x double> [[TMP6]], <3 x double> [[TMP12]], <3 x i32> <i32 0, i32 3, i32 2>
 ; CHECK-NEXT:    [[BLOCK14:%.*]] = shufflevector <3 x double> [[SPLIT]], <3 x double> poison, <1 x i32> <i32 2>
 ; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT15:%.*]] = insertelement <1 x double> poison, double [[TMP14]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT15:%.*]] = insertelement <1 x double> poison, double [[TMP14]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT16:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT15]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP15:%.*]] = fmul <1 x double> [[BLOCK14]], [[SPLAT_SPLAT16]]
 ; CHECK-NEXT:    [[BLOCK17:%.*]] = shufflevector <3 x double> [[SPLIT1]], <3 x double> poison, <1 x i32> <i32 2>
 ; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT18:%.*]] = insertelement <1 x double> poison, double [[TMP16]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT18:%.*]] = insertelement <1 x double> poison, double [[TMP16]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT19:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT18]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP17:%.*]] = fmul <1 x double> [[BLOCK17]], [[SPLAT_SPLAT19]]
 ; CHECK-NEXT:    [[TMP18:%.*]] = fadd <1 x double> [[TMP15]], [[TMP17]]
@@ -164,12 +164,12 @@ define <9 x double> @multiply_2x3(<6 x double> %a, <6 x double> %b) {
 ; CHECK-NEXT:    [[TMP20:%.*]] = shufflevector <3 x double> [[TMP13]], <3 x double> [[TMP19]], <3 x i32> <i32 0, i32 1, i32 3>
 ; CHECK-NEXT:    [[BLOCK20:%.*]] = shufflevector <3 x double> [[SPLIT]], <3 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP21:%.*]] = extractelement <2 x double> [[SPLIT3]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT21:%.*]] = insertelement <1 x double> poison, double [[TMP21]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT21:%.*]] = insertelement <1 x double> poison, double [[TMP21]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT22:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT21]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP22:%.*]] = fmul <1 x double> [[BLOCK20]], [[SPLAT_SPLAT22]]
 ; CHECK-NEXT:    [[BLOCK23:%.*]] = shufflevector <3 x double> [[SPLIT1]], <3 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP23:%.*]] = extractelement <2 x double> [[SPLIT3]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT24:%.*]] = insertelement <1 x double> poison, double [[TMP23]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT24:%.*]] = insertelement <1 x double> poison, double [[TMP23]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT25:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT24]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP24:%.*]] = fmul <1 x double> [[BLOCK23]], [[SPLAT_SPLAT25]]
 ; CHECK-NEXT:    [[TMP25:%.*]] = fadd <1 x double> [[TMP22]], [[TMP24]]
@@ -177,12 +177,12 @@ define <9 x double> @multiply_2x3(<6 x double> %a, <6 x double> %b) {
 ; CHECK-NEXT:    [[TMP27:%.*]] = shufflevector <3 x double> undef, <3 x double> [[TMP26]], <3 x i32> <i32 3, i32 1, i32 2>
 ; CHECK-NEXT:    [[BLOCK26:%.*]] = shufflevector <3 x double> [[SPLIT]], <3 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP28:%.*]] = extractelement <2 x double> [[SPLIT3]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT27:%.*]] = insertelement <1 x double> poison, double [[TMP28]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT27:%.*]] = insertelement <1 x double> poison, double [[TMP28]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT28:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT27]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP29:%.*]] = fmul <1 x double> [[BLOCK26]], [[SPLAT_SPLAT28]]
 ; CHECK-NEXT:    [[BLOCK29:%.*]] = shufflevector <3 x double> [[SPLIT1]], <3 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP30:%.*]] = extractelement <2 x double> [[SPLIT3]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT30:%.*]] = insertelement <1 x double> poison, double [[TMP30]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT30:%.*]] = insertelement <1 x double> poison, double [[TMP30]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT31:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT30]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP31:%.*]] = fmul <1 x double> [[BLOCK29]], [[SPLAT_SPLAT31]]
 ; CHECK-NEXT:    [[TMP32:%.*]] = fadd <1 x double> [[TMP29]], [[TMP31]]
@@ -190,12 +190,12 @@ define <9 x double> @multiply_2x3(<6 x double> %a, <6 x double> %b) {
 ; CHECK-NEXT:    [[TMP34:%.*]] = shufflevector <3 x double> [[TMP27]], <3 x double> [[TMP33]], <3 x i32> <i32 0, i32 3, i32 2>
 ; CHECK-NEXT:    [[BLOCK32:%.*]] = shufflevector <3 x double> [[SPLIT]], <3 x double> poison, <1 x i32> <i32 2>
 ; CHECK-NEXT:    [[TMP35:%.*]] = extractelement <2 x double> [[SPLIT3]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT33:%.*]] = insertelement <1 x double> poison, double [[TMP35]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT33:%.*]] = insertelement <1 x double> poison, double [[TMP35]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT34:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT33]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP36:%.*]] = fmul <1 x double> [[BLOCK32]], [[SPLAT_SPLAT34]]
 ; CHECK-NEXT:    [[BLOCK35:%.*]] = shufflevector <3 x double> [[SPLIT1]], <3 x double> poison, <1 x i32> <i32 2>
 ; CHECK-NEXT:    [[TMP37:%.*]] = extractelement <2 x double> [[SPLIT3]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT36:%.*]] = insertelement <1 x double> poison, double [[TMP37]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT36:%.*]] = insertelement <1 x double> poison, double [[TMP37]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT37:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT36]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP38:%.*]] = fmul <1 x double> [[BLOCK35]], [[SPLAT_SPLAT37]]
 ; CHECK-NEXT:    [[TMP39:%.*]] = fadd <1 x double> [[TMP36]], [[TMP38]]
@@ -203,12 +203,12 @@ define <9 x double> @multiply_2x3(<6 x double> %a, <6 x double> %b) {
 ; CHECK-NEXT:    [[TMP41:%.*]] = shufflevector <3 x double> [[TMP34]], <3 x double> [[TMP40]], <3 x i32> <i32 0, i32 1, i32 3>
 ; CHECK-NEXT:    [[BLOCK38:%.*]] = shufflevector <3 x double> [[SPLIT]], <3 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP42:%.*]] = extractelement <2 x double> [[SPLIT4]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT39:%.*]] = insertelement <1 x double> poison, double [[TMP42]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT39:%.*]] = insertelement <1 x double> poison, double [[TMP42]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT40:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT39]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP43:%.*]] = fmul <1 x double> [[BLOCK38]], [[SPLAT_SPLAT40]]
 ; CHECK-NEXT:    [[BLOCK41:%.*]] = shufflevector <3 x double> [[SPLIT1]], <3 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP44:%.*]] = extractelement <2 x double> [[SPLIT4]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT42:%.*]] = insertelement <1 x double> poison, double [[TMP44]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT42:%.*]] = insertelement <1 x double> poison, double [[TMP44]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT43:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT42]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP45:%.*]] = fmul <1 x double> [[BLOCK41]], [[SPLAT_SPLAT43]]
 ; CHECK-NEXT:    [[TMP46:%.*]] = fadd <1 x double> [[TMP43]], [[TMP45]]
@@ -216,12 +216,12 @@ define <9 x double> @multiply_2x3(<6 x double> %a, <6 x double> %b) {
 ; CHECK-NEXT:    [[TMP48:%.*]] = shufflevector <3 x double> undef, <3 x double> [[TMP47]], <3 x i32> <i32 3, i32 1, i32 2>
 ; CHECK-NEXT:    [[BLOCK44:%.*]] = shufflevector <3 x double> [[SPLIT]], <3 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP49:%.*]] = extractelement <2 x double> [[SPLIT4]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT45:%.*]] = insertelement <1 x double> poison, double [[TMP49]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT45:%.*]] = insertelement <1 x double> poison, double [[TMP49]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT46:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT45]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP50:%.*]] = fmul <1 x double> [[BLOCK44]], [[SPLAT_SPLAT46]]
 ; CHECK-NEXT:    [[BLOCK47:%.*]] = shufflevector <3 x double> [[SPLIT1]], <3 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP51:%.*]] = extractelement <2 x double> [[SPLIT4]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT48:%.*]] = insertelement <1 x double> poison, double [[TMP51]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT48:%.*]] = insertelement <1 x double> poison, double [[TMP51]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT49:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT48]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP52:%.*]] = fmul <1 x double> [[BLOCK47]], [[SPLAT_SPLAT49]]
 ; CHECK-NEXT:    [[TMP53:%.*]] = fadd <1 x double> [[TMP50]], [[TMP52]]
@@ -229,12 +229,12 @@ define <9 x double> @multiply_2x3(<6 x double> %a, <6 x double> %b) {
 ; CHECK-NEXT:    [[TMP55:%.*]] = shufflevector <3 x double> [[TMP48]], <3 x double> [[TMP54]], <3 x i32> <i32 0, i32 3, i32 2>
 ; CHECK-NEXT:    [[BLOCK50:%.*]] = shufflevector <3 x double> [[SPLIT]], <3 x double> poison, <1 x i32> <i32 2>
 ; CHECK-NEXT:    [[TMP56:%.*]] = extractelement <2 x double> [[SPLIT4]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT51:%.*]] = insertelement <1 x double> poison, double [[TMP56]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT51:%.*]] = insertelement <1 x double> poison, double [[TMP56]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT52:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT51]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP57:%.*]] = fmul <1 x double> [[BLOCK50]], [[SPLAT_SPLAT52]]
 ; CHECK-NEXT:    [[BLOCK53:%.*]] = shufflevector <3 x double> [[SPLIT1]], <3 x double> poison, <1 x i32> <i32 2>
 ; CHECK-NEXT:    [[TMP58:%.*]] = extractelement <2 x double> [[SPLIT4]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT54:%.*]] = insertelement <1 x double> poison, double [[TMP58]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT54:%.*]] = insertelement <1 x double> poison, double [[TMP58]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT55:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT54]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP59:%.*]] = fmul <1 x double> [[BLOCK53]], [[SPLAT_SPLAT55]]
 ; CHECK-NEXT:    [[TMP60:%.*]] = fadd <1 x double> [[TMP57]], [[TMP59]]

diff  --git a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-float-contraction-fmf.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-float-contraction-fmf.ll
index 8a94e8806073a..9c69c6471cdf2 100644
--- a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-float-contraction-fmf.ll
+++ b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-float-contraction-fmf.ll
@@ -11,48 +11,48 @@ define <4 x float> @multiply_2x2(<4 x float> %a, <4 x float> %b) {
 ; CHECK-NEXT:    [[SPLIT3:%.*]] = shufflevector <4 x float> [[B]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
 ; CHECK-NEXT:    [[BLOCK:%.*]] = shufflevector <2 x float> [[SPLIT]], <2 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x float> [[SPLIT2]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x float> poison, float [[TMP0]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x float> poison, float [[TMP0]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP1:%.*]] = fmul contract <1 x float> [[BLOCK]], [[SPLAT_SPLAT]]
 ; CHECK-NEXT:    [[BLOCK4:%.*]] = shufflevector <2 x float> [[SPLIT1]], <2 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x float> [[SPLIT2]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT5:%.*]] = insertelement <1 x float> poison, float [[TMP2]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT5:%.*]] = insertelement <1 x float> poison, float [[TMP2]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT6:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT5]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP3:%.*]] = call contract <1 x float> @llvm.fmuladd.v1f32(<1 x float> [[BLOCK4]], <1 x float> [[SPLAT_SPLAT6]], <1 x float> [[TMP1]])
 ; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <1 x float> [[TMP3]], <1 x float> poison, <2 x i32> <i32 0, i32 undef>
 ; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <2 x float> undef, <2 x float> [[TMP4]], <2 x i32> <i32 2, i32 1>
 ; CHECK-NEXT:    [[BLOCK7:%.*]] = shufflevector <2 x float> [[SPLIT]], <2 x float> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x float> [[SPLIT2]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT8:%.*]] = insertelement <1 x float> poison, float [[TMP6]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT8:%.*]] = insertelement <1 x float> poison, float [[TMP6]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT9:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT8]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP7:%.*]] = fmul contract <1 x float> [[BLOCK7]], [[SPLAT_SPLAT9]]
 ; CHECK-NEXT:    [[BLOCK10:%.*]] = shufflevector <2 x float> [[SPLIT1]], <2 x float> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <2 x float> [[SPLIT2]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT11:%.*]] = insertelement <1 x float> poison, float [[TMP8]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT11:%.*]] = insertelement <1 x float> poison, float [[TMP8]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT12:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT11]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP9:%.*]] = call contract <1 x float> @llvm.fmuladd.v1f32(<1 x float> [[BLOCK10]], <1 x float> [[SPLAT_SPLAT12]], <1 x float> [[TMP7]])
 ; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <1 x float> [[TMP9]], <1 x float> poison, <2 x i32> <i32 0, i32 undef>
 ; CHECK-NEXT:    [[TMP11:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> [[TMP10]], <2 x i32> <i32 0, i32 2>
 ; CHECK-NEXT:    [[BLOCK13:%.*]] = shufflevector <2 x float> [[SPLIT]], <2 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <2 x float> [[SPLIT3]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT14:%.*]] = insertelement <1 x float> poison, float [[TMP12]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT14:%.*]] = insertelement <1 x float> poison, float [[TMP12]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT15:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT14]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP13:%.*]] = fmul contract <1 x float> [[BLOCK13]], [[SPLAT_SPLAT15]]
 ; CHECK-NEXT:    [[BLOCK16:%.*]] = shufflevector <2 x float> [[SPLIT1]], <2 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <2 x float> [[SPLIT3]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT17:%.*]] = insertelement <1 x float> poison, float [[TMP14]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT17:%.*]] = insertelement <1 x float> poison, float [[TMP14]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT18:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT17]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP15:%.*]] = call contract <1 x float> @llvm.fmuladd.v1f32(<1 x float> [[BLOCK16]], <1 x float> [[SPLAT_SPLAT18]], <1 x float> [[TMP13]])
 ; CHECK-NEXT:    [[TMP16:%.*]] = shufflevector <1 x float> [[TMP15]], <1 x float> poison, <2 x i32> <i32 0, i32 undef>
 ; CHECK-NEXT:    [[TMP17:%.*]] = shufflevector <2 x float> undef, <2 x float> [[TMP16]], <2 x i32> <i32 2, i32 1>
 ; CHECK-NEXT:    [[BLOCK19:%.*]] = shufflevector <2 x float> [[SPLIT]], <2 x float> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP18:%.*]] = extractelement <2 x float> [[SPLIT3]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT20:%.*]] = insertelement <1 x float> poison, float [[TMP18]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT20:%.*]] = insertelement <1 x float> poison, float [[TMP18]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT21:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT20]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP19:%.*]] = fmul contract <1 x float> [[BLOCK19]], [[SPLAT_SPLAT21]]
 ; CHECK-NEXT:    [[BLOCK22:%.*]] = shufflevector <2 x float> [[SPLIT1]], <2 x float> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP20:%.*]] = extractelement <2 x float> [[SPLIT3]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT23:%.*]] = insertelement <1 x float> poison, float [[TMP20]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT23:%.*]] = insertelement <1 x float> poison, float [[TMP20]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT24:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT23]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP21:%.*]] = call contract <1 x float> @llvm.fmuladd.v1f32(<1 x float> [[BLOCK22]], <1 x float> [[SPLAT_SPLAT24]], <1 x float> [[TMP19]])
 ; CHECK-NEXT:    [[TMP22:%.*]] = shufflevector <1 x float> [[TMP21]], <1 x float> poison, <2 x i32> <i32 0, i32 undef>

diff  --git a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-float-contraction.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-float-contraction.ll
index 554f5558503eb..ab23352e807e1 100644
--- a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-float-contraction.ll
+++ b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-float-contraction.ll
@@ -11,48 +11,48 @@ define <4 x float> @multiply_2x2(<4 x float> %a, <4 x float> %b) {
 ; CHECK-NEXT:    [[SPLIT3:%.*]] = shufflevector <4 x float> [[B]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
 ; CHECK-NEXT:    [[BLOCK:%.*]] = shufflevector <2 x float> [[SPLIT]], <2 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x float> [[SPLIT2]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x float> poison, float [[TMP0]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x float> poison, float [[TMP0]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP1:%.*]] = fmul contract <1 x float> [[BLOCK]], [[SPLAT_SPLAT]]
 ; CHECK-NEXT:    [[BLOCK4:%.*]] = shufflevector <2 x float> [[SPLIT1]], <2 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x float> [[SPLIT2]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT5:%.*]] = insertelement <1 x float> poison, float [[TMP2]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT5:%.*]] = insertelement <1 x float> poison, float [[TMP2]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT6:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT5]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP3:%.*]] = call contract <1 x float> @llvm.fmuladd.v1f32(<1 x float> [[BLOCK4]], <1 x float> [[SPLAT_SPLAT6]], <1 x float> [[TMP1]])
 ; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <1 x float> [[TMP3]], <1 x float> poison, <2 x i32> <i32 0, i32 undef>
 ; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <2 x float> undef, <2 x float> [[TMP4]], <2 x i32> <i32 2, i32 1>
 ; CHECK-NEXT:    [[BLOCK7:%.*]] = shufflevector <2 x float> [[SPLIT]], <2 x float> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x float> [[SPLIT2]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT8:%.*]] = insertelement <1 x float> poison, float [[TMP6]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT8:%.*]] = insertelement <1 x float> poison, float [[TMP6]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT9:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT8]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP7:%.*]] = fmul contract <1 x float> [[BLOCK7]], [[SPLAT_SPLAT9]]
 ; CHECK-NEXT:    [[BLOCK10:%.*]] = shufflevector <2 x float> [[SPLIT1]], <2 x float> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <2 x float> [[SPLIT2]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT11:%.*]] = insertelement <1 x float> poison, float [[TMP8]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT11:%.*]] = insertelement <1 x float> poison, float [[TMP8]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT12:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT11]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP9:%.*]] = call contract <1 x float> @llvm.fmuladd.v1f32(<1 x float> [[BLOCK10]], <1 x float> [[SPLAT_SPLAT12]], <1 x float> [[TMP7]])
 ; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <1 x float> [[TMP9]], <1 x float> poison, <2 x i32> <i32 0, i32 undef>
 ; CHECK-NEXT:    [[TMP11:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> [[TMP10]], <2 x i32> <i32 0, i32 2>
 ; CHECK-NEXT:    [[BLOCK13:%.*]] = shufflevector <2 x float> [[SPLIT]], <2 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <2 x float> [[SPLIT3]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT14:%.*]] = insertelement <1 x float> poison, float [[TMP12]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT14:%.*]] = insertelement <1 x float> poison, float [[TMP12]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT15:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT14]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP13:%.*]] = fmul contract <1 x float> [[BLOCK13]], [[SPLAT_SPLAT15]]
 ; CHECK-NEXT:    [[BLOCK16:%.*]] = shufflevector <2 x float> [[SPLIT1]], <2 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <2 x float> [[SPLIT3]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT17:%.*]] = insertelement <1 x float> poison, float [[TMP14]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT17:%.*]] = insertelement <1 x float> poison, float [[TMP14]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT18:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT17]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP15:%.*]] = call contract <1 x float> @llvm.fmuladd.v1f32(<1 x float> [[BLOCK16]], <1 x float> [[SPLAT_SPLAT18]], <1 x float> [[TMP13]])
 ; CHECK-NEXT:    [[TMP16:%.*]] = shufflevector <1 x float> [[TMP15]], <1 x float> poison, <2 x i32> <i32 0, i32 undef>
 ; CHECK-NEXT:    [[TMP17:%.*]] = shufflevector <2 x float> undef, <2 x float> [[TMP16]], <2 x i32> <i32 2, i32 1>
 ; CHECK-NEXT:    [[BLOCK19:%.*]] = shufflevector <2 x float> [[SPLIT]], <2 x float> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP18:%.*]] = extractelement <2 x float> [[SPLIT3]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT20:%.*]] = insertelement <1 x float> poison, float [[TMP18]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT20:%.*]] = insertelement <1 x float> poison, float [[TMP18]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT21:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT20]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP19:%.*]] = fmul contract <1 x float> [[BLOCK19]], [[SPLAT_SPLAT21]]
 ; CHECK-NEXT:    [[BLOCK22:%.*]] = shufflevector <2 x float> [[SPLIT1]], <2 x float> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP20:%.*]] = extractelement <2 x float> [[SPLIT3]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT23:%.*]] = insertelement <1 x float> poison, float [[TMP20]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT23:%.*]] = insertelement <1 x float> poison, float [[TMP20]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT24:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT23]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP21:%.*]] = call contract <1 x float> @llvm.fmuladd.v1f32(<1 x float> [[BLOCK22]], <1 x float> [[SPLAT_SPLAT24]], <1 x float> [[TMP19]])
 ; CHECK-NEXT:    [[TMP22:%.*]] = shufflevector <1 x float> [[TMP21]], <1 x float> poison, <2 x i32> <i32 0, i32 undef>

diff  --git a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-float.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-float.ll
index 9cb966cf9c93a..b9068d48bcf67 100644
--- a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-float.ll
+++ b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-float.ll
@@ -11,12 +11,12 @@ define <4 x float> @multiply_2x2(<4 x float> %a, <4 x float> %b) {
 ; CHECK-NEXT:    [[SPLIT3:%.*]] = shufflevector <4 x float> [[B]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
 ; CHECK-NEXT:    [[BLOCK:%.*]] = shufflevector <2 x float> [[SPLIT]], <2 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x float> [[SPLIT2]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x float> poison, float [[TMP0]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x float> poison, float [[TMP0]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP1:%.*]] = fmul <1 x float> [[BLOCK]], [[SPLAT_SPLAT]]
 ; CHECK-NEXT:    [[BLOCK4:%.*]] = shufflevector <2 x float> [[SPLIT1]], <2 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x float> [[SPLIT2]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT5:%.*]] = insertelement <1 x float> poison, float [[TMP2]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT5:%.*]] = insertelement <1 x float> poison, float [[TMP2]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT6:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT5]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP3:%.*]] = fmul <1 x float> [[BLOCK4]], [[SPLAT_SPLAT6]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = fadd <1 x float> [[TMP1]], [[TMP3]]
@@ -24,12 +24,12 @@ define <4 x float> @multiply_2x2(<4 x float> %a, <4 x float> %b) {
 ; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <2 x float> undef, <2 x float> [[TMP5]], <2 x i32> <i32 2, i32 1>
 ; CHECK-NEXT:    [[BLOCK7:%.*]] = shufflevector <2 x float> [[SPLIT]], <2 x float> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <2 x float> [[SPLIT2]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT8:%.*]] = insertelement <1 x float> poison, float [[TMP7]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT8:%.*]] = insertelement <1 x float> poison, float [[TMP7]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT9:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT8]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP8:%.*]] = fmul <1 x float> [[BLOCK7]], [[SPLAT_SPLAT9]]
 ; CHECK-NEXT:    [[BLOCK10:%.*]] = shufflevector <2 x float> [[SPLIT1]], <2 x float> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <2 x float> [[SPLIT2]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT11:%.*]] = insertelement <1 x float> poison, float [[TMP9]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT11:%.*]] = insertelement <1 x float> poison, float [[TMP9]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT12:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT11]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP10:%.*]] = fmul <1 x float> [[BLOCK10]], [[SPLAT_SPLAT12]]
 ; CHECK-NEXT:    [[TMP11:%.*]] = fadd <1 x float> [[TMP8]], [[TMP10]]
@@ -37,12 +37,12 @@ define <4 x float> @multiply_2x2(<4 x float> %a, <4 x float> %b) {
 ; CHECK-NEXT:    [[TMP13:%.*]] = shufflevector <2 x float> [[TMP6]], <2 x float> [[TMP12]], <2 x i32> <i32 0, i32 2>
 ; CHECK-NEXT:    [[BLOCK13:%.*]] = shufflevector <2 x float> [[SPLIT]], <2 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <2 x float> [[SPLIT3]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT14:%.*]] = insertelement <1 x float> poison, float [[TMP14]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT14:%.*]] = insertelement <1 x float> poison, float [[TMP14]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT15:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT14]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP15:%.*]] = fmul <1 x float> [[BLOCK13]], [[SPLAT_SPLAT15]]
 ; CHECK-NEXT:    [[BLOCK16:%.*]] = shufflevector <2 x float> [[SPLIT1]], <2 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <2 x float> [[SPLIT3]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT17:%.*]] = insertelement <1 x float> poison, float [[TMP16]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT17:%.*]] = insertelement <1 x float> poison, float [[TMP16]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT18:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT17]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP17:%.*]] = fmul <1 x float> [[BLOCK16]], [[SPLAT_SPLAT18]]
 ; CHECK-NEXT:    [[TMP18:%.*]] = fadd <1 x float> [[TMP15]], [[TMP17]]
@@ -50,12 +50,12 @@ define <4 x float> @multiply_2x2(<4 x float> %a, <4 x float> %b) {
 ; CHECK-NEXT:    [[TMP20:%.*]] = shufflevector <2 x float> undef, <2 x float> [[TMP19]], <2 x i32> <i32 2, i32 1>
 ; CHECK-NEXT:    [[BLOCK19:%.*]] = shufflevector <2 x float> [[SPLIT]], <2 x float> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP21:%.*]] = extractelement <2 x float> [[SPLIT3]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT20:%.*]] = insertelement <1 x float> poison, float [[TMP21]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT20:%.*]] = insertelement <1 x float> poison, float [[TMP21]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT21:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT20]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP22:%.*]] = fmul <1 x float> [[BLOCK19]], [[SPLAT_SPLAT21]]
 ; CHECK-NEXT:    [[BLOCK22:%.*]] = shufflevector <2 x float> [[SPLIT1]], <2 x float> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP23:%.*]] = extractelement <2 x float> [[SPLIT3]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT23:%.*]] = insertelement <1 x float> poison, float [[TMP23]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT23:%.*]] = insertelement <1 x float> poison, float [[TMP23]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT24:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT23]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP24:%.*]] = fmul <1 x float> [[BLOCK22]], [[SPLAT_SPLAT24]]
 ; CHECK-NEXT:    [[TMP25:%.*]] = fadd <1 x float> [[TMP22]], [[TMP24]]
@@ -79,28 +79,28 @@ define <4 x float> @multiply_1x2(<2 x float> %a, <2 x float> %b) {
 ; CHECK-NEXT:    [[SPLIT2:%.*]] = shufflevector <2 x float> [[B]], <2 x float> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[BLOCK:%.*]] = shufflevector <2 x float> [[SPLIT]], <2 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <1 x float> [[SPLIT1]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x float> poison, float [[TMP0]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x float> poison, float [[TMP0]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP1:%.*]] = fmul <1 x float> [[BLOCK]], [[SPLAT_SPLAT]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <1 x float> [[TMP1]], <1 x float> poison, <2 x i32> <i32 0, i32 undef>
 ; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <2 x float> undef, <2 x float> [[TMP2]], <2 x i32> <i32 2, i32 1>
 ; CHECK-NEXT:    [[BLOCK3:%.*]] = shufflevector <2 x float> [[SPLIT]], <2 x float> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <1 x float> [[SPLIT1]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT4:%.*]] = insertelement <1 x float> poison, float [[TMP4]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT4:%.*]] = insertelement <1 x float> poison, float [[TMP4]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT5:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT4]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP5:%.*]] = fmul <1 x float> [[BLOCK3]], [[SPLAT_SPLAT5]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <1 x float> [[TMP5]], <1 x float> poison, <2 x i32> <i32 0, i32 undef>
 ; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> [[TMP6]], <2 x i32> <i32 0, i32 2>
 ; CHECK-NEXT:    [[BLOCK6:%.*]] = shufflevector <2 x float> [[SPLIT]], <2 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <1 x float> [[SPLIT2]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT7:%.*]] = insertelement <1 x float> poison, float [[TMP8]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT7:%.*]] = insertelement <1 x float> poison, float [[TMP8]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT8:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT7]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP9:%.*]] = fmul <1 x float> [[BLOCK6]], [[SPLAT_SPLAT8]]
 ; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <1 x float> [[TMP9]], <1 x float> poison, <2 x i32> <i32 0, i32 undef>
 ; CHECK-NEXT:    [[TMP11:%.*]] = shufflevector <2 x float> undef, <2 x float> [[TMP10]], <2 x i32> <i32 2, i32 1>
 ; CHECK-NEXT:    [[BLOCK9:%.*]] = shufflevector <2 x float> [[SPLIT]], <2 x float> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <1 x float> [[SPLIT2]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT10:%.*]] = insertelement <1 x float> poison, float [[TMP12]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT10:%.*]] = insertelement <1 x float> poison, float [[TMP12]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT11:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT10]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP13:%.*]] = fmul <1 x float> [[BLOCK9]], [[SPLAT_SPLAT11]]
 ; CHECK-NEXT:    [[TMP14:%.*]] = shufflevector <1 x float> [[TMP13]], <1 x float> poison, <2 x i32> <i32 0, i32 undef>
@@ -125,12 +125,12 @@ define <9 x float> @multiply_2x3(<6 x float> %a, <6 x float> %b) {
 ; CHECK-NEXT:    [[SPLIT4:%.*]] = shufflevector <6 x float> [[B]], <6 x float> poison, <2 x i32> <i32 4, i32 5>
 ; CHECK-NEXT:    [[BLOCK:%.*]] = shufflevector <3 x float> [[SPLIT]], <3 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x float> [[SPLIT2]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x float> poison, float [[TMP0]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x float> poison, float [[TMP0]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP1:%.*]] = fmul <1 x float> [[BLOCK]], [[SPLAT_SPLAT]]
 ; CHECK-NEXT:    [[BLOCK5:%.*]] = shufflevector <3 x float> [[SPLIT1]], <3 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x float> [[SPLIT2]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT6:%.*]] = insertelement <1 x float> poison, float [[TMP2]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT6:%.*]] = insertelement <1 x float> poison, float [[TMP2]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT7:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT6]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP3:%.*]] = fmul <1 x float> [[BLOCK5]], [[SPLAT_SPLAT7]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = fadd <1 x float> [[TMP1]], [[TMP3]]
@@ -138,12 +138,12 @@ define <9 x float> @multiply_2x3(<6 x float> %a, <6 x float> %b) {
 ; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <3 x float> undef, <3 x float> [[TMP5]], <3 x i32> <i32 3, i32 1, i32 2>
 ; CHECK-NEXT:    [[BLOCK8:%.*]] = shufflevector <3 x float> [[SPLIT]], <3 x float> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <2 x float> [[SPLIT2]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT9:%.*]] = insertelement <1 x float> poison, float [[TMP7]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT9:%.*]] = insertelement <1 x float> poison, float [[TMP7]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT10:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT9]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP8:%.*]] = fmul <1 x float> [[BLOCK8]], [[SPLAT_SPLAT10]]
 ; CHECK-NEXT:    [[BLOCK11:%.*]] = shufflevector <3 x float> [[SPLIT1]], <3 x float> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <2 x float> [[SPLIT2]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT12:%.*]] = insertelement <1 x float> poison, float [[TMP9]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT12:%.*]] = insertelement <1 x float> poison, float [[TMP9]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT13:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT12]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP10:%.*]] = fmul <1 x float> [[BLOCK11]], [[SPLAT_SPLAT13]]
 ; CHECK-NEXT:    [[TMP11:%.*]] = fadd <1 x float> [[TMP8]], [[TMP10]]
@@ -151,12 +151,12 @@ define <9 x float> @multiply_2x3(<6 x float> %a, <6 x float> %b) {
 ; CHECK-NEXT:    [[TMP13:%.*]] = shufflevector <3 x float> [[TMP6]], <3 x float> [[TMP12]], <3 x i32> <i32 0, i32 3, i32 2>
 ; CHECK-NEXT:    [[BLOCK14:%.*]] = shufflevector <3 x float> [[SPLIT]], <3 x float> poison, <1 x i32> <i32 2>
 ; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <2 x float> [[SPLIT2]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT15:%.*]] = insertelement <1 x float> poison, float [[TMP14]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT15:%.*]] = insertelement <1 x float> poison, float [[TMP14]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT16:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT15]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP15:%.*]] = fmul <1 x float> [[BLOCK14]], [[SPLAT_SPLAT16]]
 ; CHECK-NEXT:    [[BLOCK17:%.*]] = shufflevector <3 x float> [[SPLIT1]], <3 x float> poison, <1 x i32> <i32 2>
 ; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <2 x float> [[SPLIT2]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT18:%.*]] = insertelement <1 x float> poison, float [[TMP16]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT18:%.*]] = insertelement <1 x float> poison, float [[TMP16]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT19:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT18]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP17:%.*]] = fmul <1 x float> [[BLOCK17]], [[SPLAT_SPLAT19]]
 ; CHECK-NEXT:    [[TMP18:%.*]] = fadd <1 x float> [[TMP15]], [[TMP17]]
@@ -164,12 +164,12 @@ define <9 x float> @multiply_2x3(<6 x float> %a, <6 x float> %b) {
 ; CHECK-NEXT:    [[TMP20:%.*]] = shufflevector <3 x float> [[TMP13]], <3 x float> [[TMP19]], <3 x i32> <i32 0, i32 1, i32 3>
 ; CHECK-NEXT:    [[BLOCK20:%.*]] = shufflevector <3 x float> [[SPLIT]], <3 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP21:%.*]] = extractelement <2 x float> [[SPLIT3]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT21:%.*]] = insertelement <1 x float> poison, float [[TMP21]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT21:%.*]] = insertelement <1 x float> poison, float [[TMP21]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT22:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT21]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP22:%.*]] = fmul <1 x float> [[BLOCK20]], [[SPLAT_SPLAT22]]
 ; CHECK-NEXT:    [[BLOCK23:%.*]] = shufflevector <3 x float> [[SPLIT1]], <3 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP23:%.*]] = extractelement <2 x float> [[SPLIT3]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT24:%.*]] = insertelement <1 x float> poison, float [[TMP23]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT24:%.*]] = insertelement <1 x float> poison, float [[TMP23]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT25:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT24]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP24:%.*]] = fmul <1 x float> [[BLOCK23]], [[SPLAT_SPLAT25]]
 ; CHECK-NEXT:    [[TMP25:%.*]] = fadd <1 x float> [[TMP22]], [[TMP24]]
@@ -177,12 +177,12 @@ define <9 x float> @multiply_2x3(<6 x float> %a, <6 x float> %b) {
 ; CHECK-NEXT:    [[TMP27:%.*]] = shufflevector <3 x float> undef, <3 x float> [[TMP26]], <3 x i32> <i32 3, i32 1, i32 2>
 ; CHECK-NEXT:    [[BLOCK26:%.*]] = shufflevector <3 x float> [[SPLIT]], <3 x float> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP28:%.*]] = extractelement <2 x float> [[SPLIT3]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT27:%.*]] = insertelement <1 x float> poison, float [[TMP28]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT27:%.*]] = insertelement <1 x float> poison, float [[TMP28]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT28:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT27]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP29:%.*]] = fmul <1 x float> [[BLOCK26]], [[SPLAT_SPLAT28]]
 ; CHECK-NEXT:    [[BLOCK29:%.*]] = shufflevector <3 x float> [[SPLIT1]], <3 x float> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP30:%.*]] = extractelement <2 x float> [[SPLIT3]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT30:%.*]] = insertelement <1 x float> poison, float [[TMP30]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT30:%.*]] = insertelement <1 x float> poison, float [[TMP30]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT31:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT30]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP31:%.*]] = fmul <1 x float> [[BLOCK29]], [[SPLAT_SPLAT31]]
 ; CHECK-NEXT:    [[TMP32:%.*]] = fadd <1 x float> [[TMP29]], [[TMP31]]
@@ -190,12 +190,12 @@ define <9 x float> @multiply_2x3(<6 x float> %a, <6 x float> %b) {
 ; CHECK-NEXT:    [[TMP34:%.*]] = shufflevector <3 x float> [[TMP27]], <3 x float> [[TMP33]], <3 x i32> <i32 0, i32 3, i32 2>
 ; CHECK-NEXT:    [[BLOCK32:%.*]] = shufflevector <3 x float> [[SPLIT]], <3 x float> poison, <1 x i32> <i32 2>
 ; CHECK-NEXT:    [[TMP35:%.*]] = extractelement <2 x float> [[SPLIT3]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT33:%.*]] = insertelement <1 x float> poison, float [[TMP35]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT33:%.*]] = insertelement <1 x float> poison, float [[TMP35]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT34:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT33]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP36:%.*]] = fmul <1 x float> [[BLOCK32]], [[SPLAT_SPLAT34]]
 ; CHECK-NEXT:    [[BLOCK35:%.*]] = shufflevector <3 x float> [[SPLIT1]], <3 x float> poison, <1 x i32> <i32 2>
 ; CHECK-NEXT:    [[TMP37:%.*]] = extractelement <2 x float> [[SPLIT3]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT36:%.*]] = insertelement <1 x float> poison, float [[TMP37]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT36:%.*]] = insertelement <1 x float> poison, float [[TMP37]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT37:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT36]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP38:%.*]] = fmul <1 x float> [[BLOCK35]], [[SPLAT_SPLAT37]]
 ; CHECK-NEXT:    [[TMP39:%.*]] = fadd <1 x float> [[TMP36]], [[TMP38]]
@@ -203,12 +203,12 @@ define <9 x float> @multiply_2x3(<6 x float> %a, <6 x float> %b) {
 ; CHECK-NEXT:    [[TMP41:%.*]] = shufflevector <3 x float> [[TMP34]], <3 x float> [[TMP40]], <3 x i32> <i32 0, i32 1, i32 3>
 ; CHECK-NEXT:    [[BLOCK38:%.*]] = shufflevector <3 x float> [[SPLIT]], <3 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP42:%.*]] = extractelement <2 x float> [[SPLIT4]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT39:%.*]] = insertelement <1 x float> poison, float [[TMP42]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT39:%.*]] = insertelement <1 x float> poison, float [[TMP42]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT40:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT39]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP43:%.*]] = fmul <1 x float> [[BLOCK38]], [[SPLAT_SPLAT40]]
 ; CHECK-NEXT:    [[BLOCK41:%.*]] = shufflevector <3 x float> [[SPLIT1]], <3 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP44:%.*]] = extractelement <2 x float> [[SPLIT4]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT42:%.*]] = insertelement <1 x float> poison, float [[TMP44]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT42:%.*]] = insertelement <1 x float> poison, float [[TMP44]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT43:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT42]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP45:%.*]] = fmul <1 x float> [[BLOCK41]], [[SPLAT_SPLAT43]]
 ; CHECK-NEXT:    [[TMP46:%.*]] = fadd <1 x float> [[TMP43]], [[TMP45]]
@@ -216,12 +216,12 @@ define <9 x float> @multiply_2x3(<6 x float> %a, <6 x float> %b) {
 ; CHECK-NEXT:    [[TMP48:%.*]] = shufflevector <3 x float> undef, <3 x float> [[TMP47]], <3 x i32> <i32 3, i32 1, i32 2>
 ; CHECK-NEXT:    [[BLOCK44:%.*]] = shufflevector <3 x float> [[SPLIT]], <3 x float> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP49:%.*]] = extractelement <2 x float> [[SPLIT4]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT45:%.*]] = insertelement <1 x float> poison, float [[TMP49]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT45:%.*]] = insertelement <1 x float> poison, float [[TMP49]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT46:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT45]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP50:%.*]] = fmul <1 x float> [[BLOCK44]], [[SPLAT_SPLAT46]]
 ; CHECK-NEXT:    [[BLOCK47:%.*]] = shufflevector <3 x float> [[SPLIT1]], <3 x float> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP51:%.*]] = extractelement <2 x float> [[SPLIT4]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT48:%.*]] = insertelement <1 x float> poison, float [[TMP51]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT48:%.*]] = insertelement <1 x float> poison, float [[TMP51]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT49:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT48]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP52:%.*]] = fmul <1 x float> [[BLOCK47]], [[SPLAT_SPLAT49]]
 ; CHECK-NEXT:    [[TMP53:%.*]] = fadd <1 x float> [[TMP50]], [[TMP52]]
@@ -229,12 +229,12 @@ define <9 x float> @multiply_2x3(<6 x float> %a, <6 x float> %b) {
 ; CHECK-NEXT:    [[TMP55:%.*]] = shufflevector <3 x float> [[TMP48]], <3 x float> [[TMP54]], <3 x i32> <i32 0, i32 3, i32 2>
 ; CHECK-NEXT:    [[BLOCK50:%.*]] = shufflevector <3 x float> [[SPLIT]], <3 x float> poison, <1 x i32> <i32 2>
 ; CHECK-NEXT:    [[TMP56:%.*]] = extractelement <2 x float> [[SPLIT4]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT51:%.*]] = insertelement <1 x float> poison, float [[TMP56]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT51:%.*]] = insertelement <1 x float> poison, float [[TMP56]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT52:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT51]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP57:%.*]] = fmul <1 x float> [[BLOCK50]], [[SPLAT_SPLAT52]]
 ; CHECK-NEXT:    [[BLOCK53:%.*]] = shufflevector <3 x float> [[SPLIT1]], <3 x float> poison, <1 x i32> <i32 2>
 ; CHECK-NEXT:    [[TMP58:%.*]] = extractelement <2 x float> [[SPLIT4]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT54:%.*]] = insertelement <1 x float> poison, float [[TMP58]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT54:%.*]] = insertelement <1 x float> poison, float [[TMP58]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT55:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT54]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP59:%.*]] = fmul <1 x float> [[BLOCK53]], [[SPLAT_SPLAT55]]
 ; CHECK-NEXT:    [[TMP60:%.*]] = fadd <1 x float> [[TMP57]], [[TMP59]]

diff  --git a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-fused-volatile.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-fused-volatile.ll
index 8bc37f6eddf4c..fb1925d48bb96 100644
--- a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-fused-volatile.ll
+++ b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-fused-volatile.ll
@@ -41,12 +41,12 @@ define void @multiply_all_volatile(ptr noalias %A, ptr noalias %B, ptr noalias %
 ; CHECK-NEXT:    [[BLOCK:%.*]] = shufflevector <2 x double> [[RESULT_VEC_0]], <2 x double> poison, <2 x i32> <i32 0, i32 1>
 ; CHECK-NEXT:    [[BLOCK5:%.*]] = shufflevector <2 x double> [[COL_LOAD]], <2 x double> poison, <2 x i32> <i32 0, i32 1>
 ; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x double> [[COL_LOAD2]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <2 x double> poison, double [[TMP6]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <2 x double> poison, double [[TMP6]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP7:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK5]], <2 x double> [[SPLAT_SPLAT]], <2 x double> [[BLOCK]])
 ; CHECK-NEXT:    [[BLOCK6:%.*]] = shufflevector <2 x double> [[COL_LOAD1]], <2 x double> poison, <2 x i32> <i32 0, i32 1>
 ; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <2 x double> [[COL_LOAD2]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT7:%.*]] = insertelement <2 x double> poison, double [[TMP8]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT7:%.*]] = insertelement <2 x double> poison, double [[TMP8]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT8:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT7]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP9:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK6]], <2 x double> [[SPLAT_SPLAT8]], <2 x double> [[TMP7]])
 ; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <2 x double> [[TMP9]], <2 x double> poison, <2 x i32> <i32 0, i32 1>
@@ -54,12 +54,12 @@ define void @multiply_all_volatile(ptr noalias %A, ptr noalias %B, ptr noalias %
 ; CHECK-NEXT:    [[BLOCK9:%.*]] = shufflevector <2 x double> [[RESULT_VEC_1]], <2 x double> poison, <2 x i32> <i32 0, i32 1>
 ; CHECK-NEXT:    [[BLOCK10:%.*]] = shufflevector <2 x double> [[COL_LOAD]], <2 x double> poison, <2 x i32> <i32 0, i32 1>
 ; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <2 x double> [[COL_LOAD4]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT11:%.*]] = insertelement <2 x double> poison, double [[TMP12]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT11:%.*]] = insertelement <2 x double> poison, double [[TMP12]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT12:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT11]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP13:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK10]], <2 x double> [[SPLAT_SPLAT12]], <2 x double> [[BLOCK9]])
 ; CHECK-NEXT:    [[BLOCK13:%.*]] = shufflevector <2 x double> [[COL_LOAD1]], <2 x double> poison, <2 x i32> <i32 0, i32 1>
 ; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <2 x double> [[COL_LOAD4]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT14:%.*]] = insertelement <2 x double> poison, double [[TMP14]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT14:%.*]] = insertelement <2 x double> poison, double [[TMP14]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT15:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT14]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP15:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK13]], <2 x double> [[SPLAT_SPLAT15]], <2 x double> [[TMP13]])
 ; CHECK-NEXT:    [[TMP16:%.*]] = shufflevector <2 x double> [[TMP15]], <2 x double> poison, <2 x i32> <i32 0, i32 1>
@@ -134,12 +134,12 @@ define void @multiply_load0_volatile(ptr noalias %A, ptr noalias %B, ptr noalias
 ; CHECK-NEXT:    [[BLOCK:%.*]] = shufflevector <2 x double> [[RESULT_VEC_0]], <2 x double> poison, <2 x i32> <i32 0, i32 1>
 ; CHECK-NEXT:    [[BLOCK5:%.*]] = shufflevector <2 x double> [[COL_LOAD]], <2 x double> poison, <2 x i32> <i32 0, i32 1>
 ; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x double> [[COL_LOAD2]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <2 x double> poison, double [[TMP6]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <2 x double> poison, double [[TMP6]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP7:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK5]], <2 x double> [[SPLAT_SPLAT]], <2 x double> [[BLOCK]])
 ; CHECK-NEXT:    [[BLOCK6:%.*]] = shufflevector <2 x double> [[COL_LOAD1]], <2 x double> poison, <2 x i32> <i32 0, i32 1>
 ; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <2 x double> [[COL_LOAD2]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT7:%.*]] = insertelement <2 x double> poison, double [[TMP8]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT7:%.*]] = insertelement <2 x double> poison, double [[TMP8]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT8:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT7]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP9:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK6]], <2 x double> [[SPLAT_SPLAT8]], <2 x double> [[TMP7]])
 ; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <2 x double> [[TMP9]], <2 x double> poison, <2 x i32> <i32 0, i32 1>
@@ -147,12 +147,12 @@ define void @multiply_load0_volatile(ptr noalias %A, ptr noalias %B, ptr noalias
 ; CHECK-NEXT:    [[BLOCK9:%.*]] = shufflevector <2 x double> [[RESULT_VEC_1]], <2 x double> poison, <2 x i32> <i32 0, i32 1>
 ; CHECK-NEXT:    [[BLOCK10:%.*]] = shufflevector <2 x double> [[COL_LOAD]], <2 x double> poison, <2 x i32> <i32 0, i32 1>
 ; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <2 x double> [[COL_LOAD4]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT11:%.*]] = insertelement <2 x double> poison, double [[TMP12]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT11:%.*]] = insertelement <2 x double> poison, double [[TMP12]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT12:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT11]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP13:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK10]], <2 x double> [[SPLAT_SPLAT12]], <2 x double> [[BLOCK9]])
 ; CHECK-NEXT:    [[BLOCK13:%.*]] = shufflevector <2 x double> [[COL_LOAD1]], <2 x double> poison, <2 x i32> <i32 0, i32 1>
 ; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <2 x double> [[COL_LOAD4]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT14:%.*]] = insertelement <2 x double> poison, double [[TMP14]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT14:%.*]] = insertelement <2 x double> poison, double [[TMP14]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT15:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT14]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP15:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK13]], <2 x double> [[SPLAT_SPLAT15]], <2 x double> [[TMP13]])
 ; CHECK-NEXT:    [[TMP16:%.*]] = shufflevector <2 x double> [[TMP15]], <2 x double> poison, <2 x i32> <i32 0, i32 1>
@@ -226,12 +226,12 @@ define void @multiply_load1_volatile(ptr noalias %A, ptr noalias %B, ptr noalias
 ; CHECK-NEXT:    [[BLOCK:%.*]] = shufflevector <2 x double> [[RESULT_VEC_0]], <2 x double> poison, <2 x i32> <i32 0, i32 1>
 ; CHECK-NEXT:    [[BLOCK5:%.*]] = shufflevector <2 x double> [[COL_LOAD]], <2 x double> poison, <2 x i32> <i32 0, i32 1>
 ; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x double> [[COL_LOAD2]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <2 x double> poison, double [[TMP6]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <2 x double> poison, double [[TMP6]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP7:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK5]], <2 x double> [[SPLAT_SPLAT]], <2 x double> [[BLOCK]])
 ; CHECK-NEXT:    [[BLOCK6:%.*]] = shufflevector <2 x double> [[COL_LOAD1]], <2 x double> poison, <2 x i32> <i32 0, i32 1>
 ; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <2 x double> [[COL_LOAD2]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT7:%.*]] = insertelement <2 x double> poison, double [[TMP8]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT7:%.*]] = insertelement <2 x double> poison, double [[TMP8]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT8:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT7]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP9:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK6]], <2 x double> [[SPLAT_SPLAT8]], <2 x double> [[TMP7]])
 ; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <2 x double> [[TMP9]], <2 x double> poison, <2 x i32> <i32 0, i32 1>
@@ -239,12 +239,12 @@ define void @multiply_load1_volatile(ptr noalias %A, ptr noalias %B, ptr noalias
 ; CHECK-NEXT:    [[BLOCK9:%.*]] = shufflevector <2 x double> [[RESULT_VEC_1]], <2 x double> poison, <2 x i32> <i32 0, i32 1>
 ; CHECK-NEXT:    [[BLOCK10:%.*]] = shufflevector <2 x double> [[COL_LOAD]], <2 x double> poison, <2 x i32> <i32 0, i32 1>
 ; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <2 x double> [[COL_LOAD4]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT11:%.*]] = insertelement <2 x double> poison, double [[TMP12]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT11:%.*]] = insertelement <2 x double> poison, double [[TMP12]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT12:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT11]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP13:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK10]], <2 x double> [[SPLAT_SPLAT12]], <2 x double> [[BLOCK9]])
 ; CHECK-NEXT:    [[BLOCK13:%.*]] = shufflevector <2 x double> [[COL_LOAD1]], <2 x double> poison, <2 x i32> <i32 0, i32 1>
 ; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <2 x double> [[COL_LOAD4]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT14:%.*]] = insertelement <2 x double> poison, double [[TMP14]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT14:%.*]] = insertelement <2 x double> poison, double [[TMP14]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT15:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT14]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP15:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK13]], <2 x double> [[SPLAT_SPLAT15]], <2 x double> [[TMP13]])
 ; CHECK-NEXT:    [[TMP16:%.*]] = shufflevector <2 x double> [[TMP15]], <2 x double> poison, <2 x i32> <i32 0, i32 1>
@@ -318,12 +318,12 @@ define void @multiply_store_volatile(ptr noalias %A, ptr noalias %B, ptr noalias
 ; CHECK-NEXT:    [[BLOCK:%.*]] = shufflevector <2 x double> [[RESULT_VEC_0]], <2 x double> poison, <2 x i32> <i32 0, i32 1>
 ; CHECK-NEXT:    [[BLOCK5:%.*]] = shufflevector <2 x double> [[COL_LOAD]], <2 x double> poison, <2 x i32> <i32 0, i32 1>
 ; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x double> [[COL_LOAD2]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <2 x double> poison, double [[TMP6]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <2 x double> poison, double [[TMP6]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP7:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK5]], <2 x double> [[SPLAT_SPLAT]], <2 x double> [[BLOCK]])
 ; CHECK-NEXT:    [[BLOCK6:%.*]] = shufflevector <2 x double> [[COL_LOAD1]], <2 x double> poison, <2 x i32> <i32 0, i32 1>
 ; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <2 x double> [[COL_LOAD2]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT7:%.*]] = insertelement <2 x double> poison, double [[TMP8]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT7:%.*]] = insertelement <2 x double> poison, double [[TMP8]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT8:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT7]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP9:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK6]], <2 x double> [[SPLAT_SPLAT8]], <2 x double> [[TMP7]])
 ; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <2 x double> [[TMP9]], <2 x double> poison, <2 x i32> <i32 0, i32 1>
@@ -331,12 +331,12 @@ define void @multiply_store_volatile(ptr noalias %A, ptr noalias %B, ptr noalias
 ; CHECK-NEXT:    [[BLOCK9:%.*]] = shufflevector <2 x double> [[RESULT_VEC_1]], <2 x double> poison, <2 x i32> <i32 0, i32 1>
 ; CHECK-NEXT:    [[BLOCK10:%.*]] = shufflevector <2 x double> [[COL_LOAD]], <2 x double> poison, <2 x i32> <i32 0, i32 1>
 ; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <2 x double> [[COL_LOAD4]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT11:%.*]] = insertelement <2 x double> poison, double [[TMP12]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT11:%.*]] = insertelement <2 x double> poison, double [[TMP12]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT12:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT11]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP13:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK10]], <2 x double> [[SPLAT_SPLAT12]], <2 x double> [[BLOCK9]])
 ; CHECK-NEXT:    [[BLOCK13:%.*]] = shufflevector <2 x double> [[COL_LOAD1]], <2 x double> poison, <2 x i32> <i32 0, i32 1>
 ; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <2 x double> [[COL_LOAD4]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT14:%.*]] = insertelement <2 x double> poison, double [[TMP14]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT14:%.*]] = insertelement <2 x double> poison, double [[TMP14]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT15:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT14]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP15:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK13]], <2 x double> [[SPLAT_SPLAT15]], <2 x double> [[TMP13]])
 ; CHECK-NEXT:    [[TMP16:%.*]] = shufflevector <2 x double> [[TMP15]], <2 x double> poison, <2 x i32> <i32 0, i32 1>

diff  --git a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-i32-row-major.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-i32-row-major.ll
index 2dedee99042f1..c7a5f73b9879a 100644
--- a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-i32-row-major.ll
+++ b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-i32-row-major.ll
@@ -13,12 +13,12 @@ define <4 x i32> @multiply_2x2(<4 x i32> %a, <4 x i32> %b) {
 ; RM-NEXT:    [[SPLIT3:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> poison, <2 x i32> <i32 2, i32 3>
 ; RM-NEXT:    [[BLOCK:%.*]] = shufflevector <2 x i32> [[SPLIT2]], <2 x i32> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP0:%.*]] = extractelement <2 x i32> [[SPLIT]], i64 0
-; RM-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x i32> poison, i32 [[TMP0]], i32 0
+; RM-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x i32> poison, i32 [[TMP0]], i64 0
 ; RM-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP1:%.*]] = mul <1 x i32> [[SPLAT_SPLAT]], [[BLOCK]]
 ; RM-NEXT:    [[BLOCK4:%.*]] = shufflevector <2 x i32> [[SPLIT3]], <2 x i32> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP2:%.*]] = extractelement <2 x i32> [[SPLIT]], i64 1
-; RM-NEXT:    [[SPLAT_SPLATINSERT5:%.*]] = insertelement <1 x i32> poison, i32 [[TMP2]], i32 0
+; RM-NEXT:    [[SPLAT_SPLATINSERT5:%.*]] = insertelement <1 x i32> poison, i32 [[TMP2]], i64 0
 ; RM-NEXT:    [[SPLAT_SPLAT6:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT5]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP3:%.*]] = mul <1 x i32> [[SPLAT_SPLAT6]], [[BLOCK4]]
 ; RM-NEXT:    [[TMP4:%.*]] = add <1 x i32> [[TMP1]], [[TMP3]]
@@ -26,12 +26,12 @@ define <4 x i32> @multiply_2x2(<4 x i32> %a, <4 x i32> %b) {
 ; RM-NEXT:    [[TMP6:%.*]] = shufflevector <2 x i32> undef, <2 x i32> [[TMP5]], <2 x i32> <i32 2, i32 1>
 ; RM-NEXT:    [[BLOCK7:%.*]] = shufflevector <2 x i32> [[SPLIT2]], <2 x i32> poison, <1 x i32> <i32 1>
 ; RM-NEXT:    [[TMP7:%.*]] = extractelement <2 x i32> [[SPLIT]], i64 0
-; RM-NEXT:    [[SPLAT_SPLATINSERT8:%.*]] = insertelement <1 x i32> poison, i32 [[TMP7]], i32 0
+; RM-NEXT:    [[SPLAT_SPLATINSERT8:%.*]] = insertelement <1 x i32> poison, i32 [[TMP7]], i64 0
 ; RM-NEXT:    [[SPLAT_SPLAT9:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT8]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP8:%.*]] = mul <1 x i32> [[SPLAT_SPLAT9]], [[BLOCK7]]
 ; RM-NEXT:    [[BLOCK10:%.*]] = shufflevector <2 x i32> [[SPLIT3]], <2 x i32> poison, <1 x i32> <i32 1>
 ; RM-NEXT:    [[TMP9:%.*]] = extractelement <2 x i32> [[SPLIT]], i64 1
-; RM-NEXT:    [[SPLAT_SPLATINSERT11:%.*]] = insertelement <1 x i32> poison, i32 [[TMP9]], i32 0
+; RM-NEXT:    [[SPLAT_SPLATINSERT11:%.*]] = insertelement <1 x i32> poison, i32 [[TMP9]], i64 0
 ; RM-NEXT:    [[SPLAT_SPLAT12:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT11]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP10:%.*]] = mul <1 x i32> [[SPLAT_SPLAT12]], [[BLOCK10]]
 ; RM-NEXT:    [[TMP11:%.*]] = add <1 x i32> [[TMP8]], [[TMP10]]
@@ -39,12 +39,12 @@ define <4 x i32> @multiply_2x2(<4 x i32> %a, <4 x i32> %b) {
 ; RM-NEXT:    [[TMP13:%.*]] = shufflevector <2 x i32> [[TMP6]], <2 x i32> [[TMP12]], <2 x i32> <i32 0, i32 2>
 ; RM-NEXT:    [[BLOCK13:%.*]] = shufflevector <2 x i32> [[SPLIT2]], <2 x i32> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP14:%.*]] = extractelement <2 x i32> [[SPLIT1]], i64 0
-; RM-NEXT:    [[SPLAT_SPLATINSERT14:%.*]] = insertelement <1 x i32> poison, i32 [[TMP14]], i32 0
+; RM-NEXT:    [[SPLAT_SPLATINSERT14:%.*]] = insertelement <1 x i32> poison, i32 [[TMP14]], i64 0
 ; RM-NEXT:    [[SPLAT_SPLAT15:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT14]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP15:%.*]] = mul <1 x i32> [[SPLAT_SPLAT15]], [[BLOCK13]]
 ; RM-NEXT:    [[BLOCK16:%.*]] = shufflevector <2 x i32> [[SPLIT3]], <2 x i32> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP16:%.*]] = extractelement <2 x i32> [[SPLIT1]], i64 1
-; RM-NEXT:    [[SPLAT_SPLATINSERT17:%.*]] = insertelement <1 x i32> poison, i32 [[TMP16]], i32 0
+; RM-NEXT:    [[SPLAT_SPLATINSERT17:%.*]] = insertelement <1 x i32> poison, i32 [[TMP16]], i64 0
 ; RM-NEXT:    [[SPLAT_SPLAT18:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT17]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP17:%.*]] = mul <1 x i32> [[SPLAT_SPLAT18]], [[BLOCK16]]
 ; RM-NEXT:    [[TMP18:%.*]] = add <1 x i32> [[TMP15]], [[TMP17]]
@@ -52,12 +52,12 @@ define <4 x i32> @multiply_2x2(<4 x i32> %a, <4 x i32> %b) {
 ; RM-NEXT:    [[TMP20:%.*]] = shufflevector <2 x i32> undef, <2 x i32> [[TMP19]], <2 x i32> <i32 2, i32 1>
 ; RM-NEXT:    [[BLOCK19:%.*]] = shufflevector <2 x i32> [[SPLIT2]], <2 x i32> poison, <1 x i32> <i32 1>
 ; RM-NEXT:    [[TMP21:%.*]] = extractelement <2 x i32> [[SPLIT1]], i64 0
-; RM-NEXT:    [[SPLAT_SPLATINSERT20:%.*]] = insertelement <1 x i32> poison, i32 [[TMP21]], i32 0
+; RM-NEXT:    [[SPLAT_SPLATINSERT20:%.*]] = insertelement <1 x i32> poison, i32 [[TMP21]], i64 0
 ; RM-NEXT:    [[SPLAT_SPLAT21:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT20]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP22:%.*]] = mul <1 x i32> [[SPLAT_SPLAT21]], [[BLOCK19]]
 ; RM-NEXT:    [[BLOCK22:%.*]] = shufflevector <2 x i32> [[SPLIT3]], <2 x i32> poison, <1 x i32> <i32 1>
 ; RM-NEXT:    [[TMP23:%.*]] = extractelement <2 x i32> [[SPLIT1]], i64 1
-; RM-NEXT:    [[SPLAT_SPLATINSERT23:%.*]] = insertelement <1 x i32> poison, i32 [[TMP23]], i32 0
+; RM-NEXT:    [[SPLAT_SPLATINSERT23:%.*]] = insertelement <1 x i32> poison, i32 [[TMP23]], i64 0
 ; RM-NEXT:    [[SPLAT_SPLAT24:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT23]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP24:%.*]] = mul <1 x i32> [[SPLAT_SPLAT24]], [[BLOCK22]]
 ; RM-NEXT:    [[TMP25:%.*]] = add <1 x i32> [[TMP22]], [[TMP24]]
@@ -81,28 +81,28 @@ define <4 x i32> @multiply_1x2(<2 x i32> %a, <2 x i32> %b) {
 ; RM-NEXT:    [[SPLIT2:%.*]] = shufflevector <2 x i32> [[B:%.*]], <2 x i32> poison, <2 x i32> <i32 0, i32 1>
 ; RM-NEXT:    [[BLOCK:%.*]] = shufflevector <2 x i32> [[SPLIT2]], <2 x i32> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP0:%.*]] = extractelement <1 x i32> [[SPLIT]], i64 0
-; RM-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x i32> poison, i32 [[TMP0]], i32 0
+; RM-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x i32> poison, i32 [[TMP0]], i64 0
 ; RM-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP1:%.*]] = mul <1 x i32> [[SPLAT_SPLAT]], [[BLOCK]]
 ; RM-NEXT:    [[TMP2:%.*]] = shufflevector <1 x i32> [[TMP1]], <1 x i32> poison, <2 x i32> <i32 0, i32 undef>
 ; RM-NEXT:    [[TMP3:%.*]] = shufflevector <2 x i32> undef, <2 x i32> [[TMP2]], <2 x i32> <i32 2, i32 1>
 ; RM-NEXT:    [[BLOCK3:%.*]] = shufflevector <2 x i32> [[SPLIT2]], <2 x i32> poison, <1 x i32> <i32 1>
 ; RM-NEXT:    [[TMP4:%.*]] = extractelement <1 x i32> [[SPLIT]], i64 0
-; RM-NEXT:    [[SPLAT_SPLATINSERT4:%.*]] = insertelement <1 x i32> poison, i32 [[TMP4]], i32 0
+; RM-NEXT:    [[SPLAT_SPLATINSERT4:%.*]] = insertelement <1 x i32> poison, i32 [[TMP4]], i64 0
 ; RM-NEXT:    [[SPLAT_SPLAT5:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT4]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP5:%.*]] = mul <1 x i32> [[SPLAT_SPLAT5]], [[BLOCK3]]
 ; RM-NEXT:    [[TMP6:%.*]] = shufflevector <1 x i32> [[TMP5]], <1 x i32> poison, <2 x i32> <i32 0, i32 undef>
 ; RM-NEXT:    [[TMP7:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> [[TMP6]], <2 x i32> <i32 0, i32 2>
 ; RM-NEXT:    [[BLOCK6:%.*]] = shufflevector <2 x i32> [[SPLIT2]], <2 x i32> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP8:%.*]] = extractelement <1 x i32> [[SPLIT1]], i64 0
-; RM-NEXT:    [[SPLAT_SPLATINSERT7:%.*]] = insertelement <1 x i32> poison, i32 [[TMP8]], i32 0
+; RM-NEXT:    [[SPLAT_SPLATINSERT7:%.*]] = insertelement <1 x i32> poison, i32 [[TMP8]], i64 0
 ; RM-NEXT:    [[SPLAT_SPLAT8:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT7]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP9:%.*]] = mul <1 x i32> [[SPLAT_SPLAT8]], [[BLOCK6]]
 ; RM-NEXT:    [[TMP10:%.*]] = shufflevector <1 x i32> [[TMP9]], <1 x i32> poison, <2 x i32> <i32 0, i32 undef>
 ; RM-NEXT:    [[TMP11:%.*]] = shufflevector <2 x i32> undef, <2 x i32> [[TMP10]], <2 x i32> <i32 2, i32 1>
 ; RM-NEXT:    [[BLOCK9:%.*]] = shufflevector <2 x i32> [[SPLIT2]], <2 x i32> poison, <1 x i32> <i32 1>
 ; RM-NEXT:    [[TMP12:%.*]] = extractelement <1 x i32> [[SPLIT1]], i64 0
-; RM-NEXT:    [[SPLAT_SPLATINSERT10:%.*]] = insertelement <1 x i32> poison, i32 [[TMP12]], i32 0
+; RM-NEXT:    [[SPLAT_SPLATINSERT10:%.*]] = insertelement <1 x i32> poison, i32 [[TMP12]], i64 0
 ; RM-NEXT:    [[SPLAT_SPLAT11:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT10]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP13:%.*]] = mul <1 x i32> [[SPLAT_SPLAT11]], [[BLOCK9]]
 ; RM-NEXT:    [[TMP14:%.*]] = shufflevector <1 x i32> [[TMP13]], <1 x i32> poison, <2 x i32> <i32 0, i32 undef>
@@ -127,12 +127,12 @@ define <9 x i32> @multiply_2x3(<6 x i32> %a, <6 x i32> %b) {
 ; RM-NEXT:    [[SPLIT4:%.*]] = shufflevector <6 x i32> [[B]], <6 x i32> poison, <3 x i32> <i32 3, i32 4, i32 5>
 ; RM-NEXT:    [[BLOCK:%.*]] = shufflevector <3 x i32> [[SPLIT3]], <3 x i32> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP0:%.*]] = extractelement <2 x i32> [[SPLIT]], i64 0
-; RM-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x i32> poison, i32 [[TMP0]], i32 0
+; RM-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x i32> poison, i32 [[TMP0]], i64 0
 ; RM-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP1:%.*]] = mul <1 x i32> [[SPLAT_SPLAT]], [[BLOCK]]
 ; RM-NEXT:    [[BLOCK5:%.*]] = shufflevector <3 x i32> [[SPLIT4]], <3 x i32> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP2:%.*]] = extractelement <2 x i32> [[SPLIT]], i64 1
-; RM-NEXT:    [[SPLAT_SPLATINSERT6:%.*]] = insertelement <1 x i32> poison, i32 [[TMP2]], i32 0
+; RM-NEXT:    [[SPLAT_SPLATINSERT6:%.*]] = insertelement <1 x i32> poison, i32 [[TMP2]], i64 0
 ; RM-NEXT:    [[SPLAT_SPLAT7:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT6]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP3:%.*]] = mul <1 x i32> [[SPLAT_SPLAT7]], [[BLOCK5]]
 ; RM-NEXT:    [[TMP4:%.*]] = add <1 x i32> [[TMP1]], [[TMP3]]
@@ -140,12 +140,12 @@ define <9 x i32> @multiply_2x3(<6 x i32> %a, <6 x i32> %b) {
 ; RM-NEXT:    [[TMP6:%.*]] = shufflevector <3 x i32> undef, <3 x i32> [[TMP5]], <3 x i32> <i32 3, i32 1, i32 2>
 ; RM-NEXT:    [[BLOCK8:%.*]] = shufflevector <3 x i32> [[SPLIT3]], <3 x i32> poison, <1 x i32> <i32 1>
 ; RM-NEXT:    [[TMP7:%.*]] = extractelement <2 x i32> [[SPLIT]], i64 0
-; RM-NEXT:    [[SPLAT_SPLATINSERT9:%.*]] = insertelement <1 x i32> poison, i32 [[TMP7]], i32 0
+; RM-NEXT:    [[SPLAT_SPLATINSERT9:%.*]] = insertelement <1 x i32> poison, i32 [[TMP7]], i64 0
 ; RM-NEXT:    [[SPLAT_SPLAT10:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT9]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP8:%.*]] = mul <1 x i32> [[SPLAT_SPLAT10]], [[BLOCK8]]
 ; RM-NEXT:    [[BLOCK11:%.*]] = shufflevector <3 x i32> [[SPLIT4]], <3 x i32> poison, <1 x i32> <i32 1>
 ; RM-NEXT:    [[TMP9:%.*]] = extractelement <2 x i32> [[SPLIT]], i64 1
-; RM-NEXT:    [[SPLAT_SPLATINSERT12:%.*]] = insertelement <1 x i32> poison, i32 [[TMP9]], i32 0
+; RM-NEXT:    [[SPLAT_SPLATINSERT12:%.*]] = insertelement <1 x i32> poison, i32 [[TMP9]], i64 0
 ; RM-NEXT:    [[SPLAT_SPLAT13:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT12]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP10:%.*]] = mul <1 x i32> [[SPLAT_SPLAT13]], [[BLOCK11]]
 ; RM-NEXT:    [[TMP11:%.*]] = add <1 x i32> [[TMP8]], [[TMP10]]
@@ -153,12 +153,12 @@ define <9 x i32> @multiply_2x3(<6 x i32> %a, <6 x i32> %b) {
 ; RM-NEXT:    [[TMP13:%.*]] = shufflevector <3 x i32> [[TMP6]], <3 x i32> [[TMP12]], <3 x i32> <i32 0, i32 3, i32 2>
 ; RM-NEXT:    [[BLOCK14:%.*]] = shufflevector <3 x i32> [[SPLIT3]], <3 x i32> poison, <1 x i32> <i32 2>
 ; RM-NEXT:    [[TMP14:%.*]] = extractelement <2 x i32> [[SPLIT]], i64 0
-; RM-NEXT:    [[SPLAT_SPLATINSERT15:%.*]] = insertelement <1 x i32> poison, i32 [[TMP14]], i32 0
+; RM-NEXT:    [[SPLAT_SPLATINSERT15:%.*]] = insertelement <1 x i32> poison, i32 [[TMP14]], i64 0
 ; RM-NEXT:    [[SPLAT_SPLAT16:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT15]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP15:%.*]] = mul <1 x i32> [[SPLAT_SPLAT16]], [[BLOCK14]]
 ; RM-NEXT:    [[BLOCK17:%.*]] = shufflevector <3 x i32> [[SPLIT4]], <3 x i32> poison, <1 x i32> <i32 2>
 ; RM-NEXT:    [[TMP16:%.*]] = extractelement <2 x i32> [[SPLIT]], i64 1
-; RM-NEXT:    [[SPLAT_SPLATINSERT18:%.*]] = insertelement <1 x i32> poison, i32 [[TMP16]], i32 0
+; RM-NEXT:    [[SPLAT_SPLATINSERT18:%.*]] = insertelement <1 x i32> poison, i32 [[TMP16]], i64 0
 ; RM-NEXT:    [[SPLAT_SPLAT19:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT18]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP17:%.*]] = mul <1 x i32> [[SPLAT_SPLAT19]], [[BLOCK17]]
 ; RM-NEXT:    [[TMP18:%.*]] = add <1 x i32> [[TMP15]], [[TMP17]]
@@ -166,12 +166,12 @@ define <9 x i32> @multiply_2x3(<6 x i32> %a, <6 x i32> %b) {
 ; RM-NEXT:    [[TMP20:%.*]] = shufflevector <3 x i32> [[TMP13]], <3 x i32> [[TMP19]], <3 x i32> <i32 0, i32 1, i32 3>
 ; RM-NEXT:    [[BLOCK20:%.*]] = shufflevector <3 x i32> [[SPLIT3]], <3 x i32> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP21:%.*]] = extractelement <2 x i32> [[SPLIT1]], i64 0
-; RM-NEXT:    [[SPLAT_SPLATINSERT21:%.*]] = insertelement <1 x i32> poison, i32 [[TMP21]], i32 0
+; RM-NEXT:    [[SPLAT_SPLATINSERT21:%.*]] = insertelement <1 x i32> poison, i32 [[TMP21]], i64 0
 ; RM-NEXT:    [[SPLAT_SPLAT22:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT21]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP22:%.*]] = mul <1 x i32> [[SPLAT_SPLAT22]], [[BLOCK20]]
 ; RM-NEXT:    [[BLOCK23:%.*]] = shufflevector <3 x i32> [[SPLIT4]], <3 x i32> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP23:%.*]] = extractelement <2 x i32> [[SPLIT1]], i64 1
-; RM-NEXT:    [[SPLAT_SPLATINSERT24:%.*]] = insertelement <1 x i32> poison, i32 [[TMP23]], i32 0
+; RM-NEXT:    [[SPLAT_SPLATINSERT24:%.*]] = insertelement <1 x i32> poison, i32 [[TMP23]], i64 0
 ; RM-NEXT:    [[SPLAT_SPLAT25:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT24]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP24:%.*]] = mul <1 x i32> [[SPLAT_SPLAT25]], [[BLOCK23]]
 ; RM-NEXT:    [[TMP25:%.*]] = add <1 x i32> [[TMP22]], [[TMP24]]
@@ -179,12 +179,12 @@ define <9 x i32> @multiply_2x3(<6 x i32> %a, <6 x i32> %b) {
 ; RM-NEXT:    [[TMP27:%.*]] = shufflevector <3 x i32> undef, <3 x i32> [[TMP26]], <3 x i32> <i32 3, i32 1, i32 2>
 ; RM-NEXT:    [[BLOCK26:%.*]] = shufflevector <3 x i32> [[SPLIT3]], <3 x i32> poison, <1 x i32> <i32 1>
 ; RM-NEXT:    [[TMP28:%.*]] = extractelement <2 x i32> [[SPLIT1]], i64 0
-; RM-NEXT:    [[SPLAT_SPLATINSERT27:%.*]] = insertelement <1 x i32> poison, i32 [[TMP28]], i32 0
+; RM-NEXT:    [[SPLAT_SPLATINSERT27:%.*]] = insertelement <1 x i32> poison, i32 [[TMP28]], i64 0
 ; RM-NEXT:    [[SPLAT_SPLAT28:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT27]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP29:%.*]] = mul <1 x i32> [[SPLAT_SPLAT28]], [[BLOCK26]]
 ; RM-NEXT:    [[BLOCK29:%.*]] = shufflevector <3 x i32> [[SPLIT4]], <3 x i32> poison, <1 x i32> <i32 1>
 ; RM-NEXT:    [[TMP30:%.*]] = extractelement <2 x i32> [[SPLIT1]], i64 1
-; RM-NEXT:    [[SPLAT_SPLATINSERT30:%.*]] = insertelement <1 x i32> poison, i32 [[TMP30]], i32 0
+; RM-NEXT:    [[SPLAT_SPLATINSERT30:%.*]] = insertelement <1 x i32> poison, i32 [[TMP30]], i64 0
 ; RM-NEXT:    [[SPLAT_SPLAT31:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT30]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP31:%.*]] = mul <1 x i32> [[SPLAT_SPLAT31]], [[BLOCK29]]
 ; RM-NEXT:    [[TMP32:%.*]] = add <1 x i32> [[TMP29]], [[TMP31]]
@@ -192,12 +192,12 @@ define <9 x i32> @multiply_2x3(<6 x i32> %a, <6 x i32> %b) {
 ; RM-NEXT:    [[TMP34:%.*]] = shufflevector <3 x i32> [[TMP27]], <3 x i32> [[TMP33]], <3 x i32> <i32 0, i32 3, i32 2>
 ; RM-NEXT:    [[BLOCK32:%.*]] = shufflevector <3 x i32> [[SPLIT3]], <3 x i32> poison, <1 x i32> <i32 2>
 ; RM-NEXT:    [[TMP35:%.*]] = extractelement <2 x i32> [[SPLIT1]], i64 0
-; RM-NEXT:    [[SPLAT_SPLATINSERT33:%.*]] = insertelement <1 x i32> poison, i32 [[TMP35]], i32 0
+; RM-NEXT:    [[SPLAT_SPLATINSERT33:%.*]] = insertelement <1 x i32> poison, i32 [[TMP35]], i64 0
 ; RM-NEXT:    [[SPLAT_SPLAT34:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT33]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP36:%.*]] = mul <1 x i32> [[SPLAT_SPLAT34]], [[BLOCK32]]
 ; RM-NEXT:    [[BLOCK35:%.*]] = shufflevector <3 x i32> [[SPLIT4]], <3 x i32> poison, <1 x i32> <i32 2>
 ; RM-NEXT:    [[TMP37:%.*]] = extractelement <2 x i32> [[SPLIT1]], i64 1
-; RM-NEXT:    [[SPLAT_SPLATINSERT36:%.*]] = insertelement <1 x i32> poison, i32 [[TMP37]], i32 0
+; RM-NEXT:    [[SPLAT_SPLATINSERT36:%.*]] = insertelement <1 x i32> poison, i32 [[TMP37]], i64 0
 ; RM-NEXT:    [[SPLAT_SPLAT37:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT36]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP38:%.*]] = mul <1 x i32> [[SPLAT_SPLAT37]], [[BLOCK35]]
 ; RM-NEXT:    [[TMP39:%.*]] = add <1 x i32> [[TMP36]], [[TMP38]]
@@ -205,12 +205,12 @@ define <9 x i32> @multiply_2x3(<6 x i32> %a, <6 x i32> %b) {
 ; RM-NEXT:    [[TMP41:%.*]] = shufflevector <3 x i32> [[TMP34]], <3 x i32> [[TMP40]], <3 x i32> <i32 0, i32 1, i32 3>
 ; RM-NEXT:    [[BLOCK38:%.*]] = shufflevector <3 x i32> [[SPLIT3]], <3 x i32> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP42:%.*]] = extractelement <2 x i32> [[SPLIT2]], i64 0
-; RM-NEXT:    [[SPLAT_SPLATINSERT39:%.*]] = insertelement <1 x i32> poison, i32 [[TMP42]], i32 0
+; RM-NEXT:    [[SPLAT_SPLATINSERT39:%.*]] = insertelement <1 x i32> poison, i32 [[TMP42]], i64 0
 ; RM-NEXT:    [[SPLAT_SPLAT40:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT39]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP43:%.*]] = mul <1 x i32> [[SPLAT_SPLAT40]], [[BLOCK38]]
 ; RM-NEXT:    [[BLOCK41:%.*]] = shufflevector <3 x i32> [[SPLIT4]], <3 x i32> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP44:%.*]] = extractelement <2 x i32> [[SPLIT2]], i64 1
-; RM-NEXT:    [[SPLAT_SPLATINSERT42:%.*]] = insertelement <1 x i32> poison, i32 [[TMP44]], i32 0
+; RM-NEXT:    [[SPLAT_SPLATINSERT42:%.*]] = insertelement <1 x i32> poison, i32 [[TMP44]], i64 0
 ; RM-NEXT:    [[SPLAT_SPLAT43:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT42]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP45:%.*]] = mul <1 x i32> [[SPLAT_SPLAT43]], [[BLOCK41]]
 ; RM-NEXT:    [[TMP46:%.*]] = add <1 x i32> [[TMP43]], [[TMP45]]
@@ -218,12 +218,12 @@ define <9 x i32> @multiply_2x3(<6 x i32> %a, <6 x i32> %b) {
 ; RM-NEXT:    [[TMP48:%.*]] = shufflevector <3 x i32> undef, <3 x i32> [[TMP47]], <3 x i32> <i32 3, i32 1, i32 2>
 ; RM-NEXT:    [[BLOCK44:%.*]] = shufflevector <3 x i32> [[SPLIT3]], <3 x i32> poison, <1 x i32> <i32 1>
 ; RM-NEXT:    [[TMP49:%.*]] = extractelement <2 x i32> [[SPLIT2]], i64 0
-; RM-NEXT:    [[SPLAT_SPLATINSERT45:%.*]] = insertelement <1 x i32> poison, i32 [[TMP49]], i32 0
+; RM-NEXT:    [[SPLAT_SPLATINSERT45:%.*]] = insertelement <1 x i32> poison, i32 [[TMP49]], i64 0
 ; RM-NEXT:    [[SPLAT_SPLAT46:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT45]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP50:%.*]] = mul <1 x i32> [[SPLAT_SPLAT46]], [[BLOCK44]]
 ; RM-NEXT:    [[BLOCK47:%.*]] = shufflevector <3 x i32> [[SPLIT4]], <3 x i32> poison, <1 x i32> <i32 1>
 ; RM-NEXT:    [[TMP51:%.*]] = extractelement <2 x i32> [[SPLIT2]], i64 1
-; RM-NEXT:    [[SPLAT_SPLATINSERT48:%.*]] = insertelement <1 x i32> poison, i32 [[TMP51]], i32 0
+; RM-NEXT:    [[SPLAT_SPLATINSERT48:%.*]] = insertelement <1 x i32> poison, i32 [[TMP51]], i64 0
 ; RM-NEXT:    [[SPLAT_SPLAT49:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT48]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP52:%.*]] = mul <1 x i32> [[SPLAT_SPLAT49]], [[BLOCK47]]
 ; RM-NEXT:    [[TMP53:%.*]] = add <1 x i32> [[TMP50]], [[TMP52]]
@@ -231,12 +231,12 @@ define <9 x i32> @multiply_2x3(<6 x i32> %a, <6 x i32> %b) {
 ; RM-NEXT:    [[TMP55:%.*]] = shufflevector <3 x i32> [[TMP48]], <3 x i32> [[TMP54]], <3 x i32> <i32 0, i32 3, i32 2>
 ; RM-NEXT:    [[BLOCK50:%.*]] = shufflevector <3 x i32> [[SPLIT3]], <3 x i32> poison, <1 x i32> <i32 2>
 ; RM-NEXT:    [[TMP56:%.*]] = extractelement <2 x i32> [[SPLIT2]], i64 0
-; RM-NEXT:    [[SPLAT_SPLATINSERT51:%.*]] = insertelement <1 x i32> poison, i32 [[TMP56]], i32 0
+; RM-NEXT:    [[SPLAT_SPLATINSERT51:%.*]] = insertelement <1 x i32> poison, i32 [[TMP56]], i64 0
 ; RM-NEXT:    [[SPLAT_SPLAT52:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT51]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP57:%.*]] = mul <1 x i32> [[SPLAT_SPLAT52]], [[BLOCK50]]
 ; RM-NEXT:    [[BLOCK53:%.*]] = shufflevector <3 x i32> [[SPLIT4]], <3 x i32> poison, <1 x i32> <i32 2>
 ; RM-NEXT:    [[TMP58:%.*]] = extractelement <2 x i32> [[SPLIT2]], i64 1
-; RM-NEXT:    [[SPLAT_SPLATINSERT54:%.*]] = insertelement <1 x i32> poison, i32 [[TMP58]], i32 0
+; RM-NEXT:    [[SPLAT_SPLATINSERT54:%.*]] = insertelement <1 x i32> poison, i32 [[TMP58]], i64 0
 ; RM-NEXT:    [[SPLAT_SPLAT55:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT54]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP59:%.*]] = mul <1 x i32> [[SPLAT_SPLAT55]], [[BLOCK53]]
 ; RM-NEXT:    [[TMP60:%.*]] = add <1 x i32> [[TMP57]], [[TMP59]]

diff  --git a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-i32.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-i32.ll
index f4f732e00bcfd..68b71cad799d3 100644
--- a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-i32.ll
+++ b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-i32.ll
@@ -11,12 +11,12 @@ define <4 x i32> @multiply_2x2(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-NEXT:    [[SPLIT3:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> poison, <2 x i32> <i32 2, i32 3>
 ; CHECK-NEXT:    [[BLOCK:%.*]] = shufflevector <2 x i32> [[SPLIT]], <2 x i32> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x i32> [[SPLIT2]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x i32> poison, i32 [[TMP0]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x i32> poison, i32 [[TMP0]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP1:%.*]] = mul <1 x i32> [[BLOCK]], [[SPLAT_SPLAT]]
 ; CHECK-NEXT:    [[BLOCK4:%.*]] = shufflevector <2 x i32> [[SPLIT1]], <2 x i32> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x i32> [[SPLIT2]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT5:%.*]] = insertelement <1 x i32> poison, i32 [[TMP2]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT5:%.*]] = insertelement <1 x i32> poison, i32 [[TMP2]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT6:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT5]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP3:%.*]] = mul <1 x i32> [[BLOCK4]], [[SPLAT_SPLAT6]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = add <1 x i32> [[TMP1]], [[TMP3]]
@@ -24,12 +24,12 @@ define <4 x i32> @multiply_2x2(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <2 x i32> undef, <2 x i32> [[TMP5]], <2 x i32> <i32 2, i32 1>
 ; CHECK-NEXT:    [[BLOCK7:%.*]] = shufflevector <2 x i32> [[SPLIT]], <2 x i32> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <2 x i32> [[SPLIT2]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT8:%.*]] = insertelement <1 x i32> poison, i32 [[TMP7]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT8:%.*]] = insertelement <1 x i32> poison, i32 [[TMP7]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT9:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT8]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP8:%.*]] = mul <1 x i32> [[BLOCK7]], [[SPLAT_SPLAT9]]
 ; CHECK-NEXT:    [[BLOCK10:%.*]] = shufflevector <2 x i32> [[SPLIT1]], <2 x i32> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <2 x i32> [[SPLIT2]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT11:%.*]] = insertelement <1 x i32> poison, i32 [[TMP9]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT11:%.*]] = insertelement <1 x i32> poison, i32 [[TMP9]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT12:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT11]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP10:%.*]] = mul <1 x i32> [[BLOCK10]], [[SPLAT_SPLAT12]]
 ; CHECK-NEXT:    [[TMP11:%.*]] = add <1 x i32> [[TMP8]], [[TMP10]]
@@ -37,12 +37,12 @@ define <4 x i32> @multiply_2x2(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-NEXT:    [[TMP13:%.*]] = shufflevector <2 x i32> [[TMP6]], <2 x i32> [[TMP12]], <2 x i32> <i32 0, i32 2>
 ; CHECK-NEXT:    [[BLOCK13:%.*]] = shufflevector <2 x i32> [[SPLIT]], <2 x i32> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <2 x i32> [[SPLIT3]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT14:%.*]] = insertelement <1 x i32> poison, i32 [[TMP14]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT14:%.*]] = insertelement <1 x i32> poison, i32 [[TMP14]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT15:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT14]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP15:%.*]] = mul <1 x i32> [[BLOCK13]], [[SPLAT_SPLAT15]]
 ; CHECK-NEXT:    [[BLOCK16:%.*]] = shufflevector <2 x i32> [[SPLIT1]], <2 x i32> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <2 x i32> [[SPLIT3]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT17:%.*]] = insertelement <1 x i32> poison, i32 [[TMP16]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT17:%.*]] = insertelement <1 x i32> poison, i32 [[TMP16]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT18:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT17]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP17:%.*]] = mul <1 x i32> [[BLOCK16]], [[SPLAT_SPLAT18]]
 ; CHECK-NEXT:    [[TMP18:%.*]] = add <1 x i32> [[TMP15]], [[TMP17]]
@@ -50,12 +50,12 @@ define <4 x i32> @multiply_2x2(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-NEXT:    [[TMP20:%.*]] = shufflevector <2 x i32> undef, <2 x i32> [[TMP19]], <2 x i32> <i32 2, i32 1>
 ; CHECK-NEXT:    [[BLOCK19:%.*]] = shufflevector <2 x i32> [[SPLIT]], <2 x i32> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP21:%.*]] = extractelement <2 x i32> [[SPLIT3]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT20:%.*]] = insertelement <1 x i32> poison, i32 [[TMP21]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT20:%.*]] = insertelement <1 x i32> poison, i32 [[TMP21]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT21:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT20]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP22:%.*]] = mul <1 x i32> [[BLOCK19]], [[SPLAT_SPLAT21]]
 ; CHECK-NEXT:    [[BLOCK22:%.*]] = shufflevector <2 x i32> [[SPLIT1]], <2 x i32> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP23:%.*]] = extractelement <2 x i32> [[SPLIT3]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT23:%.*]] = insertelement <1 x i32> poison, i32 [[TMP23]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT23:%.*]] = insertelement <1 x i32> poison, i32 [[TMP23]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT24:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT23]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP24:%.*]] = mul <1 x i32> [[BLOCK22]], [[SPLAT_SPLAT24]]
 ; CHECK-NEXT:    [[TMP25:%.*]] = add <1 x i32> [[TMP22]], [[TMP24]]
@@ -79,28 +79,28 @@ define <4 x i32> @multiply_1x2(<2 x i32> %a, <2 x i32> %b) {
 ; CHECK-NEXT:    [[SPLIT2:%.*]] = shufflevector <2 x i32> [[B]], <2 x i32> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[BLOCK:%.*]] = shufflevector <2 x i32> [[SPLIT]], <2 x i32> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <1 x i32> [[SPLIT1]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x i32> poison, i32 [[TMP0]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x i32> poison, i32 [[TMP0]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP1:%.*]] = mul <1 x i32> [[BLOCK]], [[SPLAT_SPLAT]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <1 x i32> [[TMP1]], <1 x i32> poison, <2 x i32> <i32 0, i32 undef>
 ; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <2 x i32> undef, <2 x i32> [[TMP2]], <2 x i32> <i32 2, i32 1>
 ; CHECK-NEXT:    [[BLOCK3:%.*]] = shufflevector <2 x i32> [[SPLIT]], <2 x i32> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <1 x i32> [[SPLIT1]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT4:%.*]] = insertelement <1 x i32> poison, i32 [[TMP4]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT4:%.*]] = insertelement <1 x i32> poison, i32 [[TMP4]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT5:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT4]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP5:%.*]] = mul <1 x i32> [[BLOCK3]], [[SPLAT_SPLAT5]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <1 x i32> [[TMP5]], <1 x i32> poison, <2 x i32> <i32 0, i32 undef>
 ; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> [[TMP6]], <2 x i32> <i32 0, i32 2>
 ; CHECK-NEXT:    [[BLOCK6:%.*]] = shufflevector <2 x i32> [[SPLIT]], <2 x i32> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <1 x i32> [[SPLIT2]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT7:%.*]] = insertelement <1 x i32> poison, i32 [[TMP8]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT7:%.*]] = insertelement <1 x i32> poison, i32 [[TMP8]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT8:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT7]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP9:%.*]] = mul <1 x i32> [[BLOCK6]], [[SPLAT_SPLAT8]]
 ; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <1 x i32> [[TMP9]], <1 x i32> poison, <2 x i32> <i32 0, i32 undef>
 ; CHECK-NEXT:    [[TMP11:%.*]] = shufflevector <2 x i32> undef, <2 x i32> [[TMP10]], <2 x i32> <i32 2, i32 1>
 ; CHECK-NEXT:    [[BLOCK9:%.*]] = shufflevector <2 x i32> [[SPLIT]], <2 x i32> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <1 x i32> [[SPLIT2]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT10:%.*]] = insertelement <1 x i32> poison, i32 [[TMP12]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT10:%.*]] = insertelement <1 x i32> poison, i32 [[TMP12]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT11:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT10]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP13:%.*]] = mul <1 x i32> [[BLOCK9]], [[SPLAT_SPLAT11]]
 ; CHECK-NEXT:    [[TMP14:%.*]] = shufflevector <1 x i32> [[TMP13]], <1 x i32> poison, <2 x i32> <i32 0, i32 undef>
@@ -125,12 +125,12 @@ define <9 x i32> @multiply_2x3(<6 x i32> %a, <6 x i32> %b) {
 ; CHECK-NEXT:    [[SPLIT4:%.*]] = shufflevector <6 x i32> [[B]], <6 x i32> poison, <2 x i32> <i32 4, i32 5>
 ; CHECK-NEXT:    [[BLOCK:%.*]] = shufflevector <3 x i32> [[SPLIT]], <3 x i32> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x i32> [[SPLIT2]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x i32> poison, i32 [[TMP0]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x i32> poison, i32 [[TMP0]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP1:%.*]] = mul <1 x i32> [[BLOCK]], [[SPLAT_SPLAT]]
 ; CHECK-NEXT:    [[BLOCK5:%.*]] = shufflevector <3 x i32> [[SPLIT1]], <3 x i32> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x i32> [[SPLIT2]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT6:%.*]] = insertelement <1 x i32> poison, i32 [[TMP2]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT6:%.*]] = insertelement <1 x i32> poison, i32 [[TMP2]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT7:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT6]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP3:%.*]] = mul <1 x i32> [[BLOCK5]], [[SPLAT_SPLAT7]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = add <1 x i32> [[TMP1]], [[TMP3]]
@@ -138,12 +138,12 @@ define <9 x i32> @multiply_2x3(<6 x i32> %a, <6 x i32> %b) {
 ; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <3 x i32> undef, <3 x i32> [[TMP5]], <3 x i32> <i32 3, i32 1, i32 2>
 ; CHECK-NEXT:    [[BLOCK8:%.*]] = shufflevector <3 x i32> [[SPLIT]], <3 x i32> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <2 x i32> [[SPLIT2]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT9:%.*]] = insertelement <1 x i32> poison, i32 [[TMP7]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT9:%.*]] = insertelement <1 x i32> poison, i32 [[TMP7]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT10:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT9]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP8:%.*]] = mul <1 x i32> [[BLOCK8]], [[SPLAT_SPLAT10]]
 ; CHECK-NEXT:    [[BLOCK11:%.*]] = shufflevector <3 x i32> [[SPLIT1]], <3 x i32> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <2 x i32> [[SPLIT2]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT12:%.*]] = insertelement <1 x i32> poison, i32 [[TMP9]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT12:%.*]] = insertelement <1 x i32> poison, i32 [[TMP9]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT13:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT12]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP10:%.*]] = mul <1 x i32> [[BLOCK11]], [[SPLAT_SPLAT13]]
 ; CHECK-NEXT:    [[TMP11:%.*]] = add <1 x i32> [[TMP8]], [[TMP10]]
@@ -151,12 +151,12 @@ define <9 x i32> @multiply_2x3(<6 x i32> %a, <6 x i32> %b) {
 ; CHECK-NEXT:    [[TMP13:%.*]] = shufflevector <3 x i32> [[TMP6]], <3 x i32> [[TMP12]], <3 x i32> <i32 0, i32 3, i32 2>
 ; CHECK-NEXT:    [[BLOCK14:%.*]] = shufflevector <3 x i32> [[SPLIT]], <3 x i32> poison, <1 x i32> <i32 2>
 ; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <2 x i32> [[SPLIT2]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT15:%.*]] = insertelement <1 x i32> poison, i32 [[TMP14]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT15:%.*]] = insertelement <1 x i32> poison, i32 [[TMP14]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT16:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT15]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP15:%.*]] = mul <1 x i32> [[BLOCK14]], [[SPLAT_SPLAT16]]
 ; CHECK-NEXT:    [[BLOCK17:%.*]] = shufflevector <3 x i32> [[SPLIT1]], <3 x i32> poison, <1 x i32> <i32 2>
 ; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <2 x i32> [[SPLIT2]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT18:%.*]] = insertelement <1 x i32> poison, i32 [[TMP16]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT18:%.*]] = insertelement <1 x i32> poison, i32 [[TMP16]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT19:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT18]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP17:%.*]] = mul <1 x i32> [[BLOCK17]], [[SPLAT_SPLAT19]]
 ; CHECK-NEXT:    [[TMP18:%.*]] = add <1 x i32> [[TMP15]], [[TMP17]]
@@ -164,12 +164,12 @@ define <9 x i32> @multiply_2x3(<6 x i32> %a, <6 x i32> %b) {
 ; CHECK-NEXT:    [[TMP20:%.*]] = shufflevector <3 x i32> [[TMP13]], <3 x i32> [[TMP19]], <3 x i32> <i32 0, i32 1, i32 3>
 ; CHECK-NEXT:    [[BLOCK20:%.*]] = shufflevector <3 x i32> [[SPLIT]], <3 x i32> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP21:%.*]] = extractelement <2 x i32> [[SPLIT3]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT21:%.*]] = insertelement <1 x i32> poison, i32 [[TMP21]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT21:%.*]] = insertelement <1 x i32> poison, i32 [[TMP21]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT22:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT21]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP22:%.*]] = mul <1 x i32> [[BLOCK20]], [[SPLAT_SPLAT22]]
 ; CHECK-NEXT:    [[BLOCK23:%.*]] = shufflevector <3 x i32> [[SPLIT1]], <3 x i32> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP23:%.*]] = extractelement <2 x i32> [[SPLIT3]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT24:%.*]] = insertelement <1 x i32> poison, i32 [[TMP23]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT24:%.*]] = insertelement <1 x i32> poison, i32 [[TMP23]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT25:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT24]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP24:%.*]] = mul <1 x i32> [[BLOCK23]], [[SPLAT_SPLAT25]]
 ; CHECK-NEXT:    [[TMP25:%.*]] = add <1 x i32> [[TMP22]], [[TMP24]]
@@ -177,12 +177,12 @@ define <9 x i32> @multiply_2x3(<6 x i32> %a, <6 x i32> %b) {
 ; CHECK-NEXT:    [[TMP27:%.*]] = shufflevector <3 x i32> undef, <3 x i32> [[TMP26]], <3 x i32> <i32 3, i32 1, i32 2>
 ; CHECK-NEXT:    [[BLOCK26:%.*]] = shufflevector <3 x i32> [[SPLIT]], <3 x i32> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP28:%.*]] = extractelement <2 x i32> [[SPLIT3]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT27:%.*]] = insertelement <1 x i32> poison, i32 [[TMP28]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT27:%.*]] = insertelement <1 x i32> poison, i32 [[TMP28]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT28:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT27]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP29:%.*]] = mul <1 x i32> [[BLOCK26]], [[SPLAT_SPLAT28]]
 ; CHECK-NEXT:    [[BLOCK29:%.*]] = shufflevector <3 x i32> [[SPLIT1]], <3 x i32> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP30:%.*]] = extractelement <2 x i32> [[SPLIT3]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT30:%.*]] = insertelement <1 x i32> poison, i32 [[TMP30]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT30:%.*]] = insertelement <1 x i32> poison, i32 [[TMP30]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT31:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT30]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP31:%.*]] = mul <1 x i32> [[BLOCK29]], [[SPLAT_SPLAT31]]
 ; CHECK-NEXT:    [[TMP32:%.*]] = add <1 x i32> [[TMP29]], [[TMP31]]
@@ -190,12 +190,12 @@ define <9 x i32> @multiply_2x3(<6 x i32> %a, <6 x i32> %b) {
 ; CHECK-NEXT:    [[TMP34:%.*]] = shufflevector <3 x i32> [[TMP27]], <3 x i32> [[TMP33]], <3 x i32> <i32 0, i32 3, i32 2>
 ; CHECK-NEXT:    [[BLOCK32:%.*]] = shufflevector <3 x i32> [[SPLIT]], <3 x i32> poison, <1 x i32> <i32 2>
 ; CHECK-NEXT:    [[TMP35:%.*]] = extractelement <2 x i32> [[SPLIT3]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT33:%.*]] = insertelement <1 x i32> poison, i32 [[TMP35]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT33:%.*]] = insertelement <1 x i32> poison, i32 [[TMP35]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT34:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT33]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP36:%.*]] = mul <1 x i32> [[BLOCK32]], [[SPLAT_SPLAT34]]
 ; CHECK-NEXT:    [[BLOCK35:%.*]] = shufflevector <3 x i32> [[SPLIT1]], <3 x i32> poison, <1 x i32> <i32 2>
 ; CHECK-NEXT:    [[TMP37:%.*]] = extractelement <2 x i32> [[SPLIT3]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT36:%.*]] = insertelement <1 x i32> poison, i32 [[TMP37]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT36:%.*]] = insertelement <1 x i32> poison, i32 [[TMP37]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT37:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT36]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP38:%.*]] = mul <1 x i32> [[BLOCK35]], [[SPLAT_SPLAT37]]
 ; CHECK-NEXT:    [[TMP39:%.*]] = add <1 x i32> [[TMP36]], [[TMP38]]
@@ -203,12 +203,12 @@ define <9 x i32> @multiply_2x3(<6 x i32> %a, <6 x i32> %b) {
 ; CHECK-NEXT:    [[TMP41:%.*]] = shufflevector <3 x i32> [[TMP34]], <3 x i32> [[TMP40]], <3 x i32> <i32 0, i32 1, i32 3>
 ; CHECK-NEXT:    [[BLOCK38:%.*]] = shufflevector <3 x i32> [[SPLIT]], <3 x i32> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP42:%.*]] = extractelement <2 x i32> [[SPLIT4]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT39:%.*]] = insertelement <1 x i32> poison, i32 [[TMP42]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT39:%.*]] = insertelement <1 x i32> poison, i32 [[TMP42]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT40:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT39]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP43:%.*]] = mul <1 x i32> [[BLOCK38]], [[SPLAT_SPLAT40]]
 ; CHECK-NEXT:    [[BLOCK41:%.*]] = shufflevector <3 x i32> [[SPLIT1]], <3 x i32> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP44:%.*]] = extractelement <2 x i32> [[SPLIT4]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT42:%.*]] = insertelement <1 x i32> poison, i32 [[TMP44]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT42:%.*]] = insertelement <1 x i32> poison, i32 [[TMP44]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT43:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT42]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP45:%.*]] = mul <1 x i32> [[BLOCK41]], [[SPLAT_SPLAT43]]
 ; CHECK-NEXT:    [[TMP46:%.*]] = add <1 x i32> [[TMP43]], [[TMP45]]
@@ -216,12 +216,12 @@ define <9 x i32> @multiply_2x3(<6 x i32> %a, <6 x i32> %b) {
 ; CHECK-NEXT:    [[TMP48:%.*]] = shufflevector <3 x i32> undef, <3 x i32> [[TMP47]], <3 x i32> <i32 3, i32 1, i32 2>
 ; CHECK-NEXT:    [[BLOCK44:%.*]] = shufflevector <3 x i32> [[SPLIT]], <3 x i32> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP49:%.*]] = extractelement <2 x i32> [[SPLIT4]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT45:%.*]] = insertelement <1 x i32> poison, i32 [[TMP49]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT45:%.*]] = insertelement <1 x i32> poison, i32 [[TMP49]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT46:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT45]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP50:%.*]] = mul <1 x i32> [[BLOCK44]], [[SPLAT_SPLAT46]]
 ; CHECK-NEXT:    [[BLOCK47:%.*]] = shufflevector <3 x i32> [[SPLIT1]], <3 x i32> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP51:%.*]] = extractelement <2 x i32> [[SPLIT4]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT48:%.*]] = insertelement <1 x i32> poison, i32 [[TMP51]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT48:%.*]] = insertelement <1 x i32> poison, i32 [[TMP51]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT49:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT48]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP52:%.*]] = mul <1 x i32> [[BLOCK47]], [[SPLAT_SPLAT49]]
 ; CHECK-NEXT:    [[TMP53:%.*]] = add <1 x i32> [[TMP50]], [[TMP52]]
@@ -229,12 +229,12 @@ define <9 x i32> @multiply_2x3(<6 x i32> %a, <6 x i32> %b) {
 ; CHECK-NEXT:    [[TMP55:%.*]] = shufflevector <3 x i32> [[TMP48]], <3 x i32> [[TMP54]], <3 x i32> <i32 0, i32 3, i32 2>
 ; CHECK-NEXT:    [[BLOCK50:%.*]] = shufflevector <3 x i32> [[SPLIT]], <3 x i32> poison, <1 x i32> <i32 2>
 ; CHECK-NEXT:    [[TMP56:%.*]] = extractelement <2 x i32> [[SPLIT4]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT51:%.*]] = insertelement <1 x i32> poison, i32 [[TMP56]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT51:%.*]] = insertelement <1 x i32> poison, i32 [[TMP56]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT52:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT51]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP57:%.*]] = mul <1 x i32> [[BLOCK50]], [[SPLAT_SPLAT52]]
 ; CHECK-NEXT:    [[BLOCK53:%.*]] = shufflevector <3 x i32> [[SPLIT1]], <3 x i32> poison, <1 x i32> <i32 2>
 ; CHECK-NEXT:    [[TMP58:%.*]] = extractelement <2 x i32> [[SPLIT4]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT54:%.*]] = insertelement <1 x i32> poison, i32 [[TMP58]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT54:%.*]] = insertelement <1 x i32> poison, i32 [[TMP58]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT55:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT54]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP59:%.*]] = mul <1 x i32> [[BLOCK53]], [[SPLAT_SPLAT55]]
 ; CHECK-NEXT:    [[TMP60:%.*]] = add <1 x i32> [[TMP57]], [[TMP59]]

diff  --git a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-left-transpose-row-major.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-left-transpose-row-major.ll
index 63eb8e759891d..ff6bc25d4c45d 100644
--- a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-left-transpose-row-major.ll
+++ b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-left-transpose-row-major.ll
@@ -11,12 +11,12 @@ define <4 x double> @multiply_left_transpose_2x2(<4 x double> %a, <4 x double> %
 ; CHECK-NEXT:    [[SPLIT3:%.*]] = shufflevector <4 x double> [[B]], <4 x double> poison, <2 x i32> <i32 2, i32 3>
 ; CHECK-NEXT:    [[BLOCK:%.*]] = shufflevector <2 x double> [[SPLIT2]], <2 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x double> [[SPLIT]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x double> poison, double [[TMP0]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x double> poison, double [[TMP0]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP1:%.*]] = fmul <1 x double> [[SPLAT_SPLAT]], [[BLOCK]]
 ; CHECK-NEXT:    [[BLOCK4:%.*]] = shufflevector <2 x double> [[SPLIT3]], <2 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x double> [[SPLIT1]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT5:%.*]] = insertelement <1 x double> poison, double [[TMP2]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT5:%.*]] = insertelement <1 x double> poison, double [[TMP2]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT6:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT5]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP3:%.*]] = fmul <1 x double> [[SPLAT_SPLAT6]], [[BLOCK4]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = fadd <1 x double> [[TMP1]], [[TMP3]]
@@ -24,12 +24,12 @@ define <4 x double> @multiply_left_transpose_2x2(<4 x double> %a, <4 x double> %
 ; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <2 x double> undef, <2 x double> [[TMP5]], <2 x i32> <i32 2, i32 1>
 ; CHECK-NEXT:    [[BLOCK7:%.*]] = shufflevector <2 x double> [[SPLIT2]], <2 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <2 x double> [[SPLIT]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT8:%.*]] = insertelement <1 x double> poison, double [[TMP7]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT8:%.*]] = insertelement <1 x double> poison, double [[TMP7]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT9:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT8]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP8:%.*]] = fmul <1 x double> [[SPLAT_SPLAT9]], [[BLOCK7]]
 ; CHECK-NEXT:    [[BLOCK10:%.*]] = shufflevector <2 x double> [[SPLIT3]], <2 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <2 x double> [[SPLIT1]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT11:%.*]] = insertelement <1 x double> poison, double [[TMP9]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT11:%.*]] = insertelement <1 x double> poison, double [[TMP9]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT12:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT11]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP10:%.*]] = fmul <1 x double> [[SPLAT_SPLAT12]], [[BLOCK10]]
 ; CHECK-NEXT:    [[TMP11:%.*]] = fadd <1 x double> [[TMP8]], [[TMP10]]
@@ -37,12 +37,12 @@ define <4 x double> @multiply_left_transpose_2x2(<4 x double> %a, <4 x double> %
 ; CHECK-NEXT:    [[TMP13:%.*]] = shufflevector <2 x double> [[TMP6]], <2 x double> [[TMP12]], <2 x i32> <i32 0, i32 2>
 ; CHECK-NEXT:    [[BLOCK13:%.*]] = shufflevector <2 x double> [[SPLIT2]], <2 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <2 x double> [[SPLIT]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT14:%.*]] = insertelement <1 x double> poison, double [[TMP14]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT14:%.*]] = insertelement <1 x double> poison, double [[TMP14]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT15:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT14]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP15:%.*]] = fmul <1 x double> [[SPLAT_SPLAT15]], [[BLOCK13]]
 ; CHECK-NEXT:    [[BLOCK16:%.*]] = shufflevector <2 x double> [[SPLIT3]], <2 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <2 x double> [[SPLIT1]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT17:%.*]] = insertelement <1 x double> poison, double [[TMP16]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT17:%.*]] = insertelement <1 x double> poison, double [[TMP16]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT18:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT17]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP17:%.*]] = fmul <1 x double> [[SPLAT_SPLAT18]], [[BLOCK16]]
 ; CHECK-NEXT:    [[TMP18:%.*]] = fadd <1 x double> [[TMP15]], [[TMP17]]
@@ -50,12 +50,12 @@ define <4 x double> @multiply_left_transpose_2x2(<4 x double> %a, <4 x double> %
 ; CHECK-NEXT:    [[TMP20:%.*]] = shufflevector <2 x double> undef, <2 x double> [[TMP19]], <2 x i32> <i32 2, i32 1>
 ; CHECK-NEXT:    [[BLOCK19:%.*]] = shufflevector <2 x double> [[SPLIT2]], <2 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP21:%.*]] = extractelement <2 x double> [[SPLIT]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT20:%.*]] = insertelement <1 x double> poison, double [[TMP21]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT20:%.*]] = insertelement <1 x double> poison, double [[TMP21]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT21:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT20]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP22:%.*]] = fmul <1 x double> [[SPLAT_SPLAT21]], [[BLOCK19]]
 ; CHECK-NEXT:    [[BLOCK22:%.*]] = shufflevector <2 x double> [[SPLIT3]], <2 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP23:%.*]] = extractelement <2 x double> [[SPLIT1]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT23:%.*]] = insertelement <1 x double> poison, double [[TMP23]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT23:%.*]] = insertelement <1 x double> poison, double [[TMP23]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT24:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT23]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP24:%.*]] = fmul <1 x double> [[SPLAT_SPLAT24]], [[BLOCK22]]
 ; CHECK-NEXT:    [[TMP25:%.*]] = fadd <1 x double> [[TMP22]], [[TMP24]]

diff  --git a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-right-transpose.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-right-transpose.ll
index 92f0f1a7e2f38..0b178335d76c1 100644
--- a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-right-transpose.ll
+++ b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-right-transpose.ll
@@ -10,12 +10,12 @@ define <4 x double> @multiply_right_transpose_2x2(<4 x double> %a, <4 x double>
 ; CHECK-NEXT:    [[SPLIT3:%.*]] = shufflevector <4 x double> [[B]], <4 x double> poison, <2 x i32> <i32 2, i32 3>
 ; CHECK-NEXT:    [[BLOCK:%.*]] = shufflevector <2 x double> [[SPLIT]], <2 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x double> poison, double [[TMP0]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x double> poison, double [[TMP0]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP1:%.*]] = fmul <1 x double> [[BLOCK]], [[SPLAT_SPLAT]]
 ; CHECK-NEXT:    [[BLOCK4:%.*]] = shufflevector <2 x double> [[SPLIT1]], <2 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x double> [[SPLIT3]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT5:%.*]] = insertelement <1 x double> poison, double [[TMP2]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT5:%.*]] = insertelement <1 x double> poison, double [[TMP2]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT6:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT5]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP3:%.*]] = fmul <1 x double> [[BLOCK4]], [[SPLAT_SPLAT6]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = fadd <1 x double> [[TMP1]], [[TMP3]]
@@ -23,12 +23,12 @@ define <4 x double> @multiply_right_transpose_2x2(<4 x double> %a, <4 x double>
 ; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <2 x double> undef, <2 x double> [[TMP5]], <2 x i32> <i32 2, i32 1>
 ; CHECK-NEXT:    [[BLOCK7:%.*]] = shufflevector <2 x double> [[SPLIT]], <2 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT8:%.*]] = insertelement <1 x double> poison, double [[TMP7]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT8:%.*]] = insertelement <1 x double> poison, double [[TMP7]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT9:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT8]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP8:%.*]] = fmul <1 x double> [[BLOCK7]], [[SPLAT_SPLAT9]]
 ; CHECK-NEXT:    [[BLOCK10:%.*]] = shufflevector <2 x double> [[SPLIT1]], <2 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <2 x double> [[SPLIT3]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT11:%.*]] = insertelement <1 x double> poison, double [[TMP9]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT11:%.*]] = insertelement <1 x double> poison, double [[TMP9]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT12:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT11]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP10:%.*]] = fmul <1 x double> [[BLOCK10]], [[SPLAT_SPLAT12]]
 ; CHECK-NEXT:    [[TMP11:%.*]] = fadd <1 x double> [[TMP8]], [[TMP10]]
@@ -36,12 +36,12 @@ define <4 x double> @multiply_right_transpose_2x2(<4 x double> %a, <4 x double>
 ; CHECK-NEXT:    [[TMP13:%.*]] = shufflevector <2 x double> [[TMP6]], <2 x double> [[TMP12]], <2 x i32> <i32 0, i32 2>
 ; CHECK-NEXT:    [[BLOCK13:%.*]] = shufflevector <2 x double> [[SPLIT]], <2 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT14:%.*]] = insertelement <1 x double> poison, double [[TMP14]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT14:%.*]] = insertelement <1 x double> poison, double [[TMP14]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT15:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT14]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP15:%.*]] = fmul <1 x double> [[BLOCK13]], [[SPLAT_SPLAT15]]
 ; CHECK-NEXT:    [[BLOCK16:%.*]] = shufflevector <2 x double> [[SPLIT1]], <2 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <2 x double> [[SPLIT3]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT17:%.*]] = insertelement <1 x double> poison, double [[TMP16]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT17:%.*]] = insertelement <1 x double> poison, double [[TMP16]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT18:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT17]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP17:%.*]] = fmul <1 x double> [[BLOCK16]], [[SPLAT_SPLAT18]]
 ; CHECK-NEXT:    [[TMP18:%.*]] = fadd <1 x double> [[TMP15]], [[TMP17]]
@@ -49,12 +49,12 @@ define <4 x double> @multiply_right_transpose_2x2(<4 x double> %a, <4 x double>
 ; CHECK-NEXT:    [[TMP20:%.*]] = shufflevector <2 x double> undef, <2 x double> [[TMP19]], <2 x i32> <i32 2, i32 1>
 ; CHECK-NEXT:    [[BLOCK19:%.*]] = shufflevector <2 x double> [[SPLIT]], <2 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP21:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT20:%.*]] = insertelement <1 x double> poison, double [[TMP21]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT20:%.*]] = insertelement <1 x double> poison, double [[TMP21]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT21:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT20]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP22:%.*]] = fmul <1 x double> [[BLOCK19]], [[SPLAT_SPLAT21]]
 ; CHECK-NEXT:    [[BLOCK22:%.*]] = shufflevector <2 x double> [[SPLIT1]], <2 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP23:%.*]] = extractelement <2 x double> [[SPLIT3]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT23:%.*]] = insertelement <1 x double> poison, double [[TMP23]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT23:%.*]] = insertelement <1 x double> poison, double [[TMP23]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT24:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT23]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP24:%.*]] = fmul <1 x double> [[BLOCK22]], [[SPLAT_SPLAT24]]
 ; CHECK-NEXT:    [[TMP25:%.*]] = fadd <1 x double> [[TMP22]], [[TMP24]]
@@ -99,18 +99,18 @@ define <4 x double> @multiply_right_transpose_2x3x2(<6 x double> %a, <6 x double
 ; CHECK-NEXT:    [[SPLIT5:%.*]] = shufflevector <6 x double> [[B]], <6 x double> poison, <2 x i32> <i32 4, i32 5>
 ; CHECK-NEXT:    [[BLOCK:%.*]] = shufflevector <2 x double> [[SPLIT]], <2 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <2 x double> [[SPLIT3]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x double> poison, double [[TMP12]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x double> poison, double [[TMP12]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP13:%.*]] = fmul <1 x double> [[BLOCK]], [[SPLAT_SPLAT]]
 ; CHECK-NEXT:    [[BLOCK6:%.*]] = shufflevector <2 x double> [[SPLIT1]], <2 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <2 x double> [[SPLIT4]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT7:%.*]] = insertelement <1 x double> poison, double [[TMP14]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT7:%.*]] = insertelement <1 x double> poison, double [[TMP14]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT8:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT7]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP15:%.*]] = fmul <1 x double> [[BLOCK6]], [[SPLAT_SPLAT8]]
 ; CHECK-NEXT:    [[TMP16:%.*]] = fadd <1 x double> [[TMP13]], [[TMP15]]
 ; CHECK-NEXT:    [[BLOCK9:%.*]] = shufflevector <2 x double> [[SPLIT2]], <2 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP17:%.*]] = extractelement <2 x double> [[SPLIT5]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT10:%.*]] = insertelement <1 x double> poison, double [[TMP17]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT10:%.*]] = insertelement <1 x double> poison, double [[TMP17]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT11:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT10]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP18:%.*]] = fmul <1 x double> [[BLOCK9]], [[SPLAT_SPLAT11]]
 ; CHECK-NEXT:    [[TMP19:%.*]] = fadd <1 x double> [[TMP16]], [[TMP18]]
@@ -118,18 +118,18 @@ define <4 x double> @multiply_right_transpose_2x3x2(<6 x double> %a, <6 x double
 ; CHECK-NEXT:    [[TMP21:%.*]] = shufflevector <2 x double> undef, <2 x double> [[TMP20]], <2 x i32> <i32 2, i32 1>
 ; CHECK-NEXT:    [[BLOCK12:%.*]] = shufflevector <2 x double> [[SPLIT]], <2 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP22:%.*]] = extractelement <2 x double> [[SPLIT3]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT13:%.*]] = insertelement <1 x double> poison, double [[TMP22]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT13:%.*]] = insertelement <1 x double> poison, double [[TMP22]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT14:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT13]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP23:%.*]] = fmul <1 x double> [[BLOCK12]], [[SPLAT_SPLAT14]]
 ; CHECK-NEXT:    [[BLOCK15:%.*]] = shufflevector <2 x double> [[SPLIT1]], <2 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP24:%.*]] = extractelement <2 x double> [[SPLIT4]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT16:%.*]] = insertelement <1 x double> poison, double [[TMP24]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT16:%.*]] = insertelement <1 x double> poison, double [[TMP24]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT17:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT16]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP25:%.*]] = fmul <1 x double> [[BLOCK15]], [[SPLAT_SPLAT17]]
 ; CHECK-NEXT:    [[TMP26:%.*]] = fadd <1 x double> [[TMP23]], [[TMP25]]
 ; CHECK-NEXT:    [[BLOCK18:%.*]] = shufflevector <2 x double> [[SPLIT2]], <2 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP27:%.*]] = extractelement <2 x double> [[SPLIT5]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT19:%.*]] = insertelement <1 x double> poison, double [[TMP27]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT19:%.*]] = insertelement <1 x double> poison, double [[TMP27]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT20:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT19]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP28:%.*]] = fmul <1 x double> [[BLOCK18]], [[SPLAT_SPLAT20]]
 ; CHECK-NEXT:    [[TMP29:%.*]] = fadd <1 x double> [[TMP26]], [[TMP28]]
@@ -137,18 +137,18 @@ define <4 x double> @multiply_right_transpose_2x3x2(<6 x double> %a, <6 x double
 ; CHECK-NEXT:    [[TMP31:%.*]] = shufflevector <2 x double> [[TMP21]], <2 x double> [[TMP30]], <2 x i32> <i32 0, i32 2>
 ; CHECK-NEXT:    [[BLOCK21:%.*]] = shufflevector <2 x double> [[SPLIT]], <2 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP32:%.*]] = extractelement <2 x double> [[SPLIT3]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT22:%.*]] = insertelement <1 x double> poison, double [[TMP32]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT22:%.*]] = insertelement <1 x double> poison, double [[TMP32]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT23:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT22]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP33:%.*]] = fmul <1 x double> [[BLOCK21]], [[SPLAT_SPLAT23]]
 ; CHECK-NEXT:    [[BLOCK24:%.*]] = shufflevector <2 x double> [[SPLIT1]], <2 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP34:%.*]] = extractelement <2 x double> [[SPLIT4]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT25:%.*]] = insertelement <1 x double> poison, double [[TMP34]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT25:%.*]] = insertelement <1 x double> poison, double [[TMP34]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT26:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT25]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP35:%.*]] = fmul <1 x double> [[BLOCK24]], [[SPLAT_SPLAT26]]
 ; CHECK-NEXT:    [[TMP36:%.*]] = fadd <1 x double> [[TMP33]], [[TMP35]]
 ; CHECK-NEXT:    [[BLOCK27:%.*]] = shufflevector <2 x double> [[SPLIT2]], <2 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP37:%.*]] = extractelement <2 x double> [[SPLIT5]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT28:%.*]] = insertelement <1 x double> poison, double [[TMP37]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT28:%.*]] = insertelement <1 x double> poison, double [[TMP37]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT29:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT28]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP38:%.*]] = fmul <1 x double> [[BLOCK27]], [[SPLAT_SPLAT29]]
 ; CHECK-NEXT:    [[TMP39:%.*]] = fadd <1 x double> [[TMP36]], [[TMP38]]
@@ -156,18 +156,18 @@ define <4 x double> @multiply_right_transpose_2x3x2(<6 x double> %a, <6 x double
 ; CHECK-NEXT:    [[TMP41:%.*]] = shufflevector <2 x double> undef, <2 x double> [[TMP40]], <2 x i32> <i32 2, i32 1>
 ; CHECK-NEXT:    [[BLOCK30:%.*]] = shufflevector <2 x double> [[SPLIT]], <2 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP42:%.*]] = extractelement <2 x double> [[SPLIT3]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT31:%.*]] = insertelement <1 x double> poison, double [[TMP42]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT31:%.*]] = insertelement <1 x double> poison, double [[TMP42]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT32:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT31]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP43:%.*]] = fmul <1 x double> [[BLOCK30]], [[SPLAT_SPLAT32]]
 ; CHECK-NEXT:    [[BLOCK33:%.*]] = shufflevector <2 x double> [[SPLIT1]], <2 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP44:%.*]] = extractelement <2 x double> [[SPLIT4]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT34:%.*]] = insertelement <1 x double> poison, double [[TMP44]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT34:%.*]] = insertelement <1 x double> poison, double [[TMP44]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT35:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT34]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP45:%.*]] = fmul <1 x double> [[BLOCK33]], [[SPLAT_SPLAT35]]
 ; CHECK-NEXT:    [[TMP46:%.*]] = fadd <1 x double> [[TMP43]], [[TMP45]]
 ; CHECK-NEXT:    [[BLOCK36:%.*]] = shufflevector <2 x double> [[SPLIT2]], <2 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP47:%.*]] = extractelement <2 x double> [[SPLIT5]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT37:%.*]] = insertelement <1 x double> poison, double [[TMP47]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT37:%.*]] = insertelement <1 x double> poison, double [[TMP47]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT38:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT37]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP48:%.*]] = fmul <1 x double> [[BLOCK36]], [[SPLAT_SPLAT38]]
 ; CHECK-NEXT:    [[TMP49:%.*]] = fadd <1 x double> [[TMP46]], [[TMP48]]

diff  --git a/llvm/test/Transforms/LowerMatrixIntrinsics/preserve-existing-fast-math-flags.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/preserve-existing-fast-math-flags.ll
index 5775724e7af0b..ed6791009b680 100644
--- a/llvm/test/Transforms/LowerMatrixIntrinsics/preserve-existing-fast-math-flags.ll
+++ b/llvm/test/Transforms/LowerMatrixIntrinsics/preserve-existing-fast-math-flags.ll
@@ -9,48 +9,48 @@ define <4 x float> @preserve_fmf_fast(<4 x float> %m, <4 x float> %n) {
 ; CHECK-NEXT:    [[SPLIT3:%.*]] = shufflevector <4 x float> [[N]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
 ; CHECK-NEXT:    [[BLOCK:%.*]] = shufflevector <2 x float> [[SPLIT]], <2 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x float> [[SPLIT2]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x float> poison, float [[TMP1]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x float> poison, float [[TMP1]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP2:%.*]] = fmul fast <1 x float> [[BLOCK]], [[SPLAT_SPLAT]]
 ; CHECK-NEXT:    [[BLOCK4:%.*]] = shufflevector <2 x float> [[SPLIT1]], <2 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <2 x float> [[SPLIT2]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT5:%.*]] = insertelement <1 x float> poison, float [[TMP3]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT5:%.*]] = insertelement <1 x float> poison, float [[TMP3]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT6:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT5]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP4:%.*]] = call fast <1 x float> @llvm.fmuladd.v1f32(<1 x float> [[BLOCK4]], <1 x float> [[SPLAT_SPLAT6]], <1 x float> [[TMP2]])
 ; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <1 x float> [[TMP4]], <1 x float> poison, <2 x i32> <i32 0, i32 undef>
 ; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <2 x float> undef, <2 x float> [[TMP5]], <2 x i32> <i32 2, i32 1>
 ; CHECK-NEXT:    [[BLOCK7:%.*]] = shufflevector <2 x float> [[SPLIT]], <2 x float> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <2 x float> [[SPLIT2]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT8:%.*]] = insertelement <1 x float> poison, float [[TMP7]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT8:%.*]] = insertelement <1 x float> poison, float [[TMP7]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT9:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT8]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP8:%.*]] = fmul fast <1 x float> [[BLOCK7]], [[SPLAT_SPLAT9]]
 ; CHECK-NEXT:    [[BLOCK10:%.*]] = shufflevector <2 x float> [[SPLIT1]], <2 x float> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <2 x float> [[SPLIT2]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT11:%.*]] = insertelement <1 x float> poison, float [[TMP9]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT11:%.*]] = insertelement <1 x float> poison, float [[TMP9]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT12:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT11]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP10:%.*]] = call fast <1 x float> @llvm.fmuladd.v1f32(<1 x float> [[BLOCK10]], <1 x float> [[SPLAT_SPLAT12]], <1 x float> [[TMP8]])
 ; CHECK-NEXT:    [[TMP11:%.*]] = shufflevector <1 x float> [[TMP10]], <1 x float> poison, <2 x i32> <i32 0, i32 undef>
 ; CHECK-NEXT:    [[TMP12:%.*]] = shufflevector <2 x float> [[TMP6]], <2 x float> [[TMP11]], <2 x i32> <i32 0, i32 2>
 ; CHECK-NEXT:    [[BLOCK13:%.*]] = shufflevector <2 x float> [[SPLIT]], <2 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <2 x float> [[SPLIT3]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT14:%.*]] = insertelement <1 x float> poison, float [[TMP13]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT14:%.*]] = insertelement <1 x float> poison, float [[TMP13]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT15:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT14]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP14:%.*]] = fmul fast <1 x float> [[BLOCK13]], [[SPLAT_SPLAT15]]
 ; CHECK-NEXT:    [[BLOCK16:%.*]] = shufflevector <2 x float> [[SPLIT1]], <2 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <2 x float> [[SPLIT3]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT17:%.*]] = insertelement <1 x float> poison, float [[TMP15]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT17:%.*]] = insertelement <1 x float> poison, float [[TMP15]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT18:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT17]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP16:%.*]] = call fast <1 x float> @llvm.fmuladd.v1f32(<1 x float> [[BLOCK16]], <1 x float> [[SPLAT_SPLAT18]], <1 x float> [[TMP14]])
 ; CHECK-NEXT:    [[TMP17:%.*]] = shufflevector <1 x float> [[TMP16]], <1 x float> poison, <2 x i32> <i32 0, i32 undef>
 ; CHECK-NEXT:    [[TMP18:%.*]] = shufflevector <2 x float> undef, <2 x float> [[TMP17]], <2 x i32> <i32 2, i32 1>
 ; CHECK-NEXT:    [[BLOCK19:%.*]] = shufflevector <2 x float> [[SPLIT]], <2 x float> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP19:%.*]] = extractelement <2 x float> [[SPLIT3]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT20:%.*]] = insertelement <1 x float> poison, float [[TMP19]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT20:%.*]] = insertelement <1 x float> poison, float [[TMP19]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT21:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT20]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP20:%.*]] = fmul fast <1 x float> [[BLOCK19]], [[SPLAT_SPLAT21]]
 ; CHECK-NEXT:    [[BLOCK22:%.*]] = shufflevector <2 x float> [[SPLIT1]], <2 x float> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP21:%.*]] = extractelement <2 x float> [[SPLIT3]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT23:%.*]] = insertelement <1 x float> poison, float [[TMP21]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT23:%.*]] = insertelement <1 x float> poison, float [[TMP21]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT24:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT23]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP22:%.*]] = call fast <1 x float> @llvm.fmuladd.v1f32(<1 x float> [[BLOCK22]], <1 x float> [[SPLAT_SPLAT24]], <1 x float> [[TMP20]])
 ; CHECK-NEXT:    [[TMP23:%.*]] = shufflevector <1 x float> [[TMP22]], <1 x float> poison, <2 x i32> <i32 0, i32 undef>
@@ -75,12 +75,12 @@ define <4 x float> @preserve_fmf_reassoc(<4 x float> %m, <4 x float> %n) {
 ; CHECK-NEXT:    [[SPLIT3:%.*]] = shufflevector <4 x float> [[N]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
 ; CHECK-NEXT:    [[BLOCK:%.*]] = shufflevector <2 x float> [[SPLIT]], <2 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x float> [[SPLIT2]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x float> poison, float [[TMP1]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x float> poison, float [[TMP1]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP2:%.*]] = fmul reassoc <1 x float> [[BLOCK]], [[SPLAT_SPLAT]]
 ; CHECK-NEXT:    [[BLOCK4:%.*]] = shufflevector <2 x float> [[SPLIT1]], <2 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <2 x float> [[SPLIT2]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT5:%.*]] = insertelement <1 x float> poison, float [[TMP3]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT5:%.*]] = insertelement <1 x float> poison, float [[TMP3]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT6:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT5]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP4:%.*]] = fmul reassoc <1 x float> [[BLOCK4]], [[SPLAT_SPLAT6]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = fadd reassoc <1 x float> [[TMP2]], [[TMP4]]
@@ -88,12 +88,12 @@ define <4 x float> @preserve_fmf_reassoc(<4 x float> %m, <4 x float> %n) {
 ; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <2 x float> undef, <2 x float> [[TMP6]], <2 x i32> <i32 2, i32 1>
 ; CHECK-NEXT:    [[BLOCK7:%.*]] = shufflevector <2 x float> [[SPLIT]], <2 x float> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <2 x float> [[SPLIT2]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT8:%.*]] = insertelement <1 x float> poison, float [[TMP8]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT8:%.*]] = insertelement <1 x float> poison, float [[TMP8]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT9:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT8]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP9:%.*]] = fmul reassoc <1 x float> [[BLOCK7]], [[SPLAT_SPLAT9]]
 ; CHECK-NEXT:    [[BLOCK10:%.*]] = shufflevector <2 x float> [[SPLIT1]], <2 x float> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <2 x float> [[SPLIT2]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT11:%.*]] = insertelement <1 x float> poison, float [[TMP10]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT11:%.*]] = insertelement <1 x float> poison, float [[TMP10]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT12:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT11]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP11:%.*]] = fmul reassoc <1 x float> [[BLOCK10]], [[SPLAT_SPLAT12]]
 ; CHECK-NEXT:    [[TMP12:%.*]] = fadd reassoc <1 x float> [[TMP9]], [[TMP11]]
@@ -101,12 +101,12 @@ define <4 x float> @preserve_fmf_reassoc(<4 x float> %m, <4 x float> %n) {
 ; CHECK-NEXT:    [[TMP14:%.*]] = shufflevector <2 x float> [[TMP7]], <2 x float> [[TMP13]], <2 x i32> <i32 0, i32 2>
 ; CHECK-NEXT:    [[BLOCK13:%.*]] = shufflevector <2 x float> [[SPLIT]], <2 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <2 x float> [[SPLIT3]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT14:%.*]] = insertelement <1 x float> poison, float [[TMP15]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT14:%.*]] = insertelement <1 x float> poison, float [[TMP15]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT15:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT14]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP16:%.*]] = fmul reassoc <1 x float> [[BLOCK13]], [[SPLAT_SPLAT15]]
 ; CHECK-NEXT:    [[BLOCK16:%.*]] = shufflevector <2 x float> [[SPLIT1]], <2 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP17:%.*]] = extractelement <2 x float> [[SPLIT3]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT17:%.*]] = insertelement <1 x float> poison, float [[TMP17]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT17:%.*]] = insertelement <1 x float> poison, float [[TMP17]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT18:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT17]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP18:%.*]] = fmul reassoc <1 x float> [[BLOCK16]], [[SPLAT_SPLAT18]]
 ; CHECK-NEXT:    [[TMP19:%.*]] = fadd reassoc <1 x float> [[TMP16]], [[TMP18]]
@@ -114,12 +114,12 @@ define <4 x float> @preserve_fmf_reassoc(<4 x float> %m, <4 x float> %n) {
 ; CHECK-NEXT:    [[TMP21:%.*]] = shufflevector <2 x float> undef, <2 x float> [[TMP20]], <2 x i32> <i32 2, i32 1>
 ; CHECK-NEXT:    [[BLOCK19:%.*]] = shufflevector <2 x float> [[SPLIT]], <2 x float> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP22:%.*]] = extractelement <2 x float> [[SPLIT3]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT20:%.*]] = insertelement <1 x float> poison, float [[TMP22]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT20:%.*]] = insertelement <1 x float> poison, float [[TMP22]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT21:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT20]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP23:%.*]] = fmul reassoc <1 x float> [[BLOCK19]], [[SPLAT_SPLAT21]]
 ; CHECK-NEXT:    [[BLOCK22:%.*]] = shufflevector <2 x float> [[SPLIT1]], <2 x float> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP24:%.*]] = extractelement <2 x float> [[SPLIT3]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT23:%.*]] = insertelement <1 x float> poison, float [[TMP24]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT23:%.*]] = insertelement <1 x float> poison, float [[TMP24]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT24:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT23]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP25:%.*]] = fmul reassoc <1 x float> [[BLOCK22]], [[SPLAT_SPLAT24]]
 ; CHECK-NEXT:    [[TMP26:%.*]] = fadd reassoc <1 x float> [[TMP23]], [[TMP25]]
@@ -145,48 +145,48 @@ define <4 x float> @preserve_fmf_contract_reassoc(<4 x float> %m, <4 x float> %n
 ; CHECK-NEXT:    [[SPLIT3:%.*]] = shufflevector <4 x float> [[N]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
 ; CHECK-NEXT:    [[BLOCK:%.*]] = shufflevector <2 x float> [[SPLIT]], <2 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x float> [[SPLIT2]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x float> poison, float [[TMP1]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x float> poison, float [[TMP1]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP2:%.*]] = fmul reassoc contract <1 x float> [[BLOCK]], [[SPLAT_SPLAT]]
 ; CHECK-NEXT:    [[BLOCK4:%.*]] = shufflevector <2 x float> [[SPLIT1]], <2 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <2 x float> [[SPLIT2]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT5:%.*]] = insertelement <1 x float> poison, float [[TMP3]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT5:%.*]] = insertelement <1 x float> poison, float [[TMP3]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT6:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT5]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP4:%.*]] = call reassoc contract <1 x float> @llvm.fmuladd.v1f32(<1 x float> [[BLOCK4]], <1 x float> [[SPLAT_SPLAT6]], <1 x float> [[TMP2]])
 ; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <1 x float> [[TMP4]], <1 x float> poison, <2 x i32> <i32 0, i32 undef>
 ; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <2 x float> undef, <2 x float> [[TMP5]], <2 x i32> <i32 2, i32 1>
 ; CHECK-NEXT:    [[BLOCK7:%.*]] = shufflevector <2 x float> [[SPLIT]], <2 x float> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <2 x float> [[SPLIT2]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT8:%.*]] = insertelement <1 x float> poison, float [[TMP7]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT8:%.*]] = insertelement <1 x float> poison, float [[TMP7]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT9:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT8]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP8:%.*]] = fmul reassoc contract <1 x float> [[BLOCK7]], [[SPLAT_SPLAT9]]
 ; CHECK-NEXT:    [[BLOCK10:%.*]] = shufflevector <2 x float> [[SPLIT1]], <2 x float> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <2 x float> [[SPLIT2]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT11:%.*]] = insertelement <1 x float> poison, float [[TMP9]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT11:%.*]] = insertelement <1 x float> poison, float [[TMP9]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT12:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT11]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP10:%.*]] = call reassoc contract <1 x float> @llvm.fmuladd.v1f32(<1 x float> [[BLOCK10]], <1 x float> [[SPLAT_SPLAT12]], <1 x float> [[TMP8]])
 ; CHECK-NEXT:    [[TMP11:%.*]] = shufflevector <1 x float> [[TMP10]], <1 x float> poison, <2 x i32> <i32 0, i32 undef>
 ; CHECK-NEXT:    [[TMP12:%.*]] = shufflevector <2 x float> [[TMP6]], <2 x float> [[TMP11]], <2 x i32> <i32 0, i32 2>
 ; CHECK-NEXT:    [[BLOCK13:%.*]] = shufflevector <2 x float> [[SPLIT]], <2 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <2 x float> [[SPLIT3]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT14:%.*]] = insertelement <1 x float> poison, float [[TMP13]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT14:%.*]] = insertelement <1 x float> poison, float [[TMP13]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT15:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT14]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP14:%.*]] = fmul reassoc contract <1 x float> [[BLOCK13]], [[SPLAT_SPLAT15]]
 ; CHECK-NEXT:    [[BLOCK16:%.*]] = shufflevector <2 x float> [[SPLIT1]], <2 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <2 x float> [[SPLIT3]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT17:%.*]] = insertelement <1 x float> poison, float [[TMP15]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT17:%.*]] = insertelement <1 x float> poison, float [[TMP15]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT18:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT17]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP16:%.*]] = call reassoc contract <1 x float> @llvm.fmuladd.v1f32(<1 x float> [[BLOCK16]], <1 x float> [[SPLAT_SPLAT18]], <1 x float> [[TMP14]])
 ; CHECK-NEXT:    [[TMP17:%.*]] = shufflevector <1 x float> [[TMP16]], <1 x float> poison, <2 x i32> <i32 0, i32 undef>
 ; CHECK-NEXT:    [[TMP18:%.*]] = shufflevector <2 x float> undef, <2 x float> [[TMP17]], <2 x i32> <i32 2, i32 1>
 ; CHECK-NEXT:    [[BLOCK19:%.*]] = shufflevector <2 x float> [[SPLIT]], <2 x float> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP19:%.*]] = extractelement <2 x float> [[SPLIT3]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT20:%.*]] = insertelement <1 x float> poison, float [[TMP19]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT20:%.*]] = insertelement <1 x float> poison, float [[TMP19]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT21:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT20]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP20:%.*]] = fmul reassoc contract <1 x float> [[BLOCK19]], [[SPLAT_SPLAT21]]
 ; CHECK-NEXT:    [[BLOCK22:%.*]] = shufflevector <2 x float> [[SPLIT1]], <2 x float> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP21:%.*]] = extractelement <2 x float> [[SPLIT3]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT23:%.*]] = insertelement <1 x float> poison, float [[TMP21]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT23:%.*]] = insertelement <1 x float> poison, float [[TMP21]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT24:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT23]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP22:%.*]] = call reassoc contract <1 x float> @llvm.fmuladd.v1f32(<1 x float> [[BLOCK22]], <1 x float> [[SPLAT_SPLAT24]], <1 x float> [[TMP20]])
 ; CHECK-NEXT:    [[TMP23:%.*]] = shufflevector <1 x float> [[TMP22]], <1 x float> poison, <2 x i32> <i32 0, i32 undef>

diff  --git a/llvm/test/Transforms/LowerMatrixIntrinsics/propagate-backwards-unsupported.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/propagate-backwards-unsupported.ll
index 92f0ca79f8841..bd1a10bd18f43 100644
--- a/llvm/test/Transforms/LowerMatrixIntrinsics/propagate-backwards-unsupported.ll
+++ b/llvm/test/Transforms/LowerMatrixIntrinsics/propagate-backwards-unsupported.ll
@@ -68,18 +68,18 @@ define <9 x double> @unsupported_phi(i1 %cond, <9 x double> %A, <9 x double> %B,
 ; CHECK-NEXT:    [[SPLIT11:%.*]] = shufflevector <9 x double> [[MERGE]], <9 x double> poison, <3 x i32> <i32 6, i32 7, i32 8>
 ; CHECK-NEXT:    [[BLOCK:%.*]] = shufflevector <3 x double> [[SPLIT6]], <3 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP42:%.*]] = extractelement <3 x double> [[SPLIT9]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x double> poison, double [[TMP42]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x double> poison, double [[TMP42]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP43:%.*]] = fmul <1 x double> [[BLOCK]], [[SPLAT_SPLAT]]
 ; CHECK-NEXT:    [[BLOCK12:%.*]] = shufflevector <3 x double> [[SPLIT7]], <3 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP44:%.*]] = extractelement <3 x double> [[SPLIT9]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT13:%.*]] = insertelement <1 x double> poison, double [[TMP44]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT13:%.*]] = insertelement <1 x double> poison, double [[TMP44]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT14:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT13]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP45:%.*]] = fmul <1 x double> [[BLOCK12]], [[SPLAT_SPLAT14]]
 ; CHECK-NEXT:    [[TMP46:%.*]] = fadd <1 x double> [[TMP43]], [[TMP45]]
 ; CHECK-NEXT:    [[BLOCK15:%.*]] = shufflevector <3 x double> [[SPLIT8]], <3 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP47:%.*]] = extractelement <3 x double> [[SPLIT9]], i64 2
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT16:%.*]] = insertelement <1 x double> poison, double [[TMP47]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT16:%.*]] = insertelement <1 x double> poison, double [[TMP47]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT17:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT16]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP48:%.*]] = fmul <1 x double> [[BLOCK15]], [[SPLAT_SPLAT17]]
 ; CHECK-NEXT:    [[TMP49:%.*]] = fadd <1 x double> [[TMP46]], [[TMP48]]
@@ -87,18 +87,18 @@ define <9 x double> @unsupported_phi(i1 %cond, <9 x double> %A, <9 x double> %B,
 ; CHECK-NEXT:    [[TMP51:%.*]] = shufflevector <3 x double> undef, <3 x double> [[TMP50]], <3 x i32> <i32 3, i32 1, i32 2>
 ; CHECK-NEXT:    [[BLOCK18:%.*]] = shufflevector <3 x double> [[SPLIT6]], <3 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP52:%.*]] = extractelement <3 x double> [[SPLIT9]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT19:%.*]] = insertelement <1 x double> poison, double [[TMP52]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT19:%.*]] = insertelement <1 x double> poison, double [[TMP52]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT20:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT19]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP53:%.*]] = fmul <1 x double> [[BLOCK18]], [[SPLAT_SPLAT20]]
 ; CHECK-NEXT:    [[BLOCK21:%.*]] = shufflevector <3 x double> [[SPLIT7]], <3 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP54:%.*]] = extractelement <3 x double> [[SPLIT9]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT22:%.*]] = insertelement <1 x double> poison, double [[TMP54]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT22:%.*]] = insertelement <1 x double> poison, double [[TMP54]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT23:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT22]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP55:%.*]] = fmul <1 x double> [[BLOCK21]], [[SPLAT_SPLAT23]]
 ; CHECK-NEXT:    [[TMP56:%.*]] = fadd <1 x double> [[TMP53]], [[TMP55]]
 ; CHECK-NEXT:    [[BLOCK24:%.*]] = shufflevector <3 x double> [[SPLIT8]], <3 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP57:%.*]] = extractelement <3 x double> [[SPLIT9]], i64 2
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT25:%.*]] = insertelement <1 x double> poison, double [[TMP57]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT25:%.*]] = insertelement <1 x double> poison, double [[TMP57]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT26:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT25]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP58:%.*]] = fmul <1 x double> [[BLOCK24]], [[SPLAT_SPLAT26]]
 ; CHECK-NEXT:    [[TMP59:%.*]] = fadd <1 x double> [[TMP56]], [[TMP58]]
@@ -106,18 +106,18 @@ define <9 x double> @unsupported_phi(i1 %cond, <9 x double> %A, <9 x double> %B,
 ; CHECK-NEXT:    [[TMP61:%.*]] = shufflevector <3 x double> [[TMP51]], <3 x double> [[TMP60]], <3 x i32> <i32 0, i32 3, i32 2>
 ; CHECK-NEXT:    [[BLOCK27:%.*]] = shufflevector <3 x double> [[SPLIT6]], <3 x double> poison, <1 x i32> <i32 2>
 ; CHECK-NEXT:    [[TMP62:%.*]] = extractelement <3 x double> [[SPLIT9]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT28:%.*]] = insertelement <1 x double> poison, double [[TMP62]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT28:%.*]] = insertelement <1 x double> poison, double [[TMP62]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT29:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT28]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP63:%.*]] = fmul <1 x double> [[BLOCK27]], [[SPLAT_SPLAT29]]
 ; CHECK-NEXT:    [[BLOCK30:%.*]] = shufflevector <3 x double> [[SPLIT7]], <3 x double> poison, <1 x i32> <i32 2>
 ; CHECK-NEXT:    [[TMP64:%.*]] = extractelement <3 x double> [[SPLIT9]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT31:%.*]] = insertelement <1 x double> poison, double [[TMP64]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT31:%.*]] = insertelement <1 x double> poison, double [[TMP64]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT32:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT31]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP65:%.*]] = fmul <1 x double> [[BLOCK30]], [[SPLAT_SPLAT32]]
 ; CHECK-NEXT:    [[TMP66:%.*]] = fadd <1 x double> [[TMP63]], [[TMP65]]
 ; CHECK-NEXT:    [[BLOCK33:%.*]] = shufflevector <3 x double> [[SPLIT8]], <3 x double> poison, <1 x i32> <i32 2>
 ; CHECK-NEXT:    [[TMP67:%.*]] = extractelement <3 x double> [[SPLIT9]], i64 2
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT34:%.*]] = insertelement <1 x double> poison, double [[TMP67]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT34:%.*]] = insertelement <1 x double> poison, double [[TMP67]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT35:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT34]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP68:%.*]] = fmul <1 x double> [[BLOCK33]], [[SPLAT_SPLAT35]]
 ; CHECK-NEXT:    [[TMP69:%.*]] = fadd <1 x double> [[TMP66]], [[TMP68]]
@@ -125,18 +125,18 @@ define <9 x double> @unsupported_phi(i1 %cond, <9 x double> %A, <9 x double> %B,
 ; CHECK-NEXT:    [[TMP71:%.*]] = shufflevector <3 x double> [[TMP61]], <3 x double> [[TMP70]], <3 x i32> <i32 0, i32 1, i32 3>
 ; CHECK-NEXT:    [[BLOCK36:%.*]] = shufflevector <3 x double> [[SPLIT6]], <3 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP72:%.*]] = extractelement <3 x double> [[SPLIT10]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT37:%.*]] = insertelement <1 x double> poison, double [[TMP72]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT37:%.*]] = insertelement <1 x double> poison, double [[TMP72]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT38:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT37]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP73:%.*]] = fmul <1 x double> [[BLOCK36]], [[SPLAT_SPLAT38]]
 ; CHECK-NEXT:    [[BLOCK39:%.*]] = shufflevector <3 x double> [[SPLIT7]], <3 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP74:%.*]] = extractelement <3 x double> [[SPLIT10]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT40:%.*]] = insertelement <1 x double> poison, double [[TMP74]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT40:%.*]] = insertelement <1 x double> poison, double [[TMP74]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT41:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT40]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP75:%.*]] = fmul <1 x double> [[BLOCK39]], [[SPLAT_SPLAT41]]
 ; CHECK-NEXT:    [[TMP76:%.*]] = fadd <1 x double> [[TMP73]], [[TMP75]]
 ; CHECK-NEXT:    [[BLOCK42:%.*]] = shufflevector <3 x double> [[SPLIT8]], <3 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP77:%.*]] = extractelement <3 x double> [[SPLIT10]], i64 2
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT43:%.*]] = insertelement <1 x double> poison, double [[TMP77]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT43:%.*]] = insertelement <1 x double> poison, double [[TMP77]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT44:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT43]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP78:%.*]] = fmul <1 x double> [[BLOCK42]], [[SPLAT_SPLAT44]]
 ; CHECK-NEXT:    [[TMP79:%.*]] = fadd <1 x double> [[TMP76]], [[TMP78]]
@@ -144,18 +144,18 @@ define <9 x double> @unsupported_phi(i1 %cond, <9 x double> %A, <9 x double> %B,
 ; CHECK-NEXT:    [[TMP81:%.*]] = shufflevector <3 x double> undef, <3 x double> [[TMP80]], <3 x i32> <i32 3, i32 1, i32 2>
 ; CHECK-NEXT:    [[BLOCK45:%.*]] = shufflevector <3 x double> [[SPLIT6]], <3 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP82:%.*]] = extractelement <3 x double> [[SPLIT10]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT46:%.*]] = insertelement <1 x double> poison, double [[TMP82]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT46:%.*]] = insertelement <1 x double> poison, double [[TMP82]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT47:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT46]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP83:%.*]] = fmul <1 x double> [[BLOCK45]], [[SPLAT_SPLAT47]]
 ; CHECK-NEXT:    [[BLOCK48:%.*]] = shufflevector <3 x double> [[SPLIT7]], <3 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP84:%.*]] = extractelement <3 x double> [[SPLIT10]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT49:%.*]] = insertelement <1 x double> poison, double [[TMP84]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT49:%.*]] = insertelement <1 x double> poison, double [[TMP84]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT50:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT49]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP85:%.*]] = fmul <1 x double> [[BLOCK48]], [[SPLAT_SPLAT50]]
 ; CHECK-NEXT:    [[TMP86:%.*]] = fadd <1 x double> [[TMP83]], [[TMP85]]
 ; CHECK-NEXT:    [[BLOCK51:%.*]] = shufflevector <3 x double> [[SPLIT8]], <3 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP87:%.*]] = extractelement <3 x double> [[SPLIT10]], i64 2
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT52:%.*]] = insertelement <1 x double> poison, double [[TMP87]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT52:%.*]] = insertelement <1 x double> poison, double [[TMP87]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT53:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT52]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP88:%.*]] = fmul <1 x double> [[BLOCK51]], [[SPLAT_SPLAT53]]
 ; CHECK-NEXT:    [[TMP89:%.*]] = fadd <1 x double> [[TMP86]], [[TMP88]]
@@ -163,18 +163,18 @@ define <9 x double> @unsupported_phi(i1 %cond, <9 x double> %A, <9 x double> %B,
 ; CHECK-NEXT:    [[TMP91:%.*]] = shufflevector <3 x double> [[TMP81]], <3 x double> [[TMP90]], <3 x i32> <i32 0, i32 3, i32 2>
 ; CHECK-NEXT:    [[BLOCK54:%.*]] = shufflevector <3 x double> [[SPLIT6]], <3 x double> poison, <1 x i32> <i32 2>
 ; CHECK-NEXT:    [[TMP92:%.*]] = extractelement <3 x double> [[SPLIT10]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT55:%.*]] = insertelement <1 x double> poison, double [[TMP92]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT55:%.*]] = insertelement <1 x double> poison, double [[TMP92]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT56:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT55]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP93:%.*]] = fmul <1 x double> [[BLOCK54]], [[SPLAT_SPLAT56]]
 ; CHECK-NEXT:    [[BLOCK57:%.*]] = shufflevector <3 x double> [[SPLIT7]], <3 x double> poison, <1 x i32> <i32 2>
 ; CHECK-NEXT:    [[TMP94:%.*]] = extractelement <3 x double> [[SPLIT10]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT58:%.*]] = insertelement <1 x double> poison, double [[TMP94]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT58:%.*]] = insertelement <1 x double> poison, double [[TMP94]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT59:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT58]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP95:%.*]] = fmul <1 x double> [[BLOCK57]], [[SPLAT_SPLAT59]]
 ; CHECK-NEXT:    [[TMP96:%.*]] = fadd <1 x double> [[TMP93]], [[TMP95]]
 ; CHECK-NEXT:    [[BLOCK60:%.*]] = shufflevector <3 x double> [[SPLIT8]], <3 x double> poison, <1 x i32> <i32 2>
 ; CHECK-NEXT:    [[TMP97:%.*]] = extractelement <3 x double> [[SPLIT10]], i64 2
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT61:%.*]] = insertelement <1 x double> poison, double [[TMP97]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT61:%.*]] = insertelement <1 x double> poison, double [[TMP97]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT62:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT61]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP98:%.*]] = fmul <1 x double> [[BLOCK60]], [[SPLAT_SPLAT62]]
 ; CHECK-NEXT:    [[TMP99:%.*]] = fadd <1 x double> [[TMP96]], [[TMP98]]
@@ -182,18 +182,18 @@ define <9 x double> @unsupported_phi(i1 %cond, <9 x double> %A, <9 x double> %B,
 ; CHECK-NEXT:    [[TMP101:%.*]] = shufflevector <3 x double> [[TMP91]], <3 x double> [[TMP100]], <3 x i32> <i32 0, i32 1, i32 3>
 ; CHECK-NEXT:    [[BLOCK63:%.*]] = shufflevector <3 x double> [[SPLIT6]], <3 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP102:%.*]] = extractelement <3 x double> [[SPLIT11]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT64:%.*]] = insertelement <1 x double> poison, double [[TMP102]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT64:%.*]] = insertelement <1 x double> poison, double [[TMP102]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT65:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT64]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP103:%.*]] = fmul <1 x double> [[BLOCK63]], [[SPLAT_SPLAT65]]
 ; CHECK-NEXT:    [[BLOCK66:%.*]] = shufflevector <3 x double> [[SPLIT7]], <3 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP104:%.*]] = extractelement <3 x double> [[SPLIT11]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT67:%.*]] = insertelement <1 x double> poison, double [[TMP104]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT67:%.*]] = insertelement <1 x double> poison, double [[TMP104]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT68:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT67]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP105:%.*]] = fmul <1 x double> [[BLOCK66]], [[SPLAT_SPLAT68]]
 ; CHECK-NEXT:    [[TMP106:%.*]] = fadd <1 x double> [[TMP103]], [[TMP105]]
 ; CHECK-NEXT:    [[BLOCK69:%.*]] = shufflevector <3 x double> [[SPLIT8]], <3 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP107:%.*]] = extractelement <3 x double> [[SPLIT11]], i64 2
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT70:%.*]] = insertelement <1 x double> poison, double [[TMP107]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT70:%.*]] = insertelement <1 x double> poison, double [[TMP107]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT71:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT70]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP108:%.*]] = fmul <1 x double> [[BLOCK69]], [[SPLAT_SPLAT71]]
 ; CHECK-NEXT:    [[TMP109:%.*]] = fadd <1 x double> [[TMP106]], [[TMP108]]
@@ -201,18 +201,18 @@ define <9 x double> @unsupported_phi(i1 %cond, <9 x double> %A, <9 x double> %B,
 ; CHECK-NEXT:    [[TMP111:%.*]] = shufflevector <3 x double> undef, <3 x double> [[TMP110]], <3 x i32> <i32 3, i32 1, i32 2>
 ; CHECK-NEXT:    [[BLOCK72:%.*]] = shufflevector <3 x double> [[SPLIT6]], <3 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP112:%.*]] = extractelement <3 x double> [[SPLIT11]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT73:%.*]] = insertelement <1 x double> poison, double [[TMP112]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT73:%.*]] = insertelement <1 x double> poison, double [[TMP112]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT74:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT73]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP113:%.*]] = fmul <1 x double> [[BLOCK72]], [[SPLAT_SPLAT74]]
 ; CHECK-NEXT:    [[BLOCK75:%.*]] = shufflevector <3 x double> [[SPLIT7]], <3 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP114:%.*]] = extractelement <3 x double> [[SPLIT11]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT76:%.*]] = insertelement <1 x double> poison, double [[TMP114]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT76:%.*]] = insertelement <1 x double> poison, double [[TMP114]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT77:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT76]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP115:%.*]] = fmul <1 x double> [[BLOCK75]], [[SPLAT_SPLAT77]]
 ; CHECK-NEXT:    [[TMP116:%.*]] = fadd <1 x double> [[TMP113]], [[TMP115]]
 ; CHECK-NEXT:    [[BLOCK78:%.*]] = shufflevector <3 x double> [[SPLIT8]], <3 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP117:%.*]] = extractelement <3 x double> [[SPLIT11]], i64 2
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT79:%.*]] = insertelement <1 x double> poison, double [[TMP117]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT79:%.*]] = insertelement <1 x double> poison, double [[TMP117]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT80:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT79]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP118:%.*]] = fmul <1 x double> [[BLOCK78]], [[SPLAT_SPLAT80]]
 ; CHECK-NEXT:    [[TMP119:%.*]] = fadd <1 x double> [[TMP116]], [[TMP118]]
@@ -220,18 +220,18 @@ define <9 x double> @unsupported_phi(i1 %cond, <9 x double> %A, <9 x double> %B,
 ; CHECK-NEXT:    [[TMP121:%.*]] = shufflevector <3 x double> [[TMP111]], <3 x double> [[TMP120]], <3 x i32> <i32 0, i32 3, i32 2>
 ; CHECK-NEXT:    [[BLOCK81:%.*]] = shufflevector <3 x double> [[SPLIT6]], <3 x double> poison, <1 x i32> <i32 2>
 ; CHECK-NEXT:    [[TMP122:%.*]] = extractelement <3 x double> [[SPLIT11]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT82:%.*]] = insertelement <1 x double> poison, double [[TMP122]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT82:%.*]] = insertelement <1 x double> poison, double [[TMP122]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT83:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT82]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP123:%.*]] = fmul <1 x double> [[BLOCK81]], [[SPLAT_SPLAT83]]
 ; CHECK-NEXT:    [[BLOCK84:%.*]] = shufflevector <3 x double> [[SPLIT7]], <3 x double> poison, <1 x i32> <i32 2>
 ; CHECK-NEXT:    [[TMP124:%.*]] = extractelement <3 x double> [[SPLIT11]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT85:%.*]] = insertelement <1 x double> poison, double [[TMP124]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT85:%.*]] = insertelement <1 x double> poison, double [[TMP124]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT86:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT85]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP125:%.*]] = fmul <1 x double> [[BLOCK84]], [[SPLAT_SPLAT86]]
 ; CHECK-NEXT:    [[TMP126:%.*]] = fadd <1 x double> [[TMP123]], [[TMP125]]
 ; CHECK-NEXT:    [[BLOCK87:%.*]] = shufflevector <3 x double> [[SPLIT8]], <3 x double> poison, <1 x i32> <i32 2>
 ; CHECK-NEXT:    [[TMP127:%.*]] = extractelement <3 x double> [[SPLIT11]], i64 2
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT88:%.*]] = insertelement <1 x double> poison, double [[TMP127]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT88:%.*]] = insertelement <1 x double> poison, double [[TMP127]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT89:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT88]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP128:%.*]] = fmul <1 x double> [[BLOCK87]], [[SPLAT_SPLAT89]]
 ; CHECK-NEXT:    [[TMP129:%.*]] = fadd <1 x double> [[TMP126]], [[TMP128]]
@@ -298,18 +298,18 @@ define <9 x double> @unsupported_call(i1 %cond, <9 x double> %A, <9 x double> %B
 ; CHECK-NEXT:    [[SPLIT8:%.*]] = shufflevector <9 x double> [[A_FOO]], <9 x double> poison, <3 x i32> <i32 6, i32 7, i32 8>
 ; CHECK-NEXT:    [[BLOCK:%.*]] = shufflevector <3 x double> [[SPLIT3]], <3 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP22:%.*]] = extractelement <3 x double> [[SPLIT6]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x double> poison, double [[TMP22]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x double> poison, double [[TMP22]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP23:%.*]] = fmul <1 x double> [[BLOCK]], [[SPLAT_SPLAT]]
 ; CHECK-NEXT:    [[BLOCK9:%.*]] = shufflevector <3 x double> [[SPLIT4]], <3 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP24:%.*]] = extractelement <3 x double> [[SPLIT6]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT10:%.*]] = insertelement <1 x double> poison, double [[TMP24]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT10:%.*]] = insertelement <1 x double> poison, double [[TMP24]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT11:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT10]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP25:%.*]] = fmul <1 x double> [[BLOCK9]], [[SPLAT_SPLAT11]]
 ; CHECK-NEXT:    [[TMP26:%.*]] = fadd <1 x double> [[TMP23]], [[TMP25]]
 ; CHECK-NEXT:    [[BLOCK12:%.*]] = shufflevector <3 x double> [[SPLIT5]], <3 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP27:%.*]] = extractelement <3 x double> [[SPLIT6]], i64 2
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT13:%.*]] = insertelement <1 x double> poison, double [[TMP27]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT13:%.*]] = insertelement <1 x double> poison, double [[TMP27]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT14:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT13]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP28:%.*]] = fmul <1 x double> [[BLOCK12]], [[SPLAT_SPLAT14]]
 ; CHECK-NEXT:    [[TMP29:%.*]] = fadd <1 x double> [[TMP26]], [[TMP28]]
@@ -317,18 +317,18 @@ define <9 x double> @unsupported_call(i1 %cond, <9 x double> %A, <9 x double> %B
 ; CHECK-NEXT:    [[TMP31:%.*]] = shufflevector <3 x double> undef, <3 x double> [[TMP30]], <3 x i32> <i32 3, i32 1, i32 2>
 ; CHECK-NEXT:    [[BLOCK15:%.*]] = shufflevector <3 x double> [[SPLIT3]], <3 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP32:%.*]] = extractelement <3 x double> [[SPLIT6]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT16:%.*]] = insertelement <1 x double> poison, double [[TMP32]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT16:%.*]] = insertelement <1 x double> poison, double [[TMP32]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT17:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT16]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP33:%.*]] = fmul <1 x double> [[BLOCK15]], [[SPLAT_SPLAT17]]
 ; CHECK-NEXT:    [[BLOCK18:%.*]] = shufflevector <3 x double> [[SPLIT4]], <3 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP34:%.*]] = extractelement <3 x double> [[SPLIT6]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT19:%.*]] = insertelement <1 x double> poison, double [[TMP34]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT19:%.*]] = insertelement <1 x double> poison, double [[TMP34]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT20:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT19]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP35:%.*]] = fmul <1 x double> [[BLOCK18]], [[SPLAT_SPLAT20]]
 ; CHECK-NEXT:    [[TMP36:%.*]] = fadd <1 x double> [[TMP33]], [[TMP35]]
 ; CHECK-NEXT:    [[BLOCK21:%.*]] = shufflevector <3 x double> [[SPLIT5]], <3 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP37:%.*]] = extractelement <3 x double> [[SPLIT6]], i64 2
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT22:%.*]] = insertelement <1 x double> poison, double [[TMP37]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT22:%.*]] = insertelement <1 x double> poison, double [[TMP37]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT23:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT22]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP38:%.*]] = fmul <1 x double> [[BLOCK21]], [[SPLAT_SPLAT23]]
 ; CHECK-NEXT:    [[TMP39:%.*]] = fadd <1 x double> [[TMP36]], [[TMP38]]
@@ -336,18 +336,18 @@ define <9 x double> @unsupported_call(i1 %cond, <9 x double> %A, <9 x double> %B
 ; CHECK-NEXT:    [[TMP41:%.*]] = shufflevector <3 x double> [[TMP31]], <3 x double> [[TMP40]], <3 x i32> <i32 0, i32 3, i32 2>
 ; CHECK-NEXT:    [[BLOCK24:%.*]] = shufflevector <3 x double> [[SPLIT3]], <3 x double> poison, <1 x i32> <i32 2>
 ; CHECK-NEXT:    [[TMP42:%.*]] = extractelement <3 x double> [[SPLIT6]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT25:%.*]] = insertelement <1 x double> poison, double [[TMP42]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT25:%.*]] = insertelement <1 x double> poison, double [[TMP42]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT26:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT25]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP43:%.*]] = fmul <1 x double> [[BLOCK24]], [[SPLAT_SPLAT26]]
 ; CHECK-NEXT:    [[BLOCK27:%.*]] = shufflevector <3 x double> [[SPLIT4]], <3 x double> poison, <1 x i32> <i32 2>
 ; CHECK-NEXT:    [[TMP44:%.*]] = extractelement <3 x double> [[SPLIT6]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT28:%.*]] = insertelement <1 x double> poison, double [[TMP44]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT28:%.*]] = insertelement <1 x double> poison, double [[TMP44]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT29:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT28]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP45:%.*]] = fmul <1 x double> [[BLOCK27]], [[SPLAT_SPLAT29]]
 ; CHECK-NEXT:    [[TMP46:%.*]] = fadd <1 x double> [[TMP43]], [[TMP45]]
 ; CHECK-NEXT:    [[BLOCK30:%.*]] = shufflevector <3 x double> [[SPLIT5]], <3 x double> poison, <1 x i32> <i32 2>
 ; CHECK-NEXT:    [[TMP47:%.*]] = extractelement <3 x double> [[SPLIT6]], i64 2
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT31:%.*]] = insertelement <1 x double> poison, double [[TMP47]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT31:%.*]] = insertelement <1 x double> poison, double [[TMP47]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT32:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT31]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP48:%.*]] = fmul <1 x double> [[BLOCK30]], [[SPLAT_SPLAT32]]
 ; CHECK-NEXT:    [[TMP49:%.*]] = fadd <1 x double> [[TMP46]], [[TMP48]]
@@ -355,18 +355,18 @@ define <9 x double> @unsupported_call(i1 %cond, <9 x double> %A, <9 x double> %B
 ; CHECK-NEXT:    [[TMP51:%.*]] = shufflevector <3 x double> [[TMP41]], <3 x double> [[TMP50]], <3 x i32> <i32 0, i32 1, i32 3>
 ; CHECK-NEXT:    [[BLOCK33:%.*]] = shufflevector <3 x double> [[SPLIT3]], <3 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP52:%.*]] = extractelement <3 x double> [[SPLIT7]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT34:%.*]] = insertelement <1 x double> poison, double [[TMP52]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT34:%.*]] = insertelement <1 x double> poison, double [[TMP52]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT35:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT34]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP53:%.*]] = fmul <1 x double> [[BLOCK33]], [[SPLAT_SPLAT35]]
 ; CHECK-NEXT:    [[BLOCK36:%.*]] = shufflevector <3 x double> [[SPLIT4]], <3 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP54:%.*]] = extractelement <3 x double> [[SPLIT7]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT37:%.*]] = insertelement <1 x double> poison, double [[TMP54]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT37:%.*]] = insertelement <1 x double> poison, double [[TMP54]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT38:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT37]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP55:%.*]] = fmul <1 x double> [[BLOCK36]], [[SPLAT_SPLAT38]]
 ; CHECK-NEXT:    [[TMP56:%.*]] = fadd <1 x double> [[TMP53]], [[TMP55]]
 ; CHECK-NEXT:    [[BLOCK39:%.*]] = shufflevector <3 x double> [[SPLIT5]], <3 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP57:%.*]] = extractelement <3 x double> [[SPLIT7]], i64 2
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT40:%.*]] = insertelement <1 x double> poison, double [[TMP57]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT40:%.*]] = insertelement <1 x double> poison, double [[TMP57]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT41:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT40]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP58:%.*]] = fmul <1 x double> [[BLOCK39]], [[SPLAT_SPLAT41]]
 ; CHECK-NEXT:    [[TMP59:%.*]] = fadd <1 x double> [[TMP56]], [[TMP58]]
@@ -374,18 +374,18 @@ define <9 x double> @unsupported_call(i1 %cond, <9 x double> %A, <9 x double> %B
 ; CHECK-NEXT:    [[TMP61:%.*]] = shufflevector <3 x double> undef, <3 x double> [[TMP60]], <3 x i32> <i32 3, i32 1, i32 2>
 ; CHECK-NEXT:    [[BLOCK42:%.*]] = shufflevector <3 x double> [[SPLIT3]], <3 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP62:%.*]] = extractelement <3 x double> [[SPLIT7]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT43:%.*]] = insertelement <1 x double> poison, double [[TMP62]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT43:%.*]] = insertelement <1 x double> poison, double [[TMP62]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT44:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT43]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP63:%.*]] = fmul <1 x double> [[BLOCK42]], [[SPLAT_SPLAT44]]
 ; CHECK-NEXT:    [[BLOCK45:%.*]] = shufflevector <3 x double> [[SPLIT4]], <3 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP64:%.*]] = extractelement <3 x double> [[SPLIT7]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT46:%.*]] = insertelement <1 x double> poison, double [[TMP64]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT46:%.*]] = insertelement <1 x double> poison, double [[TMP64]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT47:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT46]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP65:%.*]] = fmul <1 x double> [[BLOCK45]], [[SPLAT_SPLAT47]]
 ; CHECK-NEXT:    [[TMP66:%.*]] = fadd <1 x double> [[TMP63]], [[TMP65]]
 ; CHECK-NEXT:    [[BLOCK48:%.*]] = shufflevector <3 x double> [[SPLIT5]], <3 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP67:%.*]] = extractelement <3 x double> [[SPLIT7]], i64 2
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT49:%.*]] = insertelement <1 x double> poison, double [[TMP67]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT49:%.*]] = insertelement <1 x double> poison, double [[TMP67]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT50:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT49]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP68:%.*]] = fmul <1 x double> [[BLOCK48]], [[SPLAT_SPLAT50]]
 ; CHECK-NEXT:    [[TMP69:%.*]] = fadd <1 x double> [[TMP66]], [[TMP68]]
@@ -393,18 +393,18 @@ define <9 x double> @unsupported_call(i1 %cond, <9 x double> %A, <9 x double> %B
 ; CHECK-NEXT:    [[TMP71:%.*]] = shufflevector <3 x double> [[TMP61]], <3 x double> [[TMP70]], <3 x i32> <i32 0, i32 3, i32 2>
 ; CHECK-NEXT:    [[BLOCK51:%.*]] = shufflevector <3 x double> [[SPLIT3]], <3 x double> poison, <1 x i32> <i32 2>
 ; CHECK-NEXT:    [[TMP72:%.*]] = extractelement <3 x double> [[SPLIT7]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT52:%.*]] = insertelement <1 x double> poison, double [[TMP72]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT52:%.*]] = insertelement <1 x double> poison, double [[TMP72]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT53:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT52]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP73:%.*]] = fmul <1 x double> [[BLOCK51]], [[SPLAT_SPLAT53]]
 ; CHECK-NEXT:    [[BLOCK54:%.*]] = shufflevector <3 x double> [[SPLIT4]], <3 x double> poison, <1 x i32> <i32 2>
 ; CHECK-NEXT:    [[TMP74:%.*]] = extractelement <3 x double> [[SPLIT7]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT55:%.*]] = insertelement <1 x double> poison, double [[TMP74]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT55:%.*]] = insertelement <1 x double> poison, double [[TMP74]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT56:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT55]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP75:%.*]] = fmul <1 x double> [[BLOCK54]], [[SPLAT_SPLAT56]]
 ; CHECK-NEXT:    [[TMP76:%.*]] = fadd <1 x double> [[TMP73]], [[TMP75]]
 ; CHECK-NEXT:    [[BLOCK57:%.*]] = shufflevector <3 x double> [[SPLIT5]], <3 x double> poison, <1 x i32> <i32 2>
 ; CHECK-NEXT:    [[TMP77:%.*]] = extractelement <3 x double> [[SPLIT7]], i64 2
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT58:%.*]] = insertelement <1 x double> poison, double [[TMP77]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT58:%.*]] = insertelement <1 x double> poison, double [[TMP77]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT59:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT58]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP78:%.*]] = fmul <1 x double> [[BLOCK57]], [[SPLAT_SPLAT59]]
 ; CHECK-NEXT:    [[TMP79:%.*]] = fadd <1 x double> [[TMP76]], [[TMP78]]
@@ -412,18 +412,18 @@ define <9 x double> @unsupported_call(i1 %cond, <9 x double> %A, <9 x double> %B
 ; CHECK-NEXT:    [[TMP81:%.*]] = shufflevector <3 x double> [[TMP71]], <3 x double> [[TMP80]], <3 x i32> <i32 0, i32 1, i32 3>
 ; CHECK-NEXT:    [[BLOCK60:%.*]] = shufflevector <3 x double> [[SPLIT3]], <3 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP82:%.*]] = extractelement <3 x double> [[SPLIT8]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT61:%.*]] = insertelement <1 x double> poison, double [[TMP82]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT61:%.*]] = insertelement <1 x double> poison, double [[TMP82]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT62:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT61]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP83:%.*]] = fmul <1 x double> [[BLOCK60]], [[SPLAT_SPLAT62]]
 ; CHECK-NEXT:    [[BLOCK63:%.*]] = shufflevector <3 x double> [[SPLIT4]], <3 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP84:%.*]] = extractelement <3 x double> [[SPLIT8]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT64:%.*]] = insertelement <1 x double> poison, double [[TMP84]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT64:%.*]] = insertelement <1 x double> poison, double [[TMP84]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT65:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT64]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP85:%.*]] = fmul <1 x double> [[BLOCK63]], [[SPLAT_SPLAT65]]
 ; CHECK-NEXT:    [[TMP86:%.*]] = fadd <1 x double> [[TMP83]], [[TMP85]]
 ; CHECK-NEXT:    [[BLOCK66:%.*]] = shufflevector <3 x double> [[SPLIT5]], <3 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP87:%.*]] = extractelement <3 x double> [[SPLIT8]], i64 2
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT67:%.*]] = insertelement <1 x double> poison, double [[TMP87]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT67:%.*]] = insertelement <1 x double> poison, double [[TMP87]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT68:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT67]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP88:%.*]] = fmul <1 x double> [[BLOCK66]], [[SPLAT_SPLAT68]]
 ; CHECK-NEXT:    [[TMP89:%.*]] = fadd <1 x double> [[TMP86]], [[TMP88]]
@@ -431,18 +431,18 @@ define <9 x double> @unsupported_call(i1 %cond, <9 x double> %A, <9 x double> %B
 ; CHECK-NEXT:    [[TMP91:%.*]] = shufflevector <3 x double> undef, <3 x double> [[TMP90]], <3 x i32> <i32 3, i32 1, i32 2>
 ; CHECK-NEXT:    [[BLOCK69:%.*]] = shufflevector <3 x double> [[SPLIT3]], <3 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP92:%.*]] = extractelement <3 x double> [[SPLIT8]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT70:%.*]] = insertelement <1 x double> poison, double [[TMP92]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT70:%.*]] = insertelement <1 x double> poison, double [[TMP92]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT71:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT70]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP93:%.*]] = fmul <1 x double> [[BLOCK69]], [[SPLAT_SPLAT71]]
 ; CHECK-NEXT:    [[BLOCK72:%.*]] = shufflevector <3 x double> [[SPLIT4]], <3 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP94:%.*]] = extractelement <3 x double> [[SPLIT8]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT73:%.*]] = insertelement <1 x double> poison, double [[TMP94]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT73:%.*]] = insertelement <1 x double> poison, double [[TMP94]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT74:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT73]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP95:%.*]] = fmul <1 x double> [[BLOCK72]], [[SPLAT_SPLAT74]]
 ; CHECK-NEXT:    [[TMP96:%.*]] = fadd <1 x double> [[TMP93]], [[TMP95]]
 ; CHECK-NEXT:    [[BLOCK75:%.*]] = shufflevector <3 x double> [[SPLIT5]], <3 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP97:%.*]] = extractelement <3 x double> [[SPLIT8]], i64 2
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT76:%.*]] = insertelement <1 x double> poison, double [[TMP97]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT76:%.*]] = insertelement <1 x double> poison, double [[TMP97]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT77:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT76]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP98:%.*]] = fmul <1 x double> [[BLOCK75]], [[SPLAT_SPLAT77]]
 ; CHECK-NEXT:    [[TMP99:%.*]] = fadd <1 x double> [[TMP96]], [[TMP98]]
@@ -450,18 +450,18 @@ define <9 x double> @unsupported_call(i1 %cond, <9 x double> %A, <9 x double> %B
 ; CHECK-NEXT:    [[TMP101:%.*]] = shufflevector <3 x double> [[TMP91]], <3 x double> [[TMP100]], <3 x i32> <i32 0, i32 3, i32 2>
 ; CHECK-NEXT:    [[BLOCK78:%.*]] = shufflevector <3 x double> [[SPLIT3]], <3 x double> poison, <1 x i32> <i32 2>
 ; CHECK-NEXT:    [[TMP102:%.*]] = extractelement <3 x double> [[SPLIT8]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT79:%.*]] = insertelement <1 x double> poison, double [[TMP102]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT79:%.*]] = insertelement <1 x double> poison, double [[TMP102]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT80:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT79]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP103:%.*]] = fmul <1 x double> [[BLOCK78]], [[SPLAT_SPLAT80]]
 ; CHECK-NEXT:    [[BLOCK81:%.*]] = shufflevector <3 x double> [[SPLIT4]], <3 x double> poison, <1 x i32> <i32 2>
 ; CHECK-NEXT:    [[TMP104:%.*]] = extractelement <3 x double> [[SPLIT8]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT82:%.*]] = insertelement <1 x double> poison, double [[TMP104]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT82:%.*]] = insertelement <1 x double> poison, double [[TMP104]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT83:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT82]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP105:%.*]] = fmul <1 x double> [[BLOCK81]], [[SPLAT_SPLAT83]]
 ; CHECK-NEXT:    [[TMP106:%.*]] = fadd <1 x double> [[TMP103]], [[TMP105]]
 ; CHECK-NEXT:    [[BLOCK84:%.*]] = shufflevector <3 x double> [[SPLIT5]], <3 x double> poison, <1 x i32> <i32 2>
 ; CHECK-NEXT:    [[TMP107:%.*]] = extractelement <3 x double> [[SPLIT8]], i64 2
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT85:%.*]] = insertelement <1 x double> poison, double [[TMP107]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT85:%.*]] = insertelement <1 x double> poison, double [[TMP107]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT86:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT85]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP108:%.*]] = fmul <1 x double> [[BLOCK84]], [[SPLAT_SPLAT86]]
 ; CHECK-NEXT:    [[TMP109:%.*]] = fadd <1 x double> [[TMP106]], [[TMP108]]

diff  --git a/llvm/test/Transforms/LowerMatrixIntrinsics/transpose-opts-iterator-invalidation.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/transpose-opts-iterator-invalidation.ll
index 900e8c8308f1a..c27d965fc217d 100644
--- a/llvm/test/Transforms/LowerMatrixIntrinsics/transpose-opts-iterator-invalidation.ll
+++ b/llvm/test/Transforms/LowerMatrixIntrinsics/transpose-opts-iterator-invalidation.ll
@@ -11,12 +11,12 @@ define <2 x double> @test(<4 x double> %a, <2 x double> %b, i1 %c) {
 ; CHECK-NEXT:    [[SPLIT2:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <2 x i32> <i32 0, i32 1>
 ; CHECK-NEXT:    [[BLOCK:%.*]] = shufflevector <2 x double> [[SPLIT]], <2 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x double> poison, double [[TMP0]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x double> poison, double [[TMP0]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP1:%.*]] = fmul <1 x double> [[BLOCK]], [[SPLAT_SPLAT]]
 ; CHECK-NEXT:    [[BLOCK3:%.*]] = shufflevector <2 x double> [[SPLIT1]], <2 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT4:%.*]] = insertelement <1 x double> poison, double [[TMP2]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT4:%.*]] = insertelement <1 x double> poison, double [[TMP2]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT5:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT4]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP3:%.*]] = fmul <1 x double> [[BLOCK3]], [[SPLAT_SPLAT5]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = fadd <1 x double> [[TMP1]], [[TMP3]]
@@ -24,12 +24,12 @@ define <2 x double> @test(<4 x double> %a, <2 x double> %b, i1 %c) {
 ; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <2 x double> undef, <2 x double> [[TMP5]], <2 x i32> <i32 2, i32 1>
 ; CHECK-NEXT:    [[BLOCK6:%.*]] = shufflevector <2 x double> [[SPLIT]], <2 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT7:%.*]] = insertelement <1 x double> poison, double [[TMP7]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT7:%.*]] = insertelement <1 x double> poison, double [[TMP7]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT8:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT7]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP8:%.*]] = fmul <1 x double> [[BLOCK6]], [[SPLAT_SPLAT8]]
 ; CHECK-NEXT:    [[BLOCK9:%.*]] = shufflevector <2 x double> [[SPLIT1]], <2 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT10:%.*]] = insertelement <1 x double> poison, double [[TMP9]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT10:%.*]] = insertelement <1 x double> poison, double [[TMP9]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT11:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT10]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP10:%.*]] = fmul <1 x double> [[BLOCK9]], [[SPLAT_SPLAT11]]
 ; CHECK-NEXT:    [[TMP11:%.*]] = fadd <1 x double> [[TMP8]], [[TMP10]]

diff  --git a/llvm/test/Transforms/LowerMatrixIntrinsics/transpose-opts.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/transpose-opts.ll
index f2f9f2682ab4d..5a0fbf1f5dca9 100644
--- a/llvm/test/Transforms/LowerMatrixIntrinsics/transpose-opts.ll
+++ b/llvm/test/Transforms/LowerMatrixIntrinsics/transpose-opts.ll
@@ -91,12 +91,12 @@ define void @multiply_ntt(ptr %A, ptr %B, ptr %C, ptr %R) {
 ; CHECK-NEXT:    [[COL_LOAD56:%.*]] = load <4 x double>, ptr [[VEC_GEP54]], align 16
 ; CHECK-NEXT:    [[BLOCK57:%.*]] = shufflevector <4 x double> [[COL_LOAD53]], <4 x double> poison, <2 x i32> <i32 0, i32 1>
 ; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x double> [[COL_LOAD45]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT58:%.*]] = insertelement <2 x double> poison, double [[TMP6]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT58:%.*]] = insertelement <2 x double> poison, double [[TMP6]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT59:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT58]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP7:%.*]] = fmul <2 x double> [[BLOCK57]], [[SPLAT_SPLAT59]]
 ; CHECK-NEXT:    [[BLOCK60:%.*]] = shufflevector <4 x double> [[COL_LOAD56]], <4 x double> poison, <2 x i32> <i32 0, i32 1>
 ; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <2 x double> [[COL_LOAD45]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT61:%.*]] = insertelement <2 x double> poison, double [[TMP8]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT61:%.*]] = insertelement <2 x double> poison, double [[TMP8]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT62:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT61]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP9:%.*]] = fmul <2 x double> [[BLOCK60]], [[SPLAT_SPLAT62]]
 ; CHECK-NEXT:    [[TMP10:%.*]] = fadd <2 x double> [[TMP7]], [[TMP9]]
@@ -104,12 +104,12 @@ define void @multiply_ntt(ptr %A, ptr %B, ptr %C, ptr %R) {
 ; CHECK-NEXT:    [[TMP12:%.*]] = shufflevector <4 x double> undef, <4 x double> [[TMP11]], <4 x i32> <i32 4, i32 5, i32 2, i32 3>
 ; CHECK-NEXT:    [[BLOCK63:%.*]] = shufflevector <4 x double> [[COL_LOAD53]], <4 x double> poison, <2 x i32> <i32 2, i32 3>
 ; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <2 x double> [[COL_LOAD45]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT64:%.*]] = insertelement <2 x double> poison, double [[TMP13]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT64:%.*]] = insertelement <2 x double> poison, double [[TMP13]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT65:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT64]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP14:%.*]] = fmul <2 x double> [[BLOCK63]], [[SPLAT_SPLAT65]]
 ; CHECK-NEXT:    [[BLOCK66:%.*]] = shufflevector <4 x double> [[COL_LOAD56]], <4 x double> poison, <2 x i32> <i32 2, i32 3>
 ; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <2 x double> [[COL_LOAD45]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT67:%.*]] = insertelement <2 x double> poison, double [[TMP15]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT67:%.*]] = insertelement <2 x double> poison, double [[TMP15]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT68:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT67]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP16:%.*]] = fmul <2 x double> [[BLOCK66]], [[SPLAT_SPLAT68]]
 ; CHECK-NEXT:    [[TMP17:%.*]] = fadd <2 x double> [[TMP14]], [[TMP16]]
@@ -117,12 +117,12 @@ define void @multiply_ntt(ptr %A, ptr %B, ptr %C, ptr %R) {
 ; CHECK-NEXT:    [[TMP19:%.*]] = shufflevector <4 x double> [[TMP12]], <4 x double> [[TMP18]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
 ; CHECK-NEXT:    [[BLOCK69:%.*]] = shufflevector <4 x double> [[COL_LOAD53]], <4 x double> poison, <2 x i32> <i32 0, i32 1>
 ; CHECK-NEXT:    [[TMP20:%.*]] = extractelement <2 x double> [[COL_LOAD48]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT70:%.*]] = insertelement <2 x double> poison, double [[TMP20]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT70:%.*]] = insertelement <2 x double> poison, double [[TMP20]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT71:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT70]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP21:%.*]] = fmul <2 x double> [[BLOCK69]], [[SPLAT_SPLAT71]]
 ; CHECK-NEXT:    [[BLOCK72:%.*]] = shufflevector <4 x double> [[COL_LOAD56]], <4 x double> poison, <2 x i32> <i32 0, i32 1>
 ; CHECK-NEXT:    [[TMP22:%.*]] = extractelement <2 x double> [[COL_LOAD48]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT73:%.*]] = insertelement <2 x double> poison, double [[TMP22]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT73:%.*]] = insertelement <2 x double> poison, double [[TMP22]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT74:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT73]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP23:%.*]] = fmul <2 x double> [[BLOCK72]], [[SPLAT_SPLAT74]]
 ; CHECK-NEXT:    [[TMP24:%.*]] = fadd <2 x double> [[TMP21]], [[TMP23]]
@@ -130,12 +130,12 @@ define void @multiply_ntt(ptr %A, ptr %B, ptr %C, ptr %R) {
 ; CHECK-NEXT:    [[TMP26:%.*]] = shufflevector <4 x double> undef, <4 x double> [[TMP25]], <4 x i32> <i32 4, i32 5, i32 2, i32 3>
 ; CHECK-NEXT:    [[BLOCK75:%.*]] = shufflevector <4 x double> [[COL_LOAD53]], <4 x double> poison, <2 x i32> <i32 2, i32 3>
 ; CHECK-NEXT:    [[TMP27:%.*]] = extractelement <2 x double> [[COL_LOAD48]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT76:%.*]] = insertelement <2 x double> poison, double [[TMP27]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT76:%.*]] = insertelement <2 x double> poison, double [[TMP27]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT77:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT76]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP28:%.*]] = fmul <2 x double> [[BLOCK75]], [[SPLAT_SPLAT77]]
 ; CHECK-NEXT:    [[BLOCK78:%.*]] = shufflevector <4 x double> [[COL_LOAD56]], <4 x double> poison, <2 x i32> <i32 2, i32 3>
 ; CHECK-NEXT:    [[TMP29:%.*]] = extractelement <2 x double> [[COL_LOAD48]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT79:%.*]] = insertelement <2 x double> poison, double [[TMP29]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT79:%.*]] = insertelement <2 x double> poison, double [[TMP29]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT80:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT79]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP30:%.*]] = fmul <2 x double> [[BLOCK78]], [[SPLAT_SPLAT80]]
 ; CHECK-NEXT:    [[TMP31:%.*]] = fadd <2 x double> [[TMP28]], [[TMP30]]
@@ -143,12 +143,12 @@ define void @multiply_ntt(ptr %A, ptr %B, ptr %C, ptr %R) {
 ; CHECK-NEXT:    [[TMP33:%.*]] = shufflevector <4 x double> [[TMP26]], <4 x double> [[TMP32]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
 ; CHECK-NEXT:    [[BLOCK81:%.*]] = shufflevector <4 x double> [[COL_LOAD53]], <4 x double> poison, <2 x i32> <i32 0, i32 1>
 ; CHECK-NEXT:    [[TMP34:%.*]] = extractelement <2 x double> [[COL_LOAD51]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT82:%.*]] = insertelement <2 x double> poison, double [[TMP34]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT82:%.*]] = insertelement <2 x double> poison, double [[TMP34]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT83:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT82]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP35:%.*]] = fmul <2 x double> [[BLOCK81]], [[SPLAT_SPLAT83]]
 ; CHECK-NEXT:    [[BLOCK84:%.*]] = shufflevector <4 x double> [[COL_LOAD56]], <4 x double> poison, <2 x i32> <i32 0, i32 1>
 ; CHECK-NEXT:    [[TMP36:%.*]] = extractelement <2 x double> [[COL_LOAD51]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT85:%.*]] = insertelement <2 x double> poison, double [[TMP36]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT85:%.*]] = insertelement <2 x double> poison, double [[TMP36]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT86:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT85]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP37:%.*]] = fmul <2 x double> [[BLOCK84]], [[SPLAT_SPLAT86]]
 ; CHECK-NEXT:    [[TMP38:%.*]] = fadd <2 x double> [[TMP35]], [[TMP37]]
@@ -156,12 +156,12 @@ define void @multiply_ntt(ptr %A, ptr %B, ptr %C, ptr %R) {
 ; CHECK-NEXT:    [[TMP40:%.*]] = shufflevector <4 x double> undef, <4 x double> [[TMP39]], <4 x i32> <i32 4, i32 5, i32 2, i32 3>
 ; CHECK-NEXT:    [[BLOCK87:%.*]] = shufflevector <4 x double> [[COL_LOAD53]], <4 x double> poison, <2 x i32> <i32 2, i32 3>
 ; CHECK-NEXT:    [[TMP41:%.*]] = extractelement <2 x double> [[COL_LOAD51]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT88:%.*]] = insertelement <2 x double> poison, double [[TMP41]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT88:%.*]] = insertelement <2 x double> poison, double [[TMP41]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT89:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT88]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP42:%.*]] = fmul <2 x double> [[BLOCK87]], [[SPLAT_SPLAT89]]
 ; CHECK-NEXT:    [[BLOCK90:%.*]] = shufflevector <4 x double> [[COL_LOAD56]], <4 x double> poison, <2 x i32> <i32 2, i32 3>
 ; CHECK-NEXT:    [[TMP43:%.*]] = extractelement <2 x double> [[COL_LOAD51]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT91:%.*]] = insertelement <2 x double> poison, double [[TMP43]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT91:%.*]] = insertelement <2 x double> poison, double [[TMP43]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT92:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT91]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP44:%.*]] = fmul <2 x double> [[BLOCK90]], [[SPLAT_SPLAT92]]
 ; CHECK-NEXT:    [[TMP45:%.*]] = fadd <2 x double> [[TMP42]], [[TMP44]]
@@ -178,18 +178,18 @@ define void @multiply_ntt(ptr %A, ptr %B, ptr %C, ptr %R) {
 ; CHECK-NEXT:    [[SPLIT5:%.*]] = shufflevector <12 x double> [[TMP50]], <12 x double> poison, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
 ; CHECK-NEXT:    [[BLOCK:%.*]] = shufflevector <2 x double> [[SPLIT]], <2 x double> poison, <2 x i32> <i32 0, i32 1>
 ; CHECK-NEXT:    [[TMP51:%.*]] = extractelement <4 x double> [[SPLIT3]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <2 x double> poison, double [[TMP51]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <2 x double> poison, double [[TMP51]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP52:%.*]] = fmul <2 x double> [[BLOCK]], [[SPLAT_SPLAT]]
 ; CHECK-NEXT:    [[BLOCK6:%.*]] = shufflevector <2 x double> [[SPLIT1]], <2 x double> poison, <2 x i32> <i32 0, i32 1>
 ; CHECK-NEXT:    [[TMP53:%.*]] = extractelement <4 x double> [[SPLIT4]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT7:%.*]] = insertelement <2 x double> poison, double [[TMP53]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT7:%.*]] = insertelement <2 x double> poison, double [[TMP53]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT8:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT7]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP54:%.*]] = fmul <2 x double> [[BLOCK6]], [[SPLAT_SPLAT8]]
 ; CHECK-NEXT:    [[TMP55:%.*]] = fadd <2 x double> [[TMP52]], [[TMP54]]
 ; CHECK-NEXT:    [[BLOCK9:%.*]] = shufflevector <2 x double> [[SPLIT2]], <2 x double> poison, <2 x i32> <i32 0, i32 1>
 ; CHECK-NEXT:    [[TMP56:%.*]] = extractelement <4 x double> [[SPLIT5]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT10:%.*]] = insertelement <2 x double> poison, double [[TMP56]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT10:%.*]] = insertelement <2 x double> poison, double [[TMP56]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT11:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT10]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP57:%.*]] = fmul <2 x double> [[BLOCK9]], [[SPLAT_SPLAT11]]
 ; CHECK-NEXT:    [[TMP58:%.*]] = fadd <2 x double> [[TMP55]], [[TMP57]]
@@ -197,18 +197,18 @@ define void @multiply_ntt(ptr %A, ptr %B, ptr %C, ptr %R) {
 ; CHECK-NEXT:    [[TMP60:%.*]] = shufflevector <2 x double> undef, <2 x double> [[TMP59]], <2 x i32> <i32 2, i32 3>
 ; CHECK-NEXT:    [[BLOCK12:%.*]] = shufflevector <2 x double> [[SPLIT]], <2 x double> poison, <2 x i32> <i32 0, i32 1>
 ; CHECK-NEXT:    [[TMP61:%.*]] = extractelement <4 x double> [[SPLIT3]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT13:%.*]] = insertelement <2 x double> poison, double [[TMP61]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT13:%.*]] = insertelement <2 x double> poison, double [[TMP61]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT14:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT13]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP62:%.*]] = fmul <2 x double> [[BLOCK12]], [[SPLAT_SPLAT14]]
 ; CHECK-NEXT:    [[BLOCK15:%.*]] = shufflevector <2 x double> [[SPLIT1]], <2 x double> poison, <2 x i32> <i32 0, i32 1>
 ; CHECK-NEXT:    [[TMP63:%.*]] = extractelement <4 x double> [[SPLIT4]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT16:%.*]] = insertelement <2 x double> poison, double [[TMP63]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT16:%.*]] = insertelement <2 x double> poison, double [[TMP63]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT17:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT16]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP64:%.*]] = fmul <2 x double> [[BLOCK15]], [[SPLAT_SPLAT17]]
 ; CHECK-NEXT:    [[TMP65:%.*]] = fadd <2 x double> [[TMP62]], [[TMP64]]
 ; CHECK-NEXT:    [[BLOCK18:%.*]] = shufflevector <2 x double> [[SPLIT2]], <2 x double> poison, <2 x i32> <i32 0, i32 1>
 ; CHECK-NEXT:    [[TMP66:%.*]] = extractelement <4 x double> [[SPLIT5]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT19:%.*]] = insertelement <2 x double> poison, double [[TMP66]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT19:%.*]] = insertelement <2 x double> poison, double [[TMP66]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT20:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT19]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP67:%.*]] = fmul <2 x double> [[BLOCK18]], [[SPLAT_SPLAT20]]
 ; CHECK-NEXT:    [[TMP68:%.*]] = fadd <2 x double> [[TMP65]], [[TMP67]]
@@ -216,18 +216,18 @@ define void @multiply_ntt(ptr %A, ptr %B, ptr %C, ptr %R) {
 ; CHECK-NEXT:    [[TMP70:%.*]] = shufflevector <2 x double> undef, <2 x double> [[TMP69]], <2 x i32> <i32 2, i32 3>
 ; CHECK-NEXT:    [[BLOCK21:%.*]] = shufflevector <2 x double> [[SPLIT]], <2 x double> poison, <2 x i32> <i32 0, i32 1>
 ; CHECK-NEXT:    [[TMP71:%.*]] = extractelement <4 x double> [[SPLIT3]], i64 2
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT22:%.*]] = insertelement <2 x double> poison, double [[TMP71]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT22:%.*]] = insertelement <2 x double> poison, double [[TMP71]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT23:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT22]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP72:%.*]] = fmul <2 x double> [[BLOCK21]], [[SPLAT_SPLAT23]]
 ; CHECK-NEXT:    [[BLOCK24:%.*]] = shufflevector <2 x double> [[SPLIT1]], <2 x double> poison, <2 x i32> <i32 0, i32 1>
 ; CHECK-NEXT:    [[TMP73:%.*]] = extractelement <4 x double> [[SPLIT4]], i64 2
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT25:%.*]] = insertelement <2 x double> poison, double [[TMP73]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT25:%.*]] = insertelement <2 x double> poison, double [[TMP73]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT26:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT25]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP74:%.*]] = fmul <2 x double> [[BLOCK24]], [[SPLAT_SPLAT26]]
 ; CHECK-NEXT:    [[TMP75:%.*]] = fadd <2 x double> [[TMP72]], [[TMP74]]
 ; CHECK-NEXT:    [[BLOCK27:%.*]] = shufflevector <2 x double> [[SPLIT2]], <2 x double> poison, <2 x i32> <i32 0, i32 1>
 ; CHECK-NEXT:    [[TMP76:%.*]] = extractelement <4 x double> [[SPLIT5]], i64 2
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT28:%.*]] = insertelement <2 x double> poison, double [[TMP76]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT28:%.*]] = insertelement <2 x double> poison, double [[TMP76]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT29:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT28]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP77:%.*]] = fmul <2 x double> [[BLOCK27]], [[SPLAT_SPLAT29]]
 ; CHECK-NEXT:    [[TMP78:%.*]] = fadd <2 x double> [[TMP75]], [[TMP77]]
@@ -235,18 +235,18 @@ define void @multiply_ntt(ptr %A, ptr %B, ptr %C, ptr %R) {
 ; CHECK-NEXT:    [[TMP80:%.*]] = shufflevector <2 x double> undef, <2 x double> [[TMP79]], <2 x i32> <i32 2, i32 3>
 ; CHECK-NEXT:    [[BLOCK30:%.*]] = shufflevector <2 x double> [[SPLIT]], <2 x double> poison, <2 x i32> <i32 0, i32 1>
 ; CHECK-NEXT:    [[TMP81:%.*]] = extractelement <4 x double> [[SPLIT3]], i64 3
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT31:%.*]] = insertelement <2 x double> poison, double [[TMP81]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT31:%.*]] = insertelement <2 x double> poison, double [[TMP81]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT32:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT31]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP82:%.*]] = fmul <2 x double> [[BLOCK30]], [[SPLAT_SPLAT32]]
 ; CHECK-NEXT:    [[BLOCK33:%.*]] = shufflevector <2 x double> [[SPLIT1]], <2 x double> poison, <2 x i32> <i32 0, i32 1>
 ; CHECK-NEXT:    [[TMP83:%.*]] = extractelement <4 x double> [[SPLIT4]], i64 3
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT34:%.*]] = insertelement <2 x double> poison, double [[TMP83]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT34:%.*]] = insertelement <2 x double> poison, double [[TMP83]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT35:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT34]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP84:%.*]] = fmul <2 x double> [[BLOCK33]], [[SPLAT_SPLAT35]]
 ; CHECK-NEXT:    [[TMP85:%.*]] = fadd <2 x double> [[TMP82]], [[TMP84]]
 ; CHECK-NEXT:    [[BLOCK36:%.*]] = shufflevector <2 x double> [[SPLIT2]], <2 x double> poison, <2 x i32> <i32 0, i32 1>
 ; CHECK-NEXT:    [[TMP86:%.*]] = extractelement <4 x double> [[SPLIT5]], i64 3
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT37:%.*]] = insertelement <2 x double> poison, double [[TMP86]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT37:%.*]] = insertelement <2 x double> poison, double [[TMP86]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT38:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT37]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP87:%.*]] = fmul <2 x double> [[BLOCK36]], [[SPLAT_SPLAT38]]
 ; CHECK-NEXT:    [[TMP88:%.*]] = fadd <2 x double> [[TMP85]], [[TMP87]]
@@ -307,18 +307,18 @@ define void @multiply_tt_t(ptr %A, ptr %B, ptr %C) {
 ; CHECK-NEXT:    [[COL_LOAD13:%.*]] = load <3 x double>, ptr [[VEC_GEP11]], align 16
 ; CHECK-NEXT:    [[BLOCK:%.*]] = shufflevector <3 x double> [[COL_LOAD7]], <3 x double> poison, <2 x i32> <i32 0, i32 1>
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <3 x double> [[COL_LOAD]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <2 x double> poison, double [[TMP2]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <2 x double> poison, double [[TMP2]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP3:%.*]] = fmul <2 x double> [[BLOCK]], [[SPLAT_SPLAT]]
 ; CHECK-NEXT:    [[BLOCK14:%.*]] = shufflevector <3 x double> [[COL_LOAD10]], <3 x double> poison, <2 x i32> <i32 0, i32 1>
 ; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <3 x double> [[COL_LOAD]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT15:%.*]] = insertelement <2 x double> poison, double [[TMP4]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT15:%.*]] = insertelement <2 x double> poison, double [[TMP4]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT16:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT15]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP5:%.*]] = fmul <2 x double> [[BLOCK14]], [[SPLAT_SPLAT16]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = fadd <2 x double> [[TMP3]], [[TMP5]]
 ; CHECK-NEXT:    [[BLOCK17:%.*]] = shufflevector <3 x double> [[COL_LOAD13]], <3 x double> poison, <2 x i32> <i32 0, i32 1>
 ; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <3 x double> [[COL_LOAD]], i64 2
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT18:%.*]] = insertelement <2 x double> poison, double [[TMP7]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT18:%.*]] = insertelement <2 x double> poison, double [[TMP7]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT19:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT18]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP8:%.*]] = fmul <2 x double> [[BLOCK17]], [[SPLAT_SPLAT19]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = fadd <2 x double> [[TMP6]], [[TMP8]]
@@ -326,18 +326,18 @@ define void @multiply_tt_t(ptr %A, ptr %B, ptr %C) {
 ; CHECK-NEXT:    [[TMP11:%.*]] = shufflevector <3 x double> undef, <3 x double> [[TMP10]], <3 x i32> <i32 3, i32 4, i32 2>
 ; CHECK-NEXT:    [[BLOCK20:%.*]] = shufflevector <3 x double> [[COL_LOAD7]], <3 x double> poison, <1 x i32> <i32 2>
 ; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <3 x double> [[COL_LOAD]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT21:%.*]] = insertelement <1 x double> poison, double [[TMP12]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT21:%.*]] = insertelement <1 x double> poison, double [[TMP12]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT22:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT21]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP13:%.*]] = fmul <1 x double> [[BLOCK20]], [[SPLAT_SPLAT22]]
 ; CHECK-NEXT:    [[BLOCK23:%.*]] = shufflevector <3 x double> [[COL_LOAD10]], <3 x double> poison, <1 x i32> <i32 2>
 ; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <3 x double> [[COL_LOAD]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT24:%.*]] = insertelement <1 x double> poison, double [[TMP14]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT24:%.*]] = insertelement <1 x double> poison, double [[TMP14]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT25:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT24]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP15:%.*]] = fmul <1 x double> [[BLOCK23]], [[SPLAT_SPLAT25]]
 ; CHECK-NEXT:    [[TMP16:%.*]] = fadd <1 x double> [[TMP13]], [[TMP15]]
 ; CHECK-NEXT:    [[BLOCK26:%.*]] = shufflevector <3 x double> [[COL_LOAD13]], <3 x double> poison, <1 x i32> <i32 2>
 ; CHECK-NEXT:    [[TMP17:%.*]] = extractelement <3 x double> [[COL_LOAD]], i64 2
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT27:%.*]] = insertelement <1 x double> poison, double [[TMP17]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT27:%.*]] = insertelement <1 x double> poison, double [[TMP17]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT28:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT27]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP18:%.*]] = fmul <1 x double> [[BLOCK26]], [[SPLAT_SPLAT28]]
 ; CHECK-NEXT:    [[TMP19:%.*]] = fadd <1 x double> [[TMP16]], [[TMP18]]
@@ -345,18 +345,18 @@ define void @multiply_tt_t(ptr %A, ptr %B, ptr %C) {
 ; CHECK-NEXT:    [[TMP21:%.*]] = shufflevector <3 x double> [[TMP11]], <3 x double> [[TMP20]], <3 x i32> <i32 0, i32 1, i32 3>
 ; CHECK-NEXT:    [[BLOCK29:%.*]] = shufflevector <3 x double> [[COL_LOAD7]], <3 x double> poison, <2 x i32> <i32 0, i32 1>
 ; CHECK-NEXT:    [[TMP22:%.*]] = extractelement <3 x double> [[COL_LOAD2]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT30:%.*]] = insertelement <2 x double> poison, double [[TMP22]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT30:%.*]] = insertelement <2 x double> poison, double [[TMP22]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT31:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT30]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP23:%.*]] = fmul <2 x double> [[BLOCK29]], [[SPLAT_SPLAT31]]
 ; CHECK-NEXT:    [[BLOCK32:%.*]] = shufflevector <3 x double> [[COL_LOAD10]], <3 x double> poison, <2 x i32> <i32 0, i32 1>
 ; CHECK-NEXT:    [[TMP24:%.*]] = extractelement <3 x double> [[COL_LOAD2]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT33:%.*]] = insertelement <2 x double> poison, double [[TMP24]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT33:%.*]] = insertelement <2 x double> poison, double [[TMP24]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT34:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT33]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP25:%.*]] = fmul <2 x double> [[BLOCK32]], [[SPLAT_SPLAT34]]
 ; CHECK-NEXT:    [[TMP26:%.*]] = fadd <2 x double> [[TMP23]], [[TMP25]]
 ; CHECK-NEXT:    [[BLOCK35:%.*]] = shufflevector <3 x double> [[COL_LOAD13]], <3 x double> poison, <2 x i32> <i32 0, i32 1>
 ; CHECK-NEXT:    [[TMP27:%.*]] = extractelement <3 x double> [[COL_LOAD2]], i64 2
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT36:%.*]] = insertelement <2 x double> poison, double [[TMP27]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT36:%.*]] = insertelement <2 x double> poison, double [[TMP27]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT37:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT36]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP28:%.*]] = fmul <2 x double> [[BLOCK35]], [[SPLAT_SPLAT37]]
 ; CHECK-NEXT:    [[TMP29:%.*]] = fadd <2 x double> [[TMP26]], [[TMP28]]
@@ -364,18 +364,18 @@ define void @multiply_tt_t(ptr %A, ptr %B, ptr %C) {
 ; CHECK-NEXT:    [[TMP31:%.*]] = shufflevector <3 x double> undef, <3 x double> [[TMP30]], <3 x i32> <i32 3, i32 4, i32 2>
 ; CHECK-NEXT:    [[BLOCK38:%.*]] = shufflevector <3 x double> [[COL_LOAD7]], <3 x double> poison, <1 x i32> <i32 2>
 ; CHECK-NEXT:    [[TMP32:%.*]] = extractelement <3 x double> [[COL_LOAD2]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT39:%.*]] = insertelement <1 x double> poison, double [[TMP32]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT39:%.*]] = insertelement <1 x double> poison, double [[TMP32]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT40:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT39]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP33:%.*]] = fmul <1 x double> [[BLOCK38]], [[SPLAT_SPLAT40]]
 ; CHECK-NEXT:    [[BLOCK41:%.*]] = shufflevector <3 x double> [[COL_LOAD10]], <3 x double> poison, <1 x i32> <i32 2>
 ; CHECK-NEXT:    [[TMP34:%.*]] = extractelement <3 x double> [[COL_LOAD2]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT42:%.*]] = insertelement <1 x double> poison, double [[TMP34]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT42:%.*]] = insertelement <1 x double> poison, double [[TMP34]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT43:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT42]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP35:%.*]] = fmul <1 x double> [[BLOCK41]], [[SPLAT_SPLAT43]]
 ; CHECK-NEXT:    [[TMP36:%.*]] = fadd <1 x double> [[TMP33]], [[TMP35]]
 ; CHECK-NEXT:    [[BLOCK44:%.*]] = shufflevector <3 x double> [[COL_LOAD13]], <3 x double> poison, <1 x i32> <i32 2>
 ; CHECK-NEXT:    [[TMP37:%.*]] = extractelement <3 x double> [[COL_LOAD2]], i64 2
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT45:%.*]] = insertelement <1 x double> poison, double [[TMP37]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT45:%.*]] = insertelement <1 x double> poison, double [[TMP37]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT46:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT45]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP38:%.*]] = fmul <1 x double> [[BLOCK44]], [[SPLAT_SPLAT46]]
 ; CHECK-NEXT:    [[TMP39:%.*]] = fadd <1 x double> [[TMP36]], [[TMP38]]
@@ -383,18 +383,18 @@ define void @multiply_tt_t(ptr %A, ptr %B, ptr %C) {
 ; CHECK-NEXT:    [[TMP41:%.*]] = shufflevector <3 x double> [[TMP31]], <3 x double> [[TMP40]], <3 x i32> <i32 0, i32 1, i32 3>
 ; CHECK-NEXT:    [[BLOCK47:%.*]] = shufflevector <3 x double> [[COL_LOAD7]], <3 x double> poison, <2 x i32> <i32 0, i32 1>
 ; CHECK-NEXT:    [[TMP42:%.*]] = extractelement <3 x double> [[COL_LOAD5]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT48:%.*]] = insertelement <2 x double> poison, double [[TMP42]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT48:%.*]] = insertelement <2 x double> poison, double [[TMP42]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT49:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT48]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP43:%.*]] = fmul <2 x double> [[BLOCK47]], [[SPLAT_SPLAT49]]
 ; CHECK-NEXT:    [[BLOCK50:%.*]] = shufflevector <3 x double> [[COL_LOAD10]], <3 x double> poison, <2 x i32> <i32 0, i32 1>
 ; CHECK-NEXT:    [[TMP44:%.*]] = extractelement <3 x double> [[COL_LOAD5]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT51:%.*]] = insertelement <2 x double> poison, double [[TMP44]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT51:%.*]] = insertelement <2 x double> poison, double [[TMP44]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT52:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT51]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP45:%.*]] = fmul <2 x double> [[BLOCK50]], [[SPLAT_SPLAT52]]
 ; CHECK-NEXT:    [[TMP46:%.*]] = fadd <2 x double> [[TMP43]], [[TMP45]]
 ; CHECK-NEXT:    [[BLOCK53:%.*]] = shufflevector <3 x double> [[COL_LOAD13]], <3 x double> poison, <2 x i32> <i32 0, i32 1>
 ; CHECK-NEXT:    [[TMP47:%.*]] = extractelement <3 x double> [[COL_LOAD5]], i64 2
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT54:%.*]] = insertelement <2 x double> poison, double [[TMP47]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT54:%.*]] = insertelement <2 x double> poison, double [[TMP47]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT55:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT54]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP48:%.*]] = fmul <2 x double> [[BLOCK53]], [[SPLAT_SPLAT55]]
 ; CHECK-NEXT:    [[TMP49:%.*]] = fadd <2 x double> [[TMP46]], [[TMP48]]
@@ -402,18 +402,18 @@ define void @multiply_tt_t(ptr %A, ptr %B, ptr %C) {
 ; CHECK-NEXT:    [[TMP51:%.*]] = shufflevector <3 x double> undef, <3 x double> [[TMP50]], <3 x i32> <i32 3, i32 4, i32 2>
 ; CHECK-NEXT:    [[BLOCK56:%.*]] = shufflevector <3 x double> [[COL_LOAD7]], <3 x double> poison, <1 x i32> <i32 2>
 ; CHECK-NEXT:    [[TMP52:%.*]] = extractelement <3 x double> [[COL_LOAD5]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT57:%.*]] = insertelement <1 x double> poison, double [[TMP52]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT57:%.*]] = insertelement <1 x double> poison, double [[TMP52]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT58:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT57]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP53:%.*]] = fmul <1 x double> [[BLOCK56]], [[SPLAT_SPLAT58]]
 ; CHECK-NEXT:    [[BLOCK59:%.*]] = shufflevector <3 x double> [[COL_LOAD10]], <3 x double> poison, <1 x i32> <i32 2>
 ; CHECK-NEXT:    [[TMP54:%.*]] = extractelement <3 x double> [[COL_LOAD5]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT60:%.*]] = insertelement <1 x double> poison, double [[TMP54]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT60:%.*]] = insertelement <1 x double> poison, double [[TMP54]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT61:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT60]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP55:%.*]] = fmul <1 x double> [[BLOCK59]], [[SPLAT_SPLAT61]]
 ; CHECK-NEXT:    [[TMP56:%.*]] = fadd <1 x double> [[TMP53]], [[TMP55]]
 ; CHECK-NEXT:    [[BLOCK62:%.*]] = shufflevector <3 x double> [[COL_LOAD13]], <3 x double> poison, <1 x i32> <i32 2>
 ; CHECK-NEXT:    [[TMP57:%.*]] = extractelement <3 x double> [[COL_LOAD5]], i64 2
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT63:%.*]] = insertelement <1 x double> poison, double [[TMP57]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT63:%.*]] = insertelement <1 x double> poison, double [[TMP57]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT64:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT63]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP58:%.*]] = fmul <1 x double> [[BLOCK62]], [[SPLAT_SPLAT64]]
 ; CHECK-NEXT:    [[TMP59:%.*]] = fadd <1 x double> [[TMP56]], [[TMP58]]
@@ -483,18 +483,18 @@ define void @multiply_nt_t(ptr %A, ptr %B, ptr %C) {
 ; CHECK-NEXT:    [[SPLIT5:%.*]] = shufflevector <6 x double> [[TMP3]], <6 x double> poison, <2 x i32> <i32 4, i32 5>
 ; CHECK-NEXT:    [[BLOCK:%.*]] = shufflevector <4 x double> [[SPLIT]], <4 x double> poison, <2 x i32> <i32 0, i32 1>
 ; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <2 x double> [[SPLIT3]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <2 x double> poison, double [[TMP8]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <2 x double> poison, double [[TMP8]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP9:%.*]] = fmul <2 x double> [[BLOCK]], [[SPLAT_SPLAT]]
 ; CHECK-NEXT:    [[BLOCK6:%.*]] = shufflevector <4 x double> [[SPLIT1]], <4 x double> poison, <2 x i32> <i32 0, i32 1>
 ; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <2 x double> [[SPLIT4]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT7:%.*]] = insertelement <2 x double> poison, double [[TMP10]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT7:%.*]] = insertelement <2 x double> poison, double [[TMP10]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT8:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT7]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP11:%.*]] = fmul <2 x double> [[BLOCK6]], [[SPLAT_SPLAT8]]
 ; CHECK-NEXT:    [[TMP12:%.*]] = fadd <2 x double> [[TMP9]], [[TMP11]]
 ; CHECK-NEXT:    [[BLOCK9:%.*]] = shufflevector <4 x double> [[SPLIT2]], <4 x double> poison, <2 x i32> <i32 0, i32 1>
 ; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <2 x double> [[SPLIT5]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT10:%.*]] = insertelement <2 x double> poison, double [[TMP13]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT10:%.*]] = insertelement <2 x double> poison, double [[TMP13]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT11:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT10]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP14:%.*]] = fmul <2 x double> [[BLOCK9]], [[SPLAT_SPLAT11]]
 ; CHECK-NEXT:    [[TMP15:%.*]] = fadd <2 x double> [[TMP12]], [[TMP14]]
@@ -502,18 +502,18 @@ define void @multiply_nt_t(ptr %A, ptr %B, ptr %C) {
 ; CHECK-NEXT:    [[TMP17:%.*]] = shufflevector <4 x double> undef, <4 x double> [[TMP16]], <4 x i32> <i32 4, i32 5, i32 2, i32 3>
 ; CHECK-NEXT:    [[BLOCK12:%.*]] = shufflevector <4 x double> [[SPLIT]], <4 x double> poison, <2 x i32> <i32 2, i32 3>
 ; CHECK-NEXT:    [[TMP18:%.*]] = extractelement <2 x double> [[SPLIT3]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT13:%.*]] = insertelement <2 x double> poison, double [[TMP18]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT13:%.*]] = insertelement <2 x double> poison, double [[TMP18]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT14:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT13]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP19:%.*]] = fmul <2 x double> [[BLOCK12]], [[SPLAT_SPLAT14]]
 ; CHECK-NEXT:    [[BLOCK15:%.*]] = shufflevector <4 x double> [[SPLIT1]], <4 x double> poison, <2 x i32> <i32 2, i32 3>
 ; CHECK-NEXT:    [[TMP20:%.*]] = extractelement <2 x double> [[SPLIT4]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT16:%.*]] = insertelement <2 x double> poison, double [[TMP20]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT16:%.*]] = insertelement <2 x double> poison, double [[TMP20]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT17:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT16]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP21:%.*]] = fmul <2 x double> [[BLOCK15]], [[SPLAT_SPLAT17]]
 ; CHECK-NEXT:    [[TMP22:%.*]] = fadd <2 x double> [[TMP19]], [[TMP21]]
 ; CHECK-NEXT:    [[BLOCK18:%.*]] = shufflevector <4 x double> [[SPLIT2]], <4 x double> poison, <2 x i32> <i32 2, i32 3>
 ; CHECK-NEXT:    [[TMP23:%.*]] = extractelement <2 x double> [[SPLIT5]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT19:%.*]] = insertelement <2 x double> poison, double [[TMP23]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT19:%.*]] = insertelement <2 x double> poison, double [[TMP23]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT20:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT19]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP24:%.*]] = fmul <2 x double> [[BLOCK18]], [[SPLAT_SPLAT20]]
 ; CHECK-NEXT:    [[TMP25:%.*]] = fadd <2 x double> [[TMP22]], [[TMP24]]
@@ -521,18 +521,18 @@ define void @multiply_nt_t(ptr %A, ptr %B, ptr %C) {
 ; CHECK-NEXT:    [[TMP27:%.*]] = shufflevector <4 x double> [[TMP17]], <4 x double> [[TMP26]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
 ; CHECK-NEXT:    [[BLOCK21:%.*]] = shufflevector <4 x double> [[SPLIT]], <4 x double> poison, <2 x i32> <i32 0, i32 1>
 ; CHECK-NEXT:    [[TMP28:%.*]] = extractelement <2 x double> [[SPLIT3]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT22:%.*]] = insertelement <2 x double> poison, double [[TMP28]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT22:%.*]] = insertelement <2 x double> poison, double [[TMP28]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT23:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT22]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP29:%.*]] = fmul <2 x double> [[BLOCK21]], [[SPLAT_SPLAT23]]
 ; CHECK-NEXT:    [[BLOCK24:%.*]] = shufflevector <4 x double> [[SPLIT1]], <4 x double> poison, <2 x i32> <i32 0, i32 1>
 ; CHECK-NEXT:    [[TMP30:%.*]] = extractelement <2 x double> [[SPLIT4]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT25:%.*]] = insertelement <2 x double> poison, double [[TMP30]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT25:%.*]] = insertelement <2 x double> poison, double [[TMP30]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT26:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT25]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP31:%.*]] = fmul <2 x double> [[BLOCK24]], [[SPLAT_SPLAT26]]
 ; CHECK-NEXT:    [[TMP32:%.*]] = fadd <2 x double> [[TMP29]], [[TMP31]]
 ; CHECK-NEXT:    [[BLOCK27:%.*]] = shufflevector <4 x double> [[SPLIT2]], <4 x double> poison, <2 x i32> <i32 0, i32 1>
 ; CHECK-NEXT:    [[TMP33:%.*]] = extractelement <2 x double> [[SPLIT5]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT28:%.*]] = insertelement <2 x double> poison, double [[TMP33]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT28:%.*]] = insertelement <2 x double> poison, double [[TMP33]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT29:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT28]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP34:%.*]] = fmul <2 x double> [[BLOCK27]], [[SPLAT_SPLAT29]]
 ; CHECK-NEXT:    [[TMP35:%.*]] = fadd <2 x double> [[TMP32]], [[TMP34]]
@@ -540,18 +540,18 @@ define void @multiply_nt_t(ptr %A, ptr %B, ptr %C) {
 ; CHECK-NEXT:    [[TMP37:%.*]] = shufflevector <4 x double> undef, <4 x double> [[TMP36]], <4 x i32> <i32 4, i32 5, i32 2, i32 3>
 ; CHECK-NEXT:    [[BLOCK30:%.*]] = shufflevector <4 x double> [[SPLIT]], <4 x double> poison, <2 x i32> <i32 2, i32 3>
 ; CHECK-NEXT:    [[TMP38:%.*]] = extractelement <2 x double> [[SPLIT3]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT31:%.*]] = insertelement <2 x double> poison, double [[TMP38]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT31:%.*]] = insertelement <2 x double> poison, double [[TMP38]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT32:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT31]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP39:%.*]] = fmul <2 x double> [[BLOCK30]], [[SPLAT_SPLAT32]]
 ; CHECK-NEXT:    [[BLOCK33:%.*]] = shufflevector <4 x double> [[SPLIT1]], <4 x double> poison, <2 x i32> <i32 2, i32 3>
 ; CHECK-NEXT:    [[TMP40:%.*]] = extractelement <2 x double> [[SPLIT4]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT34:%.*]] = insertelement <2 x double> poison, double [[TMP40]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT34:%.*]] = insertelement <2 x double> poison, double [[TMP40]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT35:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT34]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP41:%.*]] = fmul <2 x double> [[BLOCK33]], [[SPLAT_SPLAT35]]
 ; CHECK-NEXT:    [[TMP42:%.*]] = fadd <2 x double> [[TMP39]], [[TMP41]]
 ; CHECK-NEXT:    [[BLOCK36:%.*]] = shufflevector <4 x double> [[SPLIT2]], <4 x double> poison, <2 x i32> <i32 2, i32 3>
 ; CHECK-NEXT:    [[TMP43:%.*]] = extractelement <2 x double> [[SPLIT5]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT37:%.*]] = insertelement <2 x double> poison, double [[TMP43]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT37:%.*]] = insertelement <2 x double> poison, double [[TMP43]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT38:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT37]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP44:%.*]] = fmul <2 x double> [[BLOCK36]], [[SPLAT_SPLAT38]]
 ; CHECK-NEXT:    [[TMP45:%.*]] = fadd <2 x double> [[TMP42]], [[TMP44]]
@@ -616,18 +616,18 @@ define void @multiply_ntt_t(ptr %A, ptr %B, ptr %C, ptr %R) {
 ; CHECK-NEXT:    [[COL_LOAD78:%.*]] = load <3 x double>, ptr [[VEC_GEP76]], align 16
 ; CHECK-NEXT:    [[BLOCK79:%.*]] = shufflevector <3 x double> [[COL_LOAD72]], <3 x double> poison, <2 x i32> <i32 0, i32 1>
 ; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <3 x double> [[COL_LOAD64]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT80:%.*]] = insertelement <2 x double> poison, double [[TMP6]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT80:%.*]] = insertelement <2 x double> poison, double [[TMP6]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT81:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT80]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP7:%.*]] = fmul <2 x double> [[BLOCK79]], [[SPLAT_SPLAT81]]
 ; CHECK-NEXT:    [[BLOCK82:%.*]] = shufflevector <3 x double> [[COL_LOAD75]], <3 x double> poison, <2 x i32> <i32 0, i32 1>
 ; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <3 x double> [[COL_LOAD64]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT83:%.*]] = insertelement <2 x double> poison, double [[TMP8]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT83:%.*]] = insertelement <2 x double> poison, double [[TMP8]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT84:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT83]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP9:%.*]] = fmul <2 x double> [[BLOCK82]], [[SPLAT_SPLAT84]]
 ; CHECK-NEXT:    [[TMP10:%.*]] = fadd <2 x double> [[TMP7]], [[TMP9]]
 ; CHECK-NEXT:    [[BLOCK85:%.*]] = shufflevector <3 x double> [[COL_LOAD78]], <3 x double> poison, <2 x i32> <i32 0, i32 1>
 ; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <3 x double> [[COL_LOAD64]], i64 2
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT86:%.*]] = insertelement <2 x double> poison, double [[TMP11]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT86:%.*]] = insertelement <2 x double> poison, double [[TMP11]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT87:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT86]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP12:%.*]] = fmul <2 x double> [[BLOCK85]], [[SPLAT_SPLAT87]]
 ; CHECK-NEXT:    [[TMP13:%.*]] = fadd <2 x double> [[TMP10]], [[TMP12]]
@@ -635,18 +635,18 @@ define void @multiply_ntt_t(ptr %A, ptr %B, ptr %C, ptr %R) {
 ; CHECK-NEXT:    [[TMP15:%.*]] = shufflevector <3 x double> undef, <3 x double> [[TMP14]], <3 x i32> <i32 3, i32 4, i32 2>
 ; CHECK-NEXT:    [[BLOCK88:%.*]] = shufflevector <3 x double> [[COL_LOAD72]], <3 x double> poison, <1 x i32> <i32 2>
 ; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <3 x double> [[COL_LOAD64]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT89:%.*]] = insertelement <1 x double> poison, double [[TMP16]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT89:%.*]] = insertelement <1 x double> poison, double [[TMP16]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT90:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT89]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP17:%.*]] = fmul <1 x double> [[BLOCK88]], [[SPLAT_SPLAT90]]
 ; CHECK-NEXT:    [[BLOCK91:%.*]] = shufflevector <3 x double> [[COL_LOAD75]], <3 x double> poison, <1 x i32> <i32 2>
 ; CHECK-NEXT:    [[TMP18:%.*]] = extractelement <3 x double> [[COL_LOAD64]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT92:%.*]] = insertelement <1 x double> poison, double [[TMP18]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT92:%.*]] = insertelement <1 x double> poison, double [[TMP18]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT93:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT92]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP19:%.*]] = fmul <1 x double> [[BLOCK91]], [[SPLAT_SPLAT93]]
 ; CHECK-NEXT:    [[TMP20:%.*]] = fadd <1 x double> [[TMP17]], [[TMP19]]
 ; CHECK-NEXT:    [[BLOCK94:%.*]] = shufflevector <3 x double> [[COL_LOAD78]], <3 x double> poison, <1 x i32> <i32 2>
 ; CHECK-NEXT:    [[TMP21:%.*]] = extractelement <3 x double> [[COL_LOAD64]], i64 2
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT95:%.*]] = insertelement <1 x double> poison, double [[TMP21]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT95:%.*]] = insertelement <1 x double> poison, double [[TMP21]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT96:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT95]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP22:%.*]] = fmul <1 x double> [[BLOCK94]], [[SPLAT_SPLAT96]]
 ; CHECK-NEXT:    [[TMP23:%.*]] = fadd <1 x double> [[TMP20]], [[TMP22]]
@@ -654,18 +654,18 @@ define void @multiply_ntt_t(ptr %A, ptr %B, ptr %C, ptr %R) {
 ; CHECK-NEXT:    [[TMP25:%.*]] = shufflevector <3 x double> [[TMP15]], <3 x double> [[TMP24]], <3 x i32> <i32 0, i32 1, i32 3>
 ; CHECK-NEXT:    [[BLOCK97:%.*]] = shufflevector <3 x double> [[COL_LOAD72]], <3 x double> poison, <2 x i32> <i32 0, i32 1>
 ; CHECK-NEXT:    [[TMP26:%.*]] = extractelement <3 x double> [[COL_LOAD67]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT98:%.*]] = insertelement <2 x double> poison, double [[TMP26]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT98:%.*]] = insertelement <2 x double> poison, double [[TMP26]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT99:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT98]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP27:%.*]] = fmul <2 x double> [[BLOCK97]], [[SPLAT_SPLAT99]]
 ; CHECK-NEXT:    [[BLOCK100:%.*]] = shufflevector <3 x double> [[COL_LOAD75]], <3 x double> poison, <2 x i32> <i32 0, i32 1>
 ; CHECK-NEXT:    [[TMP28:%.*]] = extractelement <3 x double> [[COL_LOAD67]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT101:%.*]] = insertelement <2 x double> poison, double [[TMP28]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT101:%.*]] = insertelement <2 x double> poison, double [[TMP28]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT102:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT101]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP29:%.*]] = fmul <2 x double> [[BLOCK100]], [[SPLAT_SPLAT102]]
 ; CHECK-NEXT:    [[TMP30:%.*]] = fadd <2 x double> [[TMP27]], [[TMP29]]
 ; CHECK-NEXT:    [[BLOCK103:%.*]] = shufflevector <3 x double> [[COL_LOAD78]], <3 x double> poison, <2 x i32> <i32 0, i32 1>
 ; CHECK-NEXT:    [[TMP31:%.*]] = extractelement <3 x double> [[COL_LOAD67]], i64 2
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT104:%.*]] = insertelement <2 x double> poison, double [[TMP31]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT104:%.*]] = insertelement <2 x double> poison, double [[TMP31]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT105:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT104]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP32:%.*]] = fmul <2 x double> [[BLOCK103]], [[SPLAT_SPLAT105]]
 ; CHECK-NEXT:    [[TMP33:%.*]] = fadd <2 x double> [[TMP30]], [[TMP32]]
@@ -673,18 +673,18 @@ define void @multiply_ntt_t(ptr %A, ptr %B, ptr %C, ptr %R) {
 ; CHECK-NEXT:    [[TMP35:%.*]] = shufflevector <3 x double> undef, <3 x double> [[TMP34]], <3 x i32> <i32 3, i32 4, i32 2>
 ; CHECK-NEXT:    [[BLOCK106:%.*]] = shufflevector <3 x double> [[COL_LOAD72]], <3 x double> poison, <1 x i32> <i32 2>
 ; CHECK-NEXT:    [[TMP36:%.*]] = extractelement <3 x double> [[COL_LOAD67]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT107:%.*]] = insertelement <1 x double> poison, double [[TMP36]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT107:%.*]] = insertelement <1 x double> poison, double [[TMP36]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT108:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT107]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP37:%.*]] = fmul <1 x double> [[BLOCK106]], [[SPLAT_SPLAT108]]
 ; CHECK-NEXT:    [[BLOCK109:%.*]] = shufflevector <3 x double> [[COL_LOAD75]], <3 x double> poison, <1 x i32> <i32 2>
 ; CHECK-NEXT:    [[TMP38:%.*]] = extractelement <3 x double> [[COL_LOAD67]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT110:%.*]] = insertelement <1 x double> poison, double [[TMP38]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT110:%.*]] = insertelement <1 x double> poison, double [[TMP38]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT111:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT110]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP39:%.*]] = fmul <1 x double> [[BLOCK109]], [[SPLAT_SPLAT111]]
 ; CHECK-NEXT:    [[TMP40:%.*]] = fadd <1 x double> [[TMP37]], [[TMP39]]
 ; CHECK-NEXT:    [[BLOCK112:%.*]] = shufflevector <3 x double> [[COL_LOAD78]], <3 x double> poison, <1 x i32> <i32 2>
 ; CHECK-NEXT:    [[TMP41:%.*]] = extractelement <3 x double> [[COL_LOAD67]], i64 2
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT113:%.*]] = insertelement <1 x double> poison, double [[TMP41]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT113:%.*]] = insertelement <1 x double> poison, double [[TMP41]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT114:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT113]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP42:%.*]] = fmul <1 x double> [[BLOCK112]], [[SPLAT_SPLAT114]]
 ; CHECK-NEXT:    [[TMP43:%.*]] = fadd <1 x double> [[TMP40]], [[TMP42]]
@@ -692,18 +692,18 @@ define void @multiply_ntt_t(ptr %A, ptr %B, ptr %C, ptr %R) {
 ; CHECK-NEXT:    [[TMP45:%.*]] = shufflevector <3 x double> [[TMP35]], <3 x double> [[TMP44]], <3 x i32> <i32 0, i32 1, i32 3>
 ; CHECK-NEXT:    [[BLOCK115:%.*]] = shufflevector <3 x double> [[COL_LOAD72]], <3 x double> poison, <2 x i32> <i32 0, i32 1>
 ; CHECK-NEXT:    [[TMP46:%.*]] = extractelement <3 x double> [[COL_LOAD70]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT116:%.*]] = insertelement <2 x double> poison, double [[TMP46]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT116:%.*]] = insertelement <2 x double> poison, double [[TMP46]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT117:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT116]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP47:%.*]] = fmul <2 x double> [[BLOCK115]], [[SPLAT_SPLAT117]]
 ; CHECK-NEXT:    [[BLOCK118:%.*]] = shufflevector <3 x double> [[COL_LOAD75]], <3 x double> poison, <2 x i32> <i32 0, i32 1>
 ; CHECK-NEXT:    [[TMP48:%.*]] = extractelement <3 x double> [[COL_LOAD70]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT119:%.*]] = insertelement <2 x double> poison, double [[TMP48]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT119:%.*]] = insertelement <2 x double> poison, double [[TMP48]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT120:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT119]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP49:%.*]] = fmul <2 x double> [[BLOCK118]], [[SPLAT_SPLAT120]]
 ; CHECK-NEXT:    [[TMP50:%.*]] = fadd <2 x double> [[TMP47]], [[TMP49]]
 ; CHECK-NEXT:    [[BLOCK121:%.*]] = shufflevector <3 x double> [[COL_LOAD78]], <3 x double> poison, <2 x i32> <i32 0, i32 1>
 ; CHECK-NEXT:    [[TMP51:%.*]] = extractelement <3 x double> [[COL_LOAD70]], i64 2
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT122:%.*]] = insertelement <2 x double> poison, double [[TMP51]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT122:%.*]] = insertelement <2 x double> poison, double [[TMP51]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT123:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT122]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP52:%.*]] = fmul <2 x double> [[BLOCK121]], [[SPLAT_SPLAT123]]
 ; CHECK-NEXT:    [[TMP53:%.*]] = fadd <2 x double> [[TMP50]], [[TMP52]]
@@ -711,18 +711,18 @@ define void @multiply_ntt_t(ptr %A, ptr %B, ptr %C, ptr %R) {
 ; CHECK-NEXT:    [[TMP55:%.*]] = shufflevector <3 x double> undef, <3 x double> [[TMP54]], <3 x i32> <i32 3, i32 4, i32 2>
 ; CHECK-NEXT:    [[BLOCK124:%.*]] = shufflevector <3 x double> [[COL_LOAD72]], <3 x double> poison, <1 x i32> <i32 2>
 ; CHECK-NEXT:    [[TMP56:%.*]] = extractelement <3 x double> [[COL_LOAD70]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT125:%.*]] = insertelement <1 x double> poison, double [[TMP56]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT125:%.*]] = insertelement <1 x double> poison, double [[TMP56]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT126:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT125]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP57:%.*]] = fmul <1 x double> [[BLOCK124]], [[SPLAT_SPLAT126]]
 ; CHECK-NEXT:    [[BLOCK127:%.*]] = shufflevector <3 x double> [[COL_LOAD75]], <3 x double> poison, <1 x i32> <i32 2>
 ; CHECK-NEXT:    [[TMP58:%.*]] = extractelement <3 x double> [[COL_LOAD70]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT128:%.*]] = insertelement <1 x double> poison, double [[TMP58]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT128:%.*]] = insertelement <1 x double> poison, double [[TMP58]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT129:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT128]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP59:%.*]] = fmul <1 x double> [[BLOCK127]], [[SPLAT_SPLAT129]]
 ; CHECK-NEXT:    [[TMP60:%.*]] = fadd <1 x double> [[TMP57]], [[TMP59]]
 ; CHECK-NEXT:    [[BLOCK130:%.*]] = shufflevector <3 x double> [[COL_LOAD78]], <3 x double> poison, <1 x i32> <i32 2>
 ; CHECK-NEXT:    [[TMP61:%.*]] = extractelement <3 x double> [[COL_LOAD70]], i64 2
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT131:%.*]] = insertelement <1 x double> poison, double [[TMP61]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT131:%.*]] = insertelement <1 x double> poison, double [[TMP61]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT132:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT131]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP62:%.*]] = fmul <1 x double> [[BLOCK130]], [[SPLAT_SPLAT132]]
 ; CHECK-NEXT:    [[TMP63:%.*]] = fadd <1 x double> [[TMP60]], [[TMP62]]
@@ -739,18 +739,18 @@ define void @multiply_ntt_t(ptr %A, ptr %B, ptr %C, ptr %R) {
 ; CHECK-NEXT:    [[SPLIT6:%.*]] = shufflevector <9 x double> [[TMP3]], <9 x double> poison, <3 x i32> <i32 6, i32 7, i32 8>
 ; CHECK-NEXT:    [[BLOCK:%.*]] = shufflevector <3 x double> [[SPLIT]], <3 x double> poison, <2 x i32> <i32 0, i32 1>
 ; CHECK-NEXT:    [[TMP69:%.*]] = extractelement <3 x double> [[SPLIT4]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <2 x double> poison, double [[TMP69]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <2 x double> poison, double [[TMP69]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP70:%.*]] = fmul <2 x double> [[BLOCK]], [[SPLAT_SPLAT]]
 ; CHECK-NEXT:    [[BLOCK7:%.*]] = shufflevector <3 x double> [[SPLIT2]], <3 x double> poison, <2 x i32> <i32 0, i32 1>
 ; CHECK-NEXT:    [[TMP71:%.*]] = extractelement <3 x double> [[SPLIT5]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT8:%.*]] = insertelement <2 x double> poison, double [[TMP71]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT8:%.*]] = insertelement <2 x double> poison, double [[TMP71]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT9:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT8]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP72:%.*]] = fmul <2 x double> [[BLOCK7]], [[SPLAT_SPLAT9]]
 ; CHECK-NEXT:    [[TMP73:%.*]] = fadd <2 x double> [[TMP70]], [[TMP72]]
 ; CHECK-NEXT:    [[BLOCK10:%.*]] = shufflevector <3 x double> [[SPLIT3]], <3 x double> poison, <2 x i32> <i32 0, i32 1>
 ; CHECK-NEXT:    [[TMP74:%.*]] = extractelement <3 x double> [[SPLIT6]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT11:%.*]] = insertelement <2 x double> poison, double [[TMP74]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT11:%.*]] = insertelement <2 x double> poison, double [[TMP74]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT12:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT11]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP75:%.*]] = fmul <2 x double> [[BLOCK10]], [[SPLAT_SPLAT12]]
 ; CHECK-NEXT:    [[TMP76:%.*]] = fadd <2 x double> [[TMP73]], [[TMP75]]
@@ -758,18 +758,18 @@ define void @multiply_ntt_t(ptr %A, ptr %B, ptr %C, ptr %R) {
 ; CHECK-NEXT:    [[TMP78:%.*]] = shufflevector <3 x double> undef, <3 x double> [[TMP77]], <3 x i32> <i32 3, i32 4, i32 2>
 ; CHECK-NEXT:    [[BLOCK13:%.*]] = shufflevector <3 x double> [[SPLIT]], <3 x double> poison, <1 x i32> <i32 2>
 ; CHECK-NEXT:    [[TMP79:%.*]] = extractelement <3 x double> [[SPLIT4]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT14:%.*]] = insertelement <1 x double> poison, double [[TMP79]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT14:%.*]] = insertelement <1 x double> poison, double [[TMP79]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT15:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT14]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP80:%.*]] = fmul <1 x double> [[BLOCK13]], [[SPLAT_SPLAT15]]
 ; CHECK-NEXT:    [[BLOCK16:%.*]] = shufflevector <3 x double> [[SPLIT2]], <3 x double> poison, <1 x i32> <i32 2>
 ; CHECK-NEXT:    [[TMP81:%.*]] = extractelement <3 x double> [[SPLIT5]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT17:%.*]] = insertelement <1 x double> poison, double [[TMP81]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT17:%.*]] = insertelement <1 x double> poison, double [[TMP81]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT18:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT17]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP82:%.*]] = fmul <1 x double> [[BLOCK16]], [[SPLAT_SPLAT18]]
 ; CHECK-NEXT:    [[TMP83:%.*]] = fadd <1 x double> [[TMP80]], [[TMP82]]
 ; CHECK-NEXT:    [[BLOCK19:%.*]] = shufflevector <3 x double> [[SPLIT3]], <3 x double> poison, <1 x i32> <i32 2>
 ; CHECK-NEXT:    [[TMP84:%.*]] = extractelement <3 x double> [[SPLIT6]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT20:%.*]] = insertelement <1 x double> poison, double [[TMP84]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT20:%.*]] = insertelement <1 x double> poison, double [[TMP84]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT21:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT20]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP85:%.*]] = fmul <1 x double> [[BLOCK19]], [[SPLAT_SPLAT21]]
 ; CHECK-NEXT:    [[TMP86:%.*]] = fadd <1 x double> [[TMP83]], [[TMP85]]
@@ -777,18 +777,18 @@ define void @multiply_ntt_t(ptr %A, ptr %B, ptr %C, ptr %R) {
 ; CHECK-NEXT:    [[TMP88:%.*]] = shufflevector <3 x double> [[TMP78]], <3 x double> [[TMP87]], <3 x i32> <i32 0, i32 1, i32 3>
 ; CHECK-NEXT:    [[BLOCK22:%.*]] = shufflevector <3 x double> [[SPLIT]], <3 x double> poison, <2 x i32> <i32 0, i32 1>
 ; CHECK-NEXT:    [[TMP89:%.*]] = extractelement <3 x double> [[SPLIT4]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT23:%.*]] = insertelement <2 x double> poison, double [[TMP89]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT23:%.*]] = insertelement <2 x double> poison, double [[TMP89]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT24:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT23]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP90:%.*]] = fmul <2 x double> [[BLOCK22]], [[SPLAT_SPLAT24]]
 ; CHECK-NEXT:    [[BLOCK25:%.*]] = shufflevector <3 x double> [[SPLIT2]], <3 x double> poison, <2 x i32> <i32 0, i32 1>
 ; CHECK-NEXT:    [[TMP91:%.*]] = extractelement <3 x double> [[SPLIT5]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT26:%.*]] = insertelement <2 x double> poison, double [[TMP91]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT26:%.*]] = insertelement <2 x double> poison, double [[TMP91]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT27:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT26]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP92:%.*]] = fmul <2 x double> [[BLOCK25]], [[SPLAT_SPLAT27]]
 ; CHECK-NEXT:    [[TMP93:%.*]] = fadd <2 x double> [[TMP90]], [[TMP92]]
 ; CHECK-NEXT:    [[BLOCK28:%.*]] = shufflevector <3 x double> [[SPLIT3]], <3 x double> poison, <2 x i32> <i32 0, i32 1>
 ; CHECK-NEXT:    [[TMP94:%.*]] = extractelement <3 x double> [[SPLIT6]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT29:%.*]] = insertelement <2 x double> poison, double [[TMP94]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT29:%.*]] = insertelement <2 x double> poison, double [[TMP94]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT30:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT29]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP95:%.*]] = fmul <2 x double> [[BLOCK28]], [[SPLAT_SPLAT30]]
 ; CHECK-NEXT:    [[TMP96:%.*]] = fadd <2 x double> [[TMP93]], [[TMP95]]
@@ -796,18 +796,18 @@ define void @multiply_ntt_t(ptr %A, ptr %B, ptr %C, ptr %R) {
 ; CHECK-NEXT:    [[TMP98:%.*]] = shufflevector <3 x double> undef, <3 x double> [[TMP97]], <3 x i32> <i32 3, i32 4, i32 2>
 ; CHECK-NEXT:    [[BLOCK31:%.*]] = shufflevector <3 x double> [[SPLIT]], <3 x double> poison, <1 x i32> <i32 2>
 ; CHECK-NEXT:    [[TMP99:%.*]] = extractelement <3 x double> [[SPLIT4]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT32:%.*]] = insertelement <1 x double> poison, double [[TMP99]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT32:%.*]] = insertelement <1 x double> poison, double [[TMP99]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT33:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT32]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP100:%.*]] = fmul <1 x double> [[BLOCK31]], [[SPLAT_SPLAT33]]
 ; CHECK-NEXT:    [[BLOCK34:%.*]] = shufflevector <3 x double> [[SPLIT2]], <3 x double> poison, <1 x i32> <i32 2>
 ; CHECK-NEXT:    [[TMP101:%.*]] = extractelement <3 x double> [[SPLIT5]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT35:%.*]] = insertelement <1 x double> poison, double [[TMP101]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT35:%.*]] = insertelement <1 x double> poison, double [[TMP101]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT36:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT35]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP102:%.*]] = fmul <1 x double> [[BLOCK34]], [[SPLAT_SPLAT36]]
 ; CHECK-NEXT:    [[TMP103:%.*]] = fadd <1 x double> [[TMP100]], [[TMP102]]
 ; CHECK-NEXT:    [[BLOCK37:%.*]] = shufflevector <3 x double> [[SPLIT3]], <3 x double> poison, <1 x i32> <i32 2>
 ; CHECK-NEXT:    [[TMP104:%.*]] = extractelement <3 x double> [[SPLIT6]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT38:%.*]] = insertelement <1 x double> poison, double [[TMP104]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT38:%.*]] = insertelement <1 x double> poison, double [[TMP104]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT39:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT38]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP105:%.*]] = fmul <1 x double> [[BLOCK37]], [[SPLAT_SPLAT39]]
 ; CHECK-NEXT:    [[TMP106:%.*]] = fadd <1 x double> [[TMP103]], [[TMP105]]
@@ -815,18 +815,18 @@ define void @multiply_ntt_t(ptr %A, ptr %B, ptr %C, ptr %R) {
 ; CHECK-NEXT:    [[TMP108:%.*]] = shufflevector <3 x double> [[TMP98]], <3 x double> [[TMP107]], <3 x i32> <i32 0, i32 1, i32 3>
 ; CHECK-NEXT:    [[BLOCK40:%.*]] = shufflevector <3 x double> [[SPLIT]], <3 x double> poison, <2 x i32> <i32 0, i32 1>
 ; CHECK-NEXT:    [[TMP109:%.*]] = extractelement <3 x double> [[SPLIT4]], i64 2
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT41:%.*]] = insertelement <2 x double> poison, double [[TMP109]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT41:%.*]] = insertelement <2 x double> poison, double [[TMP109]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT42:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT41]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP110:%.*]] = fmul <2 x double> [[BLOCK40]], [[SPLAT_SPLAT42]]
 ; CHECK-NEXT:    [[BLOCK43:%.*]] = shufflevector <3 x double> [[SPLIT2]], <3 x double> poison, <2 x i32> <i32 0, i32 1>
 ; CHECK-NEXT:    [[TMP111:%.*]] = extractelement <3 x double> [[SPLIT5]], i64 2
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT44:%.*]] = insertelement <2 x double> poison, double [[TMP111]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT44:%.*]] = insertelement <2 x double> poison, double [[TMP111]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT45:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT44]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP112:%.*]] = fmul <2 x double> [[BLOCK43]], [[SPLAT_SPLAT45]]
 ; CHECK-NEXT:    [[TMP113:%.*]] = fadd <2 x double> [[TMP110]], [[TMP112]]
 ; CHECK-NEXT:    [[BLOCK46:%.*]] = shufflevector <3 x double> [[SPLIT3]], <3 x double> poison, <2 x i32> <i32 0, i32 1>
 ; CHECK-NEXT:    [[TMP114:%.*]] = extractelement <3 x double> [[SPLIT6]], i64 2
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT47:%.*]] = insertelement <2 x double> poison, double [[TMP114]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT47:%.*]] = insertelement <2 x double> poison, double [[TMP114]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT48:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT47]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP115:%.*]] = fmul <2 x double> [[BLOCK46]], [[SPLAT_SPLAT48]]
 ; CHECK-NEXT:    [[TMP116:%.*]] = fadd <2 x double> [[TMP113]], [[TMP115]]
@@ -834,18 +834,18 @@ define void @multiply_ntt_t(ptr %A, ptr %B, ptr %C, ptr %R) {
 ; CHECK-NEXT:    [[TMP118:%.*]] = shufflevector <3 x double> undef, <3 x double> [[TMP117]], <3 x i32> <i32 3, i32 4, i32 2>
 ; CHECK-NEXT:    [[BLOCK49:%.*]] = shufflevector <3 x double> [[SPLIT]], <3 x double> poison, <1 x i32> <i32 2>
 ; CHECK-NEXT:    [[TMP119:%.*]] = extractelement <3 x double> [[SPLIT4]], i64 2
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT50:%.*]] = insertelement <1 x double> poison, double [[TMP119]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT50:%.*]] = insertelement <1 x double> poison, double [[TMP119]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT51:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT50]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP120:%.*]] = fmul <1 x double> [[BLOCK49]], [[SPLAT_SPLAT51]]
 ; CHECK-NEXT:    [[BLOCK52:%.*]] = shufflevector <3 x double> [[SPLIT2]], <3 x double> poison, <1 x i32> <i32 2>
 ; CHECK-NEXT:    [[TMP121:%.*]] = extractelement <3 x double> [[SPLIT5]], i64 2
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT53:%.*]] = insertelement <1 x double> poison, double [[TMP121]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT53:%.*]] = insertelement <1 x double> poison, double [[TMP121]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT54:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT53]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP122:%.*]] = fmul <1 x double> [[BLOCK52]], [[SPLAT_SPLAT54]]
 ; CHECK-NEXT:    [[TMP123:%.*]] = fadd <1 x double> [[TMP120]], [[TMP122]]
 ; CHECK-NEXT:    [[BLOCK55:%.*]] = shufflevector <3 x double> [[SPLIT3]], <3 x double> poison, <1 x i32> <i32 2>
 ; CHECK-NEXT:    [[TMP124:%.*]] = extractelement <3 x double> [[SPLIT6]], i64 2
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT56:%.*]] = insertelement <1 x double> poison, double [[TMP124]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT56:%.*]] = insertelement <1 x double> poison, double [[TMP124]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT57:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT56]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP125:%.*]] = fmul <1 x double> [[BLOCK55]], [[SPLAT_SPLAT57]]
 ; CHECK-NEXT:    [[TMP126:%.*]] = fadd <1 x double> [[TMP123]], [[TMP125]]
@@ -880,12 +880,12 @@ define <4 x float> @mult_tt_same_op(<4 x float> %a) {
 ; CHECK-NEXT:    [[SPLIT3:%.*]] = shufflevector <4 x float> [[A]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
 ; CHECK-NEXT:    [[BLOCK:%.*]] = shufflevector <2 x float> [[SPLIT]], <2 x float> poison, <2 x i32> <i32 0, i32 1>
 ; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x float> [[SPLIT2]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[TMP0]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[TMP0]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <2 x float> [[SPLAT_SPLATINSERT]], <2 x float> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP1:%.*]] = fmul <2 x float> [[BLOCK]], [[SPLAT_SPLAT]]
 ; CHECK-NEXT:    [[BLOCK4:%.*]] = shufflevector <2 x float> [[SPLIT1]], <2 x float> poison, <2 x i32> <i32 0, i32 1>
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x float> [[SPLIT2]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT5:%.*]] = insertelement <2 x float> poison, float [[TMP2]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT5:%.*]] = insertelement <2 x float> poison, float [[TMP2]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT6:%.*]] = shufflevector <2 x float> [[SPLAT_SPLATINSERT5]], <2 x float> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP3:%.*]] = fmul <2 x float> [[BLOCK4]], [[SPLAT_SPLAT6]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = fadd <2 x float> [[TMP1]], [[TMP3]]
@@ -893,12 +893,12 @@ define <4 x float> @mult_tt_same_op(<4 x float> %a) {
 ; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <2 x float> undef, <2 x float> [[TMP5]], <2 x i32> <i32 2, i32 3>
 ; CHECK-NEXT:    [[BLOCK7:%.*]] = shufflevector <2 x float> [[SPLIT]], <2 x float> poison, <2 x i32> <i32 0, i32 1>
 ; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <2 x float> [[SPLIT3]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT8:%.*]] = insertelement <2 x float> poison, float [[TMP7]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT8:%.*]] = insertelement <2 x float> poison, float [[TMP7]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT9:%.*]] = shufflevector <2 x float> [[SPLAT_SPLATINSERT8]], <2 x float> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP8:%.*]] = fmul <2 x float> [[BLOCK7]], [[SPLAT_SPLAT9]]
 ; CHECK-NEXT:    [[BLOCK10:%.*]] = shufflevector <2 x float> [[SPLIT1]], <2 x float> poison, <2 x i32> <i32 0, i32 1>
 ; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <2 x float> [[SPLIT3]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT11:%.*]] = insertelement <2 x float> poison, float [[TMP9]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT11:%.*]] = insertelement <2 x float> poison, float [[TMP9]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT12:%.*]] = shufflevector <2 x float> [[SPLAT_SPLATINSERT11]], <2 x float> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP10:%.*]] = fmul <2 x float> [[BLOCK10]], [[SPLAT_SPLAT12]]
 ; CHECK-NEXT:    [[TMP11:%.*]] = fadd <2 x float> [[TMP8]], [[TMP10]]
@@ -952,12 +952,12 @@ define <12 x double> @factor_transpose(<6 x double> %a, <8 x double> %b) {
 ; CHECK-NEXT:    [[SPLIT4:%.*]] = shufflevector <6 x double> [[A]], <6 x double> poison, <2 x i32> <i32 4, i32 5>
 ; CHECK-NEXT:    [[BLOCK:%.*]] = shufflevector <4 x double> [[SPLIT]], <4 x double> poison, <2 x i32> <i32 0, i32 1>
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <2 x double> poison, double [[TMP1]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <2 x double> poison, double [[TMP1]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP2:%.*]] = fmul <2 x double> [[BLOCK]], [[SPLAT_SPLAT]]
 ; CHECK-NEXT:    [[BLOCK5:%.*]] = shufflevector <4 x double> [[SPLIT1]], <4 x double> poison, <2 x i32> <i32 0, i32 1>
 ; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT6:%.*]] = insertelement <2 x double> poison, double [[TMP3]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT6:%.*]] = insertelement <2 x double> poison, double [[TMP3]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT7:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT6]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP4:%.*]] = fmul <2 x double> [[BLOCK5]], [[SPLAT_SPLAT7]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = fadd <2 x double> [[TMP2]], [[TMP4]]
@@ -965,12 +965,12 @@ define <12 x double> @factor_transpose(<6 x double> %a, <8 x double> %b) {
 ; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <4 x double> undef, <4 x double> [[TMP6]], <4 x i32> <i32 4, i32 5, i32 2, i32 3>
 ; CHECK-NEXT:    [[BLOCK8:%.*]] = shufflevector <4 x double> [[SPLIT]], <4 x double> poison, <2 x i32> <i32 2, i32 3>
 ; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT9:%.*]] = insertelement <2 x double> poison, double [[TMP8]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT9:%.*]] = insertelement <2 x double> poison, double [[TMP8]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT10:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT9]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP9:%.*]] = fmul <2 x double> [[BLOCK8]], [[SPLAT_SPLAT10]]
 ; CHECK-NEXT:    [[BLOCK11:%.*]] = shufflevector <4 x double> [[SPLIT1]], <4 x double> poison, <2 x i32> <i32 2, i32 3>
 ; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT12:%.*]] = insertelement <2 x double> poison, double [[TMP10]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT12:%.*]] = insertelement <2 x double> poison, double [[TMP10]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT13:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT12]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP11:%.*]] = fmul <2 x double> [[BLOCK11]], [[SPLAT_SPLAT13]]
 ; CHECK-NEXT:    [[TMP12:%.*]] = fadd <2 x double> [[TMP9]], [[TMP11]]
@@ -978,12 +978,12 @@ define <12 x double> @factor_transpose(<6 x double> %a, <8 x double> %b) {
 ; CHECK-NEXT:    [[TMP14:%.*]] = shufflevector <4 x double> [[TMP7]], <4 x double> [[TMP13]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
 ; CHECK-NEXT:    [[BLOCK14:%.*]] = shufflevector <4 x double> [[SPLIT]], <4 x double> poison, <2 x i32> <i32 0, i32 1>
 ; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <2 x double> [[SPLIT3]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT15:%.*]] = insertelement <2 x double> poison, double [[TMP15]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT15:%.*]] = insertelement <2 x double> poison, double [[TMP15]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT16:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT15]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP16:%.*]] = fmul <2 x double> [[BLOCK14]], [[SPLAT_SPLAT16]]
 ; CHECK-NEXT:    [[BLOCK17:%.*]] = shufflevector <4 x double> [[SPLIT1]], <4 x double> poison, <2 x i32> <i32 0, i32 1>
 ; CHECK-NEXT:    [[TMP17:%.*]] = extractelement <2 x double> [[SPLIT3]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT18:%.*]] = insertelement <2 x double> poison, double [[TMP17]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT18:%.*]] = insertelement <2 x double> poison, double [[TMP17]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT19:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT18]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP18:%.*]] = fmul <2 x double> [[BLOCK17]], [[SPLAT_SPLAT19]]
 ; CHECK-NEXT:    [[TMP19:%.*]] = fadd <2 x double> [[TMP16]], [[TMP18]]
@@ -991,12 +991,12 @@ define <12 x double> @factor_transpose(<6 x double> %a, <8 x double> %b) {
 ; CHECK-NEXT:    [[TMP21:%.*]] = shufflevector <4 x double> undef, <4 x double> [[TMP20]], <4 x i32> <i32 4, i32 5, i32 2, i32 3>
 ; CHECK-NEXT:    [[BLOCK20:%.*]] = shufflevector <4 x double> [[SPLIT]], <4 x double> poison, <2 x i32> <i32 2, i32 3>
 ; CHECK-NEXT:    [[TMP22:%.*]] = extractelement <2 x double> [[SPLIT3]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT21:%.*]] = insertelement <2 x double> poison, double [[TMP22]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT21:%.*]] = insertelement <2 x double> poison, double [[TMP22]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT22:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT21]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP23:%.*]] = fmul <2 x double> [[BLOCK20]], [[SPLAT_SPLAT22]]
 ; CHECK-NEXT:    [[BLOCK23:%.*]] = shufflevector <4 x double> [[SPLIT1]], <4 x double> poison, <2 x i32> <i32 2, i32 3>
 ; CHECK-NEXT:    [[TMP24:%.*]] = extractelement <2 x double> [[SPLIT3]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT24:%.*]] = insertelement <2 x double> poison, double [[TMP24]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT24:%.*]] = insertelement <2 x double> poison, double [[TMP24]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT25:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT24]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP25:%.*]] = fmul <2 x double> [[BLOCK23]], [[SPLAT_SPLAT25]]
 ; CHECK-NEXT:    [[TMP26:%.*]] = fadd <2 x double> [[TMP23]], [[TMP25]]
@@ -1004,12 +1004,12 @@ define <12 x double> @factor_transpose(<6 x double> %a, <8 x double> %b) {
 ; CHECK-NEXT:    [[TMP28:%.*]] = shufflevector <4 x double> [[TMP21]], <4 x double> [[TMP27]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
 ; CHECK-NEXT:    [[BLOCK26:%.*]] = shufflevector <4 x double> [[SPLIT]], <4 x double> poison, <2 x i32> <i32 0, i32 1>
 ; CHECK-NEXT:    [[TMP29:%.*]] = extractelement <2 x double> [[SPLIT4]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT27:%.*]] = insertelement <2 x double> poison, double [[TMP29]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT27:%.*]] = insertelement <2 x double> poison, double [[TMP29]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT28:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT27]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP30:%.*]] = fmul <2 x double> [[BLOCK26]], [[SPLAT_SPLAT28]]
 ; CHECK-NEXT:    [[BLOCK29:%.*]] = shufflevector <4 x double> [[SPLIT1]], <4 x double> poison, <2 x i32> <i32 0, i32 1>
 ; CHECK-NEXT:    [[TMP31:%.*]] = extractelement <2 x double> [[SPLIT4]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT30:%.*]] = insertelement <2 x double> poison, double [[TMP31]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT30:%.*]] = insertelement <2 x double> poison, double [[TMP31]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT31:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT30]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP32:%.*]] = fmul <2 x double> [[BLOCK29]], [[SPLAT_SPLAT31]]
 ; CHECK-NEXT:    [[TMP33:%.*]] = fadd <2 x double> [[TMP30]], [[TMP32]]
@@ -1017,12 +1017,12 @@ define <12 x double> @factor_transpose(<6 x double> %a, <8 x double> %b) {
 ; CHECK-NEXT:    [[TMP35:%.*]] = shufflevector <4 x double> undef, <4 x double> [[TMP34]], <4 x i32> <i32 4, i32 5, i32 2, i32 3>
 ; CHECK-NEXT:    [[BLOCK32:%.*]] = shufflevector <4 x double> [[SPLIT]], <4 x double> poison, <2 x i32> <i32 2, i32 3>
 ; CHECK-NEXT:    [[TMP36:%.*]] = extractelement <2 x double> [[SPLIT4]], i64 0
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT33:%.*]] = insertelement <2 x double> poison, double [[TMP36]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT33:%.*]] = insertelement <2 x double> poison, double [[TMP36]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT34:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT33]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP37:%.*]] = fmul <2 x double> [[BLOCK32]], [[SPLAT_SPLAT34]]
 ; CHECK-NEXT:    [[BLOCK35:%.*]] = shufflevector <4 x double> [[SPLIT1]], <4 x double> poison, <2 x i32> <i32 2, i32 3>
 ; CHECK-NEXT:    [[TMP38:%.*]] = extractelement <2 x double> [[SPLIT4]], i64 1
-; CHECK-NEXT:    [[SPLAT_SPLATINSERT36:%.*]] = insertelement <2 x double> poison, double [[TMP38]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT36:%.*]] = insertelement <2 x double> poison, double [[TMP38]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT37:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT36]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP39:%.*]] = fmul <2 x double> [[BLOCK35]], [[SPLAT_SPLAT37]]
 ; CHECK-NEXT:    [[TMP40:%.*]] = fadd <2 x double> [[TMP37]], [[TMP39]]

diff  --git a/llvm/test/Transforms/RewriteStatepointsForGC/scalar-base-vector.ll b/llvm/test/Transforms/RewriteStatepointsForGC/scalar-base-vector.ll
index cb3853bb30843..b27c0b1131861 100644
--- a/llvm/test/Transforms/RewriteStatepointsForGC/scalar-base-vector.ll
+++ b/llvm/test/Transforms/RewriteStatepointsForGC/scalar-base-vector.ll
@@ -14,8 +14,8 @@ define ptr addrspace(1) @test1(ptr addrspace(1) %base1, <2 x i64> %offsets, i1 %
 ; CHECK-NEXT:    br label [[SECOND]]
 ; CHECK:       second:
 ; CHECK-NEXT:    [[PHI:%.*]] = phi ptr addrspace(1) [ [[BASE1:%.*]], [[ENTRY:%.*]] ], [ [[BASE21]], [[FIRST]] ]
-; CHECK-NEXT:    [[DOTSPLATINSERT_BASE:%.*]] = insertelement <2 x ptr addrspace(1)> zeroinitializer, ptr addrspace(1) [[PHI]], i32 0, !is_base_value !0
-; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <2 x ptr addrspace(1)> poison, ptr addrspace(1) [[PHI]], i32 0
+; CHECK-NEXT:    [[DOTSPLATINSERT_BASE:%.*]] = insertelement <2 x ptr addrspace(1)> zeroinitializer, ptr addrspace(1) [[PHI]], i64 0, !is_base_value !0
+; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <2 x ptr addrspace(1)> poison, ptr addrspace(1) [[PHI]], i64 0
 ; CHECK-NEXT:    [[DOTSPLAT_BASE:%.*]] = shufflevector <2 x ptr addrspace(1)> [[DOTSPLATINSERT_BASE]], <2 x ptr addrspace(1)> undef, <2 x i32> zeroinitializer, !is_base_value !0
 ; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <2 x ptr addrspace(1)> [[DOTSPLATINSERT]], <2 x ptr addrspace(1)> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[VEC:%.*]] = getelementptr i32, <2 x ptr addrspace(1)> [[DOTSPLAT]], <2 x i64> [[OFFSETS:%.*]]
@@ -44,8 +44,8 @@ second:
 define ptr addrspace(1) @test2(ptr addrspace(1) %base, <2 x i64> %offsets) gc "statepoint-example" {
 ; CHECK-LABEL: @test2(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[DOTSPLATINSERT_BASE:%.*]] = insertelement <2 x ptr addrspace(1)> zeroinitializer, ptr addrspace(1) [[BASE:%.*]], i32 0, !is_base_value !0
-; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <2 x ptr addrspace(1)> poison, ptr addrspace(1) [[BASE]], i32 0
+; CHECK-NEXT:    [[DOTSPLATINSERT_BASE:%.*]] = insertelement <2 x ptr addrspace(1)> zeroinitializer, ptr addrspace(1) [[BASE:%.*]], i64 0, !is_base_value !0
+; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <2 x ptr addrspace(1)> poison, ptr addrspace(1) [[BASE]], i64 0
 ; CHECK-NEXT:    [[DOTSPLAT_BASE:%.*]] = shufflevector <2 x ptr addrspace(1)> [[DOTSPLATINSERT_BASE]], <2 x ptr addrspace(1)> undef, <2 x i32> zeroinitializer, !is_base_value !0
 ; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <2 x ptr addrspace(1)> [[DOTSPLATINSERT]], <2 x ptr addrspace(1)> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[VEC:%.*]] = getelementptr i32, <2 x ptr addrspace(1)> [[DOTSPLAT]], <2 x i64> [[OFFSETS:%.*]]
@@ -86,8 +86,8 @@ entry:
 define ptr addrspace(1) @test4(ptr addrspace(1) %base, <2 x i64> %offsets) gc "statepoint-example" {
 ; CHECK-LABEL: @test4(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[DOTSPLATINSERT_BASE:%.*]] = insertelement <2 x ptr addrspace(1)> zeroinitializer, ptr addrspace(1) [[BASE:%.*]], i32 0, !is_base_value !0
-; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <2 x ptr addrspace(1)> poison, ptr addrspace(1) [[BASE]], i32 0
+; CHECK-NEXT:    [[DOTSPLATINSERT_BASE:%.*]] = insertelement <2 x ptr addrspace(1)> zeroinitializer, ptr addrspace(1) [[BASE:%.*]], i64 0, !is_base_value !0
+; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <2 x ptr addrspace(1)> poison, ptr addrspace(1) [[BASE]], i64 0
 ; CHECK-NEXT:    [[DOTSPLAT_BASE:%.*]] = shufflevector <2 x ptr addrspace(1)> [[DOTSPLATINSERT_BASE]], <2 x ptr addrspace(1)> undef, <2 x i32> zeroinitializer, !is_base_value !0
 ; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <2 x ptr addrspace(1)> [[DOTSPLATINSERT]], <2 x ptr addrspace(1)> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[VEC:%.*]] = getelementptr i32, <2 x ptr addrspace(1)> [[DOTSPLAT]], <2 x i64> [[OFFSETS:%.*]]

diff  --git a/llvm/test/Transforms/RewriteStatepointsForGC/vector-nonlive-clobber.ll b/llvm/test/Transforms/RewriteStatepointsForGC/vector-nonlive-clobber.ll
index a5a3135131521..da044957c94a3 100644
--- a/llvm/test/Transforms/RewriteStatepointsForGC/vector-nonlive-clobber.ll
+++ b/llvm/test/Transforms/RewriteStatepointsForGC/vector-nonlive-clobber.ll
@@ -7,8 +7,8 @@ define void @test_vector_clobber(ptr addrspace(1) %ptr) gc "statepoint-example"
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[STATEPOINT_TOKEN:%.*]] = call token (i64, i32, ptr, i32, i32, ...) @llvm.experimental.gc.statepoint.p0(i64 2882400000, i32 0, ptr elementtype(void ()) @foo, i32 0, i32 0, i32 0, i32 0) [ "deopt"(i32 0, i32 2, i32 0, i32 62, i32 0, i32 13, i32 0, i32 7, ptr null, i32 7, ptr null, i32 7, ptr null, i32 3, i32 14, i32 3, i32 -2406, i32 3, i32 28963, i32 3, i32 30401, i32 3, i32 -11, i32 3, i32 -5, i32 3, i32 1, i32 0, ptr addrspace(1) [[PTR:%.*]], i32 0, ptr addrspace(1) [[PTR]], i32 7, ptr null), "gc-live"(ptr addrspace(1) [[PTR]]) ]
 ; CHECK-NEXT:    [[PTR_RELOCATED:%.*]] = call coldcc ptr addrspace(1) @llvm.experimental.gc.relocate.p1(token [[STATEPOINT_TOKEN]], i32 0, i32 0)
-; CHECK-NEXT:    [[DOTSPLATINSERT_BASE:%.*]] = insertelement <8 x ptr addrspace(1)> zeroinitializer, ptr addrspace(1) [[PTR_RELOCATED]], i32 0, !is_base_value !0
-; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x ptr addrspace(1)> poison, ptr addrspace(1) [[PTR_RELOCATED]], i32 0
+; CHECK-NEXT:    [[DOTSPLATINSERT_BASE:%.*]] = insertelement <8 x ptr addrspace(1)> zeroinitializer, ptr addrspace(1) [[PTR_RELOCATED]], i64 0, !is_base_value !0
+; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x ptr addrspace(1)> poison, ptr addrspace(1) [[PTR_RELOCATED]], i64 0
 ; CHECK-NEXT:    [[DOTSPLAT_BASE:%.*]] = shufflevector <8 x ptr addrspace(1)> [[DOTSPLATINSERT_BASE]], <8 x ptr addrspace(1)> undef, <8 x i32> zeroinitializer, !is_base_value !0
 ; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <8 x ptr addrspace(1)> [[DOTSPLATINSERT]], <8 x ptr addrspace(1)> poison, <8 x i32> zeroinitializer
 ; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds float, <8 x ptr addrspace(1)> [[DOTSPLAT]], <8 x i64> <i64 83, i64 81, i64 79, i64 77, i64 75, i64 73, i64 71, i64 69>

diff  --git a/llvm/test/Transforms/Scalarizer/vector-gep.ll b/llvm/test/Transforms/Scalarizer/vector-gep.ll
index cdde5d1786d4f..48798e4582f28 100644
--- a/llvm/test/Transforms/Scalarizer/vector-gep.ll
+++ b/llvm/test/Transforms/Scalarizer/vector-gep.ll
@@ -40,7 +40,7 @@ define void @test2() {
 ; CHECK-NEXT:    [[DOTI2:%.*]] = extractelement <4 x ptr> [[TMP0]], i32 2
 ; CHECK-NEXT:    [[DOTI3:%.*]] = extractelement <4 x ptr> [[TMP0]], i32 3
 ; CHECK-NEXT:    [[INDEX:%.*]] = load i16, ptr @index, align 2
-; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i16> poison, i16 [[INDEX]], i32 0
+; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i16> poison, i16 [[INDEX]], i64 0
 ; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x i16> [[DOTSPLATINSERT]], <4 x i16> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    [[DOTSPLAT_I0:%.*]] = extractelement <4 x i16> [[DOTSPLAT]], i32 0
 ; CHECK-NEXT:    [[DOTI01:%.*]] = getelementptr i16, ptr [[DOTI0]], i16 [[DOTSPLAT_I0]]
@@ -77,7 +77,7 @@ define <4 x ptr> @test3_constbase(i16 %idx) {
 ; CHECK-LABEL: @test3_constbase(
 ; CHECK-NEXT:  bb:
 ; CHECK-NEXT:    [[OFFSET:%.*]] = getelementptr [4 x i16], ptr @ptr, i16 0, i16 [[IDX:%.*]]
-; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x ptr> poison, ptr [[OFFSET]], i32 0
+; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x ptr> poison, ptr [[OFFSET]], i64 0
 ; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x ptr> [[DOTSPLATINSERT]], <4 x ptr> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    [[DOTSPLAT_I0:%.*]] = extractelement <4 x ptr> [[DOTSPLAT]], i32 0
 ; CHECK-NEXT:    [[GEP_I0:%.*]] = getelementptr i16, ptr [[DOTSPLAT_I0]], i16 0
@@ -127,7 +127,7 @@ define void @test4() {
 ; CHECK-LABEL: @test4(
 ; CHECK-NEXT:  bb:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr @ptrptr, align 8
-; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP0]], i32 0
+; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP0]], i64 0
 ; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x ptr> [[DOTSPLATINSERT]], <4 x ptr> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    [[DOTSPLAT_I0:%.*]] = extractelement <4 x ptr> [[DOTSPLAT]], i32 0
 ; CHECK-NEXT:    [[DOTI0:%.*]] = getelementptr i16, ptr [[DOTSPLAT_I0]], i16 0

diff  --git a/mlir/test/Target/LLVMIR/llvmir.mlir b/mlir/test/Target/LLVMIR/llvmir.mlir
index eb6738e1497e4..17f649b9f6e20 100644
--- a/mlir/test/Target/LLVMIR/llvmir.mlir
+++ b/mlir/test/Target/LLVMIR/llvmir.mlir
@@ -939,7 +939,7 @@ llvm.func @vector_splat_nonzero() -> vector<4xf32> {
 
 // CHECK-LABEL: @vector_splat_nonzero_scalable
 llvm.func @vector_splat_nonzero_scalable() -> vector<[4]xf32> {
-  // CHECK: ret <vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 1.000000e+00, i32 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer)
+  // CHECK: ret <vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 1.000000e+00, i64 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer)
   %0 = llvm.mlir.constant(dense<1.000000e+00> : vector<[4]xf32>) : vector<[4]xf32>
   llvm.return %0 : vector<[4]xf32>
 }

diff  --git a/polly/test/CodeGen/invariant_load_hoist_alignment.ll b/polly/test/CodeGen/invariant_load_hoist_alignment.ll
index 59f92cbc361f9..ef6823be3421c 100644
--- a/polly/test/CodeGen/invariant_load_hoist_alignment.ll
+++ b/polly/test/CodeGen/invariant_load_hoist_alignment.ll
@@ -17,7 +17,7 @@ body:
   %indvar = phi i64 [ 0, %entry ], [ %indvar_next, %body ]
   %scevgep = getelementptr [1024 x i32], [1024 x i32]* @B, i64 0, i64 %indvar
 ; CHECK: [[T2:%.load]] = load i32, i32* getelementptr inbounds ([1024 x i32], [1024 x i32]* @A, i32 0, i32 0), align 4
-; CHECK: %value_p.splatinsert = insertelement <4 x i32> poison, i32 [[T2]], i32 0
+; CHECK: %value_p.splatinsert = insertelement <4 x i32> poison, i32 [[T2]], i64 0
   %value = load i32, i32* getelementptr inbounds ([1024 x i32], [1024 x i32]* @A, i64 0, i64 0), align 4
   %result = tail call i32 @foo(i32 %value) nounwind
   store i32 %result, i32* %scevgep, align 4

diff  --git a/polly/test/CodeGen/simple_vec_cast.ll b/polly/test/CodeGen/simple_vec_cast.ll
index c3ee183fcedb4..1fe989d016c55 100644
--- a/polly/test/CodeGen/simple_vec_cast.ll
+++ b/polly/test/CodeGen/simple_vec_cast.ll
@@ -32,7 +32,7 @@ bb4:                                              ; preds = %bb1
 ; CHECK:   %.load = load float, float* getelementptr inbounds ([1024 x float], [1024 x float]* @A, i32 0, i32 0)
 
 ; CHECK: polly.stmt.bb2:                                   ; preds = %polly.start
-; CHECK:   %tmp_p.splatinsert = insertelement <4 x float> poison, float %.load, i32 0
+; CHECK:   %tmp_p.splatinsert = insertelement <4 x float> poison, float %.load, i64 0
 ; CHECK:   %tmp_p.splat = shufflevector <4 x float> %tmp_p.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer
 ; CHECK:   %0 = fpext <4 x float> %tmp_p.splat to <4 x double>
 ; CHECK:   store <4 x double> %0, <4 x double>*

diff  --git a/polly/test/CodeGen/simple_vec_const.ll b/polly/test/CodeGen/simple_vec_const.ll
index f3641c1f3931a..c9b134d3384ba 100644
--- a/polly/test/CodeGen/simple_vec_const.ll
+++ b/polly/test/CodeGen/simple_vec_const.ll
@@ -56,5 +56,5 @@ define i32 @main() nounwind {
 ; CHECK:   %.load = load float, float* getelementptr inbounds ([1024 x float], [1024 x float]* @A, i32 0, i32 0)
 
 ; CHECK: polly.stmt.:                                      ; preds = %polly.start
-; CHECK:   %_p.splatinsert = insertelement <4 x float> poison, float %.load, i32 0
+; CHECK:   %_p.splatinsert = insertelement <4 x float> poison, float %.load, i64 0
 ; CHECK:   %_p.splat = shufflevector <4 x float> %_p.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer

diff  --git a/polly/test/CodeGen/simple_vec_ptr_ptr_ty.ll b/polly/test/CodeGen/simple_vec_ptr_ptr_ty.ll
index b645ece1ceff6..d55676578897a 100644
--- a/polly/test/CodeGen/simple_vec_ptr_ptr_ty.ll
+++ b/polly/test/CodeGen/simple_vec_ptr_ptr_ty.ll
@@ -26,6 +26,6 @@ return:
 ; CHECK:   %.load = load float**, float*** getelementptr inbounds ([1024 x float**], [1024 x float**]* @A, i32 0, i32 0)
 
 ; CHECK-NOT: load <1 x float**>
-; CHECK: %value_p.splatinsert = insertelement <4 x float**> poison, float** %.load, i32 0
+; CHECK: %value_p.splatinsert = insertelement <4 x float**> poison, float** %.load, i64 0
 ; CHECK: %value_p.splat = shufflevector <4 x float**> %value_p.splatinsert, <4 x float**> poison, <4 x i32> zeroinitializer
 ; CHECK: store <4 x float**> %value_p.splat, <4 x float**>* bitcast ([1024 x float**]* @B to <4 x float**>*), align 8


        


More information about the llvm-commits mailing list