[clang] 9b29610 - Use unary CreateShuffleVector if possible

Juneyoung Lee via cfe-commits cfe-commits at lists.llvm.org
Wed Dec 30 06:06:19 PST 2020


Author: Juneyoung Lee
Date: 2020-12-30T22:36:08+09:00
New Revision: 9b29610228c838a66a88edf43ddd25acf8d1b477

URL: https://github.com/llvm/llvm-project/commit/9b29610228c838a66a88edf43ddd25acf8d1b477
DIFF: https://github.com/llvm/llvm-project/commit/9b29610228c838a66a88edf43ddd25acf8d1b477.diff

LOG: Use unary CreateShuffleVector if possible

As mentioned in D93793, there are quite a few places where the unary `IRBuilder::CreateShuffleVector(X, Mask)` can be used
instead of `IRBuilder::CreateShuffleVector(X, Undef, Mask)`.
This patch updates them.

It would have been more natural to make the patches in this order:
(1) switch these call sites to the unary CreateShuffleVector first
(2) update IRBuilder::CreateShuffleVector to use poison as a placeholder value (D93793)

The patches were made in the opposite order, but in terms of correctness this is still fine.

Reviewed By: spatel

Differential Revision: https://reviews.llvm.org/D93923

Added: 
    

Modified: 
    clang/lib/CodeGen/CGBuiltin.cpp
    clang/lib/CodeGen/CGExpr.cpp
    clang/lib/CodeGen/CGExprScalar.cpp
    clang/test/CodeGen/X86/avx-builtins.c
    clang/test/CodeGen/X86/avx2-builtins.c
    clang/test/CodeGen/X86/avx512-reduceMinMaxIntrin.c
    clang/test/CodeGen/X86/avx512bw-builtins.c
    clang/test/CodeGen/X86/avx512dq-builtins.c
    clang/test/CodeGen/X86/avx512f-builtins.c
    clang/test/CodeGen/X86/avx512vl-builtins-constrained.c
    clang/test/CodeGen/X86/avx512vl-builtins.c
    clang/test/CodeGen/X86/avx512vlbw-builtins.c
    clang/test/CodeGen/X86/avx512vldq-builtins.c
    clang/test/CodeGen/X86/f16c-builtins-constrained.c
    clang/test/CodeGen/X86/f16c-builtins.c
    clang/test/CodeGen/X86/sse2-builtins.c
    clang/test/CodeGen/arm-mve-intrinsics/vmovl.c
    clang/test/CodeGen/arm-mve-intrinsics/vmovn.c
    clang/test/CodeGen/arm-mve-intrinsics/vrev.c
    clang/test/CodeGen/arm64-abi-vector.c
    clang/test/CodeGenOpenCL/as_type.cl
    clang/test/CodeGenOpenCL/partial_initializer.cl
    clang/test/CodeGenOpenCL/preserve_vec3.cl
    clang/test/CodeGenOpenCL/vectorLoadStore.cl
    clang/test/CodeGenOpenCL/vector_literals_valid.cl
    llvm/lib/Analysis/VectorUtils.cpp
    llvm/lib/CodeGen/CodeGenPrepare.cpp
    llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp
    llvm/lib/IR/AutoUpgrade.cpp
    llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
    llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp
    llvm/lib/Target/AMDGPU/AMDGPURewriteOutArguments.cpp
    llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp
    llvm/lib/Target/X86/X86InterleavedAccess.cpp
    llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
    llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
    llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
    llvm/lib/Transforms/Scalar/SROA.cpp
    llvm/lib/Transforms/Utils/LoopUtils.cpp
    llvm/test/CodeGen/AArch64/aarch64-interleaved-ld-combine.ll
    llvm/test/CodeGen/AMDGPU/lower-kernargs.ll
    llvm/test/CodeGen/AMDGPU/rewrite-out-arguments-address-space.ll
    llvm/test/CodeGen/AMDGPU/rewrite-out-arguments.ll
    llvm/test/CodeGen/Generic/expand-experimental-reductions.ll
    llvm/test/Instrumentation/MemorySanitizer/clmul.ll
    llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-demanded-vector-elts-inseltpoison.ll
    llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-demanded-vector-elts.ll
    llvm/test/Transforms/InstCombine/canonicalize-vector-insert.ll
    llvm/test/Transforms/InterleavedAccess/AArch64/interleaved-accesses-inseltpoison.ll
    llvm/test/Transforms/InterleavedAccess/AArch64/interleaved-accesses.ll
    llvm/test/Transforms/InterleavedAccess/ARM/interleaved-accesses-inseltpoison.ll
    llvm/test/Transforms/InterleavedAccess/ARM/interleaved-accesses.ll
    llvm/test/Transforms/InterleavedAccess/X86/interleavedStore-inseltpoison.ll
    llvm/test/Transforms/InterleavedAccess/X86/interleavedStore.ll
    llvm/test/Transforms/LoopVectorize/AMDGPU/packed-math.ll
    llvm/test/Transforms/LoopVectorize/ARM/sphinx.ll
    llvm/test/Transforms/LoopVectorize/PowerPC/widened-massv-call.ll
    llvm/test/Transforms/LoopVectorize/PowerPC/widened-massv-vfabi-attr.ll
    llvm/test/Transforms/LoopVectorize/float-minmax-instruction-flag.ll
    llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll
    llvm/test/Transforms/LoopVectorize/invariant-store-vectorization.ll
    llvm/test/Transforms/LoopVectorize/reduction.ll
    llvm/test/Transforms/LoopVectorize/select-reduction.ll
    llvm/test/Transforms/LowerMatrixIntrinsics/bigger-expressions-double.ll
    llvm/test/Transforms/LowerMatrixIntrinsics/const-gep.ll
    llvm/test/Transforms/LowerMatrixIntrinsics/load-align-volatile.ll
    llvm/test/Transforms/LowerMatrixIntrinsics/multiply-add-sub-double-row-major.ll
    llvm/test/Transforms/LowerMatrixIntrinsics/multiply-double-contraction-fmf.ll
    llvm/test/Transforms/LowerMatrixIntrinsics/multiply-double-contraction.ll
    llvm/test/Transforms/LowerMatrixIntrinsics/multiply-double-row-major.ll
    llvm/test/Transforms/LowerMatrixIntrinsics/multiply-double.ll
    llvm/test/Transforms/LowerMatrixIntrinsics/multiply-float-contraction-fmf.ll
    llvm/test/Transforms/LowerMatrixIntrinsics/multiply-float-contraction.ll
    llvm/test/Transforms/LowerMatrixIntrinsics/multiply-float.ll
    llvm/test/Transforms/LowerMatrixIntrinsics/multiply-fused-volatile.ll
    llvm/test/Transforms/LowerMatrixIntrinsics/multiply-i32-row-major.ll
    llvm/test/Transforms/LowerMatrixIntrinsics/multiply-i32.ll
    llvm/test/Transforms/LowerMatrixIntrinsics/propagate-backward.ll
    llvm/test/Transforms/LowerMatrixIntrinsics/propagate-backwards-unsupported.ll
    llvm/test/Transforms/LowerMatrixIntrinsics/propagate-forward.ll
    llvm/test/Transforms/LowerMatrixIntrinsics/propagate-mixed-users.ll
    llvm/test/Transforms/LowerMatrixIntrinsics/store-align-volatile.ll
    llvm/test/Transforms/LowerMatrixIntrinsics/strided-load-double.ll
    llvm/test/Transforms/LowerMatrixIntrinsics/strided-load-float.ll
    llvm/test/Transforms/LowerMatrixIntrinsics/strided-load-i32.ll
    llvm/test/Transforms/LowerMatrixIntrinsics/strided-store-double.ll
    llvm/test/Transforms/LowerMatrixIntrinsics/strided-store-float.ll
    llvm/test/Transforms/LowerMatrixIntrinsics/strided-store-i32.ll
    llvm/test/Transforms/LowerMatrixIntrinsics/transpose-double-row-major.ll
    llvm/test/Transforms/LowerMatrixIntrinsics/transpose-double.ll
    llvm/test/Transforms/LowerMatrixIntrinsics/transpose-float-row-major.ll
    llvm/test/Transforms/LowerMatrixIntrinsics/transpose-float.ll
    llvm/test/Transforms/LowerMatrixIntrinsics/transpose-i32-row-major.ll
    llvm/test/Transforms/LowerMatrixIntrinsics/transpose-i32.ll
    llvm/test/Transforms/PhaseOrdering/X86/vector-reductions-expanded.ll
    llvm/test/Transforms/SLPVectorizer/AMDGPU/horizontal-store.ll
    llvm/test/Transforms/SLPVectorizer/AMDGPU/reduction.ll
    llvm/test/Transforms/SROA/vector-promotion.ll
    llvm/unittests/IR/PatternMatch.cpp

Removed: 
    


################################################################################
diff  --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 0c02dbfe8469..6e4c31be84c1 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -7741,8 +7741,7 @@ static llvm::Value *VectorUnzip(CGBuilderTy &Builder, llvm::Value *V, bool Odd)
       cast<llvm::FixedVectorType>(V->getType())->getNumElements();
   for (unsigned i = 0; i < InputElements; i += 2)
     Indices.push_back(i + Odd);
-  return Builder.CreateShuffleVector(V, llvm::UndefValue::get(V->getType()),
-                                     Indices);
+  return Builder.CreateShuffleVector(V, Indices);
 }
 
 static llvm::Value *VectorZip(CGBuilderTy &Builder, llvm::Value *V0,
@@ -7783,8 +7782,7 @@ static llvm::Value *ARMMVEVectorElementReverse(CGBuilderTy &Builder,
   unsigned Mask = ReverseWidth / LaneSize - 1;
   for (unsigned i = 0; i < Elements; i++)
     Indices.push_back(i ^ Mask);
-  return Builder.CreateShuffleVector(V, llvm::UndefValue::get(V->getType()),
-                                     Indices);
+  return Builder.CreateShuffleVector(V, Indices);
 }
 
 Value *CodeGenFunction::EmitARMMVEBuiltinExpr(unsigned BuiltinID,
@@ -11808,8 +11806,7 @@ static Value *EmitX86CvtF16ToFloatExpr(CodeGenFunction &CGF,
   if (NumDstElts !=
       cast<llvm::FixedVectorType>(Src->getType())->getNumElements()) {
     assert(NumDstElts == 4 && "Unexpected vector size");
-    Src = CGF.Builder.CreateShuffleVector(Src, UndefValue::get(Src->getType()),
-                                          ArrayRef<int>{0, 1, 2, 3});
+    Src = CGF.Builder.CreateShuffleVector(Src, ArrayRef<int>{0, 1, 2, 3});
   }
 
   // Bitcast from vXi16 to vXf16.
@@ -12700,7 +12697,6 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
       Indices[i] = i + Index;
 
     Value *Res = Builder.CreateShuffleVector(Ops[0],
-                                             UndefValue::get(Ops[0]->getType()),
                                              makeArrayRef(Indices, NumElts),
                                              "extract");
 
@@ -12740,7 +12736,6 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
       Indices[i] = (i >= SrcNumElts) ? SrcNumElts + (i % SrcNumElts) : i;
 
     Value *Op1 = Builder.CreateShuffleVector(Ops[1],
-                                             UndefValue::get(Ops[1]->getType()),
                                              makeArrayRef(Indices, DstNumElts),
                                              "widen");
 
@@ -12826,8 +12821,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
         Indices[l + i] = l + i;
     }
 
-    return Builder.CreateShuffleVector(Ops[0], UndefValue::get(Ty),
-                                       makeArrayRef(Indices, NumElts),
+    return Builder.CreateShuffleVector(Ops[0], makeArrayRef(Indices, NumElts),
                                        "pshuflw");
   }
   case X86::BI__builtin_ia32_pshufhw:
@@ -12850,8 +12844,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
       }
     }
 
-    return Builder.CreateShuffleVector(Ops[0], UndefValue::get(Ty),
-                                       makeArrayRef(Indices, NumElts),
+    return Builder.CreateShuffleVector(Ops[0], makeArrayRef(Indices, NumElts),
                                        "pshufhw");
   }
   case X86::BI__builtin_ia32_pshufd:
@@ -12880,8 +12873,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
       }
     }
 
-    return Builder.CreateShuffleVector(Ops[0], UndefValue::get(Ty),
-                                       makeArrayRef(Indices, NumElts),
+    return Builder.CreateShuffleVector(Ops[0], makeArrayRef(Indices, NumElts),
                                        "permil");
   }
   case X86::BI__builtin_ia32_shufpd:
@@ -12928,8 +12920,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
       for (unsigned i = 0; i != 4; ++i)
         Indices[l + i] = l + ((Imm >> (2 * i)) & 0x3);
 
-    return Builder.CreateShuffleVector(Ops[0], UndefValue::get(Ty),
-                                       makeArrayRef(Indices, NumElts),
+    return Builder.CreateShuffleVector(Ops[0], makeArrayRef(Indices, NumElts),
                                        "perm");
   }
   case X86::BI__builtin_ia32_palignr128:

diff  --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index 6ca3a4e04dd4..3013fffcbf6d 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -1728,8 +1728,8 @@ llvm::Value *CodeGenFunction::EmitLoadOfScalar(Address Addr, bool Volatile,
         llvm::Value *V = Builder.CreateLoad(Cast, Volatile, "loadVec4");
 
         // Shuffle vector to get vec3.
-        V = Builder.CreateShuffleVector(V, llvm::UndefValue::get(vec4Ty),
-                                        ArrayRef<int>{0, 1, 2}, "extractVec");
+        V = Builder.CreateShuffleVector(V, ArrayRef<int>{0, 1, 2},
+                                        "extractVec");
         return EmitFromMemory(V, Ty);
       }
     }
@@ -1836,8 +1836,7 @@ void CodeGenFunction::EmitStoreOfScalar(llvm::Value *Value, Address Addr,
       // Handle vec3 special.
       if (VecTy && cast<llvm::FixedVectorType>(VecTy)->getNumElements() == 3) {
         // Our source is a vec3, do a shuffle vector to make it a vec4.
-        Value = Builder.CreateShuffleVector(Value, llvm::UndefValue::get(VecTy),
-                                            ArrayRef<int>{0, 1, 2, -1},
+        Value = Builder.CreateShuffleVector(Value, ArrayRef<int>{0, 1, 2, -1},
                                             "extractVec");
         SrcTy = llvm::FixedVectorType::get(VecTy->getElementType(), 4);
       }
@@ -2009,8 +2008,7 @@ RValue CodeGenFunction::EmitLoadOfExtVectorElementLValue(LValue LV) {
   for (unsigned i = 0; i != NumResultElts; ++i)
     Mask.push_back(getAccessedFieldNo(i, Elts));
 
-  Vec = Builder.CreateShuffleVector(Vec, llvm::UndefValue::get(Vec->getType()),
-                                    Mask);
+  Vec = Builder.CreateShuffleVector(Vec, Mask);
   return RValue::get(Vec);
 }
 
@@ -2266,8 +2264,7 @@ void CodeGenFunction::EmitStoreThroughExtVectorComponentLValue(RValue Src,
       for (unsigned i = 0; i != NumSrcElts; ++i)
         Mask[getAccessedFieldNo(i, Elts)] = i;
 
-      Vec = Builder.CreateShuffleVector(
-          SrcVal, llvm::UndefValue::get(Vec->getType()), Mask);
+      Vec = Builder.CreateShuffleVector(SrcVal, Mask);
     } else if (NumDstElts > NumSrcElts) {
       // Extended the source vector to the same length and then shuffle it
       // into the destination.
@@ -2277,8 +2274,7 @@ void CodeGenFunction::EmitStoreThroughExtVectorComponentLValue(RValue Src,
       for (unsigned i = 0; i != NumSrcElts; ++i)
         ExtMask.push_back(i);
       ExtMask.resize(NumDstElts, -1);
-      llvm::Value *ExtSrcVal = Builder.CreateShuffleVector(
-          SrcVal, llvm::UndefValue::get(SrcVal->getType()), ExtMask);
+      llvm::Value *ExtSrcVal = Builder.CreateShuffleVector(SrcVal, ExtMask);
       // build identity
       SmallVector<int, 4> Mask;
       for (unsigned i = 0; i != NumDstElts; ++i)

diff  --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp
index c9cf1d0dfd89..afb03774132e 100644
--- a/clang/lib/CodeGen/CGExprScalar.cpp
+++ b/clang/lib/CodeGen/CGExprScalar.cpp
@@ -1857,8 +1857,7 @@ Value *ScalarExprEmitter::VisitInitListExpr(InitListExpr *E) {
       for (unsigned j = 0; j != InitElts; ++j)
         Args.push_back(j);
       Args.resize(ResElts, -1);
-      Init = Builder.CreateShuffleVector(Init, llvm::UndefValue::get(VVT), Args,
-                                         "vext");
+      Init = Builder.CreateShuffleVector(Init, Args, "vext");
 
       Args.clear();
       for (unsigned j = 0; j != CurIdx; ++j)
@@ -4556,9 +4555,8 @@ Value *ScalarExprEmitter::VisitBlockExpr(const BlockExpr *block) {
 // Convert a vec3 to vec4, or vice versa.
 static Value *ConvertVec3AndVec4(CGBuilderTy &Builder, CodeGenFunction &CGF,
                                  Value *Src, unsigned NumElementsDst) {
-  llvm::Value *UnV = llvm::UndefValue::get(Src->getType());
   static constexpr int Mask[] = {0, 1, 2, -1};
-  return Builder.CreateShuffleVector(Src, UnV,
+  return Builder.CreateShuffleVector(Src,
                                      llvm::makeArrayRef(Mask, NumElementsDst));
 }
 

diff  --git a/clang/test/CodeGen/X86/avx-builtins.c b/clang/test/CodeGen/X86/avx-builtins.c
index 4dfa64396d47..4118d6c00b37 100644
--- a/clang/test/CodeGen/X86/avx-builtins.c
+++ b/clang/test/CodeGen/X86/avx-builtins.c
@@ -1089,19 +1089,19 @@ long long test_mm256_extract_epi64(__m256i A) {
 
 __m128d test_mm256_extractf128_pd(__m256d A) {
   // CHECK-LABEL: test_mm256_extractf128_pd
-  // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> undef, <2 x i32> <i32 2, i32 3>
+  // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> poison, <2 x i32> <i32 2, i32 3>
   return _mm256_extractf128_pd(A, 1);
 }
 
 __m128 test_mm256_extractf128_ps(__m256 A) {
   // CHECK-LABEL: test_mm256_extractf128_ps
-  // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
   return _mm256_extractf128_ps(A, 1);
 }
 
 __m128i test_mm256_extractf128_si256(__m256i A) {
   // CHECK-LABEL: test_mm256_extractf128_si256
-  // CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  // CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
   return _mm256_extractf128_si256(A, 1);
 }
 
@@ -1169,21 +1169,21 @@ __m256i test_mm256_insert_epi64(__m256i x, long long b) {
 
 __m256d test_mm256_insertf128_pd(__m256d A, __m128d B) {
   // CHECK-LABEL: test_mm256_insertf128_pd
-  // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
   return _mm256_insertf128_pd(A, B, 0);
 }
 
 __m256 test_mm256_insertf128_ps(__m256 A, __m128 B) {
   // CHECK-LABEL: test_mm256_insertf128_ps
-  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
   // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
   return _mm256_insertf128_ps(A, B, 1);
 }
 
 __m256i test_mm256_insertf128_si256(__m256i A, __m128i B) {
   // CHECK-LABEL: test_mm256_insertf128_si256
-  // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
   // CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
   return _mm256_insertf128_si256(A, B, 0);
 }
@@ -1235,7 +1235,7 @@ __m256 test_mm256_loadu2_m128(float* A, float* B) {
   // CHECK: load <4 x float>, <4 x float>* %{{.*}}, align 1{{$}}
   // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
   // CHECK: load <4 x float>, <4 x float>* %{{.*}}, align 1{{$}}
-  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
   // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
   return _mm256_loadu2_m128(A, B);
 }
@@ -1245,7 +1245,7 @@ __m256d test_mm256_loadu2_m128d(double* A, double* B) {
   // CHECK: load <2 x double>, <2 x double>* %{{.*}}, align 1{{$}}
   // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
   // CHECK: load <2 x double>, <2 x double>* %{{.*}}, align 1{{$}}
-  // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
   return _mm256_loadu2_m128d(A, B);
 }
@@ -1255,7 +1255,7 @@ __m256i test_mm256_loadu2_m128i(__m128i* A, __m128i* B) {
   // CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 1{{$}}
   // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
   // CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 1{{$}}
-  // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
   // CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
   return _mm256_loadu2_m128i(A, B);
 }
@@ -1388,32 +1388,32 @@ __m256 test_mm256_or_ps(__m256 A, __m256 B) {
 
 __m128d test_mm_permute_pd(__m128d A) {
   // CHECK-LABEL: test_mm_permute_pd
-  // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> undef, <2 x i32> <i32 1, i32 0>
+  // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> poison, <2 x i32> <i32 1, i32 0>
   return _mm_permute_pd(A, 1);
 }
 
 __m256d test_mm256_permute_pd(__m256d A) {
   // CHECK-LABEL: test_mm256_permute_pd
-  // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+  // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
   return _mm256_permute_pd(A, 5);
 }
 
 __m128 test_mm_permute_ps(__m128 A) {
   // CHECK-LABEL: test_mm_permute_ps
-  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
   return _mm_permute_ps(A, 0x1b);
 }
 
 // Test case for PR12401
 __m128 test2_mm_permute_ps(__m128 a) {
   // CHECK-LABEL: test2_mm_permute_ps
-  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> undef, <4 x i32> <i32 2, i32 1, i32 2, i32 3>
+  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> poison, <4 x i32> <i32 2, i32 1, i32 2, i32 3>
   return _mm_permute_ps(a, 0xe6);
 }
 
 __m256 test_mm256_permute_ps(__m256 A) {
   // CHECK-LABEL: test_mm256_permute_ps
-  // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
+  // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> poison, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
   return _mm256_permute_ps(A, 0x1b);
 }
 
@@ -1918,7 +1918,7 @@ void test_mm256_storeu2_m128(float* A, float* B, __m256 C) {
   // CHECK-LABEL: test_mm256_storeu2_m128
   // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   // CHECK: store <4 x float> %{{.*}}, <4 x float>* %{{.*}}, align 1{{$}}
-  // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
   // CHECK: store <4 x float> %{{.*}}, <4 x float>* %{{.*}}, align 1{{$}}
   _mm256_storeu2_m128(A, B, C);
 }
@@ -1927,7 +1927,7 @@ void test_mm256_storeu2_m128d(double* A, double* B, __m256d C) {
   // CHECK-LABEL: test_mm256_storeu2_m128d
   // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <2 x i32> <i32 0, i32 1>
   // CHECK: store <2 x double> %{{.*}}, <2 x double>* %{{.*}}, align 1{{$}}
-  // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> undef, <2 x i32> <i32 2, i32 3>
+  // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> poison, <2 x i32> <i32 2, i32 3>
   // CHECK: store <2 x double> %{{.*}}, <2 x double>* %{{.*}}, align 1{{$}}
   _mm256_storeu2_m128d(A, B, C);
 }
@@ -1936,7 +1936,7 @@ void test_mm256_storeu2_m128i(__m128i* A, __m128i* B, __m256i C) {
   // CHECK-LABEL: test_mm256_storeu2_m128i
   // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <2 x i32> <i32 0, i32 1>
   // CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 1{{$}}
-  // CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  // CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
   // CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 1{{$}}
   _mm256_storeu2_m128i(A, B, C);
 }

diff  --git a/clang/test/CodeGen/X86/avx2-builtins.c b/clang/test/CodeGen/X86/avx2-builtins.c
index 46717a78b49e..7f285618f35e 100644
--- a/clang/test/CodeGen/X86/avx2-builtins.c
+++ b/clang/test/CodeGen/X86/avx2-builtins.c
@@ -376,20 +376,20 @@ __m256i test_mm256_cvtepu32_epi64(__m128i a) {
 
 __m128i test0_mm256_extracti128_si256_0(__m256i a) {
   // CHECK-LABEL: test0_mm256_extracti128_si256
-  // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> undef, <2 x i32> <i32 0, i32 1>
+  // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> poison, <2 x i32> <i32 0, i32 1>
   return _mm256_extracti128_si256(a, 0);
 }
 
 __m128i test1_mm256_extracti128_si256_1(__m256i a) {
   // CHECK-LABEL: test1_mm256_extracti128_si256
-  // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> undef, <2 x i32> <i32 2, i32 3>
+  // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> poison, <2 x i32> <i32 2, i32 3>
   return _mm256_extracti128_si256(a, 1);
 }
 
 // Immediate should be truncated to one bit.
 __m128i test2_mm256_extracti128_si256(__m256i a) {
   // CHECK-LABEL: test2_mm256_extracti128_si256
-  // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> undef, <2 x i32> <i32 0, i32 1>
+  // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> poison, <2 x i32> <i32 0, i32 1>
   return _mm256_extracti128_si256(a, 0);
 }
 
@@ -645,14 +645,14 @@ __m128 test_mm256_mask_i64gather_ps(__m128 a, float const *b, __m256i c, __m128
 
 __m256i test0_mm256_inserti128_si256(__m256i a, __m128i b) {
   // CHECK-LABEL: test0_mm256_inserti128_si256
-  // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
   return _mm256_inserti128_si256(a, b, 0);
 }
 
 __m256i test1_mm256_inserti128_si256(__m256i a, __m128i b) {
   // CHECK-LABEL: test1_mm256_inserti128_si256
-  // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
   return _mm256_inserti128_si256(a, b, 1);
 }
@@ -660,7 +660,7 @@ __m256i test1_mm256_inserti128_si256(__m256i a, __m128i b) {
 // Immediate should be truncated to one bit.
 __m256i test2_mm256_inserti128_si256(__m256i a, __m128i b) {
   // CHECK-LABEL: test2_mm256_inserti128_si256
-  // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
   return _mm256_inserti128_si256(a, b, 0);
 }
@@ -895,13 +895,13 @@ __m256i test_mm256_permute2x128_si256(__m256i a, __m256i b) {
 
 __m256i test_mm256_permute4x64_epi64(__m256i a) {
   // CHECK-LABEL: test_mm256_permute4x64_epi64
-  // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> undef, <4 x i32> <i32 3, i32 0, i32 2, i32 0>
+  // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> poison, <4 x i32> <i32 3, i32 0, i32 2, i32 0>
   return _mm256_permute4x64_epi64(a, 35);
 }
 
 __m256d test_mm256_permute4x64_pd(__m256d a) {
   // CHECK-LABEL: test_mm256_permute4x64_pd
-  // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> undef, <4 x i32> <i32 1, i32 2, i32 1, i32 0>
+  // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> poison, <4 x i32> <i32 1, i32 2, i32 1, i32 0>
   return _mm256_permute4x64_pd(a, 25);
 }
 
@@ -931,19 +931,19 @@ __m256i test_mm256_shuffle_epi8(__m256i a, __m256i b) {
 
 __m256i test_mm256_shuffle_epi32(__m256i a) {
   // CHECK-LABEL: test_mm256_shuffle_epi32
-  // CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> undef, <8 x i32> <i32 3, i32 3, i32 0, i32 0, i32 7, i32 7, i32 4, i32 4>
+  // CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> poison, <8 x i32> <i32 3, i32 3, i32 0, i32 0, i32 7, i32 7, i32 4, i32 4>
   return _mm256_shuffle_epi32(a, 15);
 }
 
 __m256i test_mm256_shufflehi_epi16(__m256i a) {
   // CHECK-LABEL: test_mm256_shufflehi_epi16
-  // CHECK: shufflevector <16 x i16> %{{.*}}, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 6, i32 6, i32 5, i32 8, i32 9, i32 10, i32 11, i32 15, i32 14, i32 14, i32 13>
+  // CHECK: shufflevector <16 x i16> %{{.*}}, <16 x i16> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 6, i32 6, i32 5, i32 8, i32 9, i32 10, i32 11, i32 15, i32 14, i32 14, i32 13>
   return _mm256_shufflehi_epi16(a, 107);
 }
 
 __m256i test_mm256_shufflelo_epi16(__m256i a) {
   // CHECK-LABEL: test_mm256_shufflelo_epi16
-  // CHECK: shufflevector <16 x i16> %{{.*}}, <16 x i16> undef, <16 x i32> <i32 3, i32 0, i32 1, i32 1, i32 4, i32 5, i32 6, i32 7, i32 11, i32 8, i32 9, i32 9, i32 12, i32 13, i32 14, i32 15>
+  // CHECK: shufflevector <16 x i16> %{{.*}}, <16 x i16> poison, <16 x i32> <i32 3, i32 0, i32 1, i32 1, i32 4, i32 5, i32 6, i32 7, i32 11, i32 8, i32 9, i32 9, i32 12, i32 13, i32 14, i32 15>
   return _mm256_shufflelo_epi16(a, 83);
 }
 

diff  --git a/clang/test/CodeGen/X86/avx512-reduceMinMaxIntrin.c b/clang/test/CodeGen/X86/avx512-reduceMinMaxIntrin.c
index f20f0bd5c1a5..bb43d348205f 100644
--- a/clang/test/CodeGen/X86/avx512-reduceMinMaxIntrin.c
+++ b/clang/test/CodeGen/X86/avx512-reduceMinMaxIntrin.c
@@ -16,11 +16,11 @@ unsigned long long test_mm512_reduce_max_epu64(__m512i __W){
 
 double test_mm512_reduce_max_pd(__m512d __W){
 // CHECK-LABEL: @test_mm512_reduce_max_pd(
-// CHECK:    shufflevector <8 x double> %{{.*}}, <8 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-// CHECK:    shufflevector <8 x double> %{{.*}}, <8 x double> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// CHECK:    shufflevector <8 x double> %{{.*}}, <8 x double> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// CHECK:    shufflevector <8 x double> %{{.*}}, <8 x double> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
 // CHECK:    call <4 x double> @llvm.x86.avx.max.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}})
-// CHECK:    shufflevector <4 x double> %{{.*}}, <4 x double> undef, <2 x i32> <i32 0, i32 1>
-// CHECK:    shufflevector <4 x double> %{{.*}}, <4 x double> undef, <2 x i32> <i32 2, i32 3>
+// CHECK:    shufflevector <4 x double> %{{.*}}, <4 x double> poison, <2 x i32> <i32 0, i32 1>
+// CHECK:    shufflevector <4 x double> %{{.*}}, <4 x double> poison, <2 x i32> <i32 2, i32 3>
 // CHECK:    call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
 // CHECK:    shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> <i32 1, i32 0>
 // CHECK:    call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
@@ -42,11 +42,11 @@ unsigned long long test_mm512_reduce_min_epu64(__m512i __W){
 
 double test_mm512_reduce_min_pd(__m512d __W){
 // CHECK-LABEL: @test_mm512_reduce_min_pd(
-// CHECK:    shufflevector <8 x double> %{{.*}}, <8 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-// CHECK:    shufflevector <8 x double> %{{.*}}, <8 x double> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// CHECK:    shufflevector <8 x double> %{{.*}}, <8 x double> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// CHECK:    shufflevector <8 x double> %{{.*}}, <8 x double> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
 // CHECK:    call <4 x double> @llvm.x86.avx.min.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}})
-// CHECK:    shufflevector <4 x double> %{{.*}}, <4 x double> undef, <2 x i32> <i32 0, i32 1>
-// CHECK:    shufflevector <4 x double> %{{.*}}, <4 x double> undef, <2 x i32> <i32 2, i32 3>
+// CHECK:    shufflevector <4 x double> %{{.*}}, <4 x double> poison, <2 x i32> <i32 0, i32 1>
+// CHECK:    shufflevector <4 x double> %{{.*}}, <4 x double> poison, <2 x i32> <i32 2, i32 3>
 // CHECK:    call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
 // CHECK:    shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> <i32 1, i32 0>
 // CHECK:    call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
@@ -74,11 +74,11 @@ double test_mm512_mask_reduce_max_pd(__mmask8 __M, __m512d __W){
 // CHECK-LABEL: @test_mm512_mask_reduce_max_pd(
 // CHECK:    bitcast i8 %{{.*}} to <8 x i1>
 // CHECK:    select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
-// CHECK:    shufflevector <8 x double> %{{.*}}, <8 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-// CHECK:    shufflevector <8 x double> %{{.*}}, <8 x double> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// CHECK:    shufflevector <8 x double> %{{.*}}, <8 x double> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// CHECK:    shufflevector <8 x double> %{{.*}}, <8 x double> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
 // CHECK:    call <4 x double> @llvm.x86.avx.max.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}})
-// CHECK:    shufflevector <4 x double> %{{.*}}, <4 x double> undef, <2 x i32> <i32 0, i32 1>
-// CHECK:    shufflevector <4 x double> %{{.*}}, <4 x double> undef, <2 x i32> <i32 2, i32 3>
+// CHECK:    shufflevector <4 x double> %{{.*}}, <4 x double> poison, <2 x i32> <i32 0, i32 1>
+// CHECK:    shufflevector <4 x double> %{{.*}}, <4 x double> poison, <2 x i32> <i32 2, i32 3>
 // CHECK:    call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
 // CHECK:    shufflevector <2 x double> %{{.*}}, <2 x double>  %{{.*}}, <2 x i32> <i32 1, i32 0>
 // CHECK:    call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
@@ -106,11 +106,11 @@ double test_mm512_mask_reduce_min_pd(__mmask8 __M, __m512d __W){
 // CHECK-LABEL: @test_mm512_mask_reduce_min_pd(
 // CHECK:    bitcast i8 %{{.*}} to <8 x i1>
 // CHECK:    select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
-// CHECK:    shufflevector <8 x double> %{{.*}}, <8 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-// CHECK:    shufflevector <8 x double> %{{.*}}, <8 x double> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// CHECK:    shufflevector <8 x double> %{{.*}}, <8 x double> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// CHECK:    shufflevector <8 x double> %{{.*}}, <8 x double> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
 // CHECK:    call <4 x double> @llvm.x86.avx.min.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}})
-// CHECK:    shufflevector <4 x double> %{{.*}}, <4 x double> undef, <2 x i32> <i32 0, i32 1>
-// CHECK:    shufflevector <4 x double> %{{.*}}, <4 x double> undef, <2 x i32> <i32 2, i32 3>
+// CHECK:    shufflevector <4 x double> %{{.*}}, <4 x double> poison, <2 x i32> <i32 0, i32 1>
+// CHECK:    shufflevector <4 x double> %{{.*}}, <4 x double> poison, <2 x i32> <i32 2, i32 3>
 // CHECK:    call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
 // CHECK:    shufflevector <2 x double> %{{.*}}, <2 x double>  %{{.*}}, <2 x i32> <i32 1, i32 0>
 // CHECK:    call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
@@ -132,11 +132,11 @@ unsigned int test_mm512_reduce_max_epu32(__m512i __W){
 
 float test_mm512_reduce_max_ps(__m512 __W){
 // CHECK-LABEL: define float @test_mm512_reduce_max_ps(
-// CHECK:    shufflevector <8 x double> %{{.*}}, <8 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-// CHECK:    shufflevector <8 x double> %{{.*}}, <8 x double> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// CHECK:    shufflevector <8 x double> %{{.*}}, <8 x double> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// CHECK:    shufflevector <8 x double> %{{.*}}, <8 x double> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
 // CHECK:    call <8 x float> @llvm.x86.avx.max.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}})
-// CHECK:    shufflevector <8 x float> %{{.*}}, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-// CHECK:    shufflevector <8 x float> %{{.*}}, <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// CHECK:    shufflevector <8 x float> %{{.*}}, <8 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// CHECK:    shufflevector <8 x float> %{{.*}}, <8 x float> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
 // CHECK:    call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}})
 // CHECK:    shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
 // CHECK:    call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}})
@@ -160,11 +160,11 @@ unsigned int test_mm512_reduce_min_epu32(__m512i __W){
 
 float test_mm512_reduce_min_ps(__m512 __W){
 // CHECK-LABEL: define float @test_mm512_reduce_min_ps(
-// CHECK:    shufflevector <8 x double> %{{.*}}, <8 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-// CHECK:    shufflevector <8 x double> %{{.*}}, <8 x double> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// CHECK:    shufflevector <8 x double> %{{.*}}, <8 x double> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// CHECK:    shufflevector <8 x double> %{{.*}}, <8 x double> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
 // CHECK:    call <8 x float> @llvm.x86.avx.min.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}})
-// CHECK:    shufflevector <8 x float> %{{.*}}, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-// CHECK:    shufflevector <8 x float> %{{.*}}, <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// CHECK:    shufflevector <8 x float> %{{.*}}, <8 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// CHECK:    shufflevector <8 x float> %{{.*}}, <8 x float> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
 // CHECK:    call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}})
 // CHECK:    shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
 // CHECK:    call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}})
@@ -194,11 +194,11 @@ float test_mm512_mask_reduce_max_ps(__mmask16 __M, __m512 __W){
 // CHECK-LABEL: define float @test_mm512_mask_reduce_max_ps(
 // CHECK:    bitcast i16 %{{.*}} to <16 x i1>
 // CHECK:    select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
-// CHECK:    shufflevector <8 x double> %{{.*}}, <8 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-// CHECK:    shufflevector <8 x double> %{{.*}}, <8 x double> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// CHECK:    shufflevector <8 x double> %{{.*}}, <8 x double> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// CHECK:    shufflevector <8 x double> %{{.*}}, <8 x double> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
 // CHECK:    call <8 x float> @llvm.x86.avx.max.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}})
-// CHECK:    shufflevector <8 x float> %{{.*}}, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-// CHECK:    shufflevector <8 x float> %{{.*}}, <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// CHECK:    shufflevector <8 x float> %{{.*}}, <8 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// CHECK:    shufflevector <8 x float> %{{.*}}, <8 x float> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
 // CHECK:    call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}})
 // CHECK:    shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
 // CHECK:    call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}})
@@ -228,11 +228,11 @@ float test_mm512_mask_reduce_min_ps(__mmask16 __M, __m512 __W){
 // CHECK-LABEL: define float @test_mm512_mask_reduce_min_ps(
 // CHECK:    bitcast i16 %{{.*}} to <16 x i1>
 // CHECK:    select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
-// CHECK:    shufflevector <8 x double> %{{.*}}, <8 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-// CHECK:    shufflevector <8 x double> %{{.*}}, <8 x double> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// CHECK:    shufflevector <8 x double> %{{.*}}, <8 x double> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// CHECK:    shufflevector <8 x double> %{{.*}}, <8 x double> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
 // CHECK:    call <8 x float> @llvm.x86.avx.min.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}})
-// CHECK:    shufflevector <8 x float> %{{.*}}, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-// CHECK:    shufflevector <8 x float> %{{.*}}, <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// CHECK:    shufflevector <8 x float> %{{.*}}, <8 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// CHECK:    shufflevector <8 x float> %{{.*}}, <8 x float> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
 // CHECK:    call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}})
 // CHECK:    shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
 // CHECK:    call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}})

diff  --git a/clang/test/CodeGen/X86/avx512bw-builtins.c b/clang/test/CodeGen/X86/avx512bw-builtins.c
index 58b2488f3caf..82ad1146c469 100644
--- a/clang/test/CodeGen/X86/avx512bw-builtins.c
+++ b/clang/test/CodeGen/X86/avx512bw-builtins.c
@@ -1602,40 +1602,40 @@ __m512i test_mm512_maskz_cvtepu8_epi16(__mmask32 __U, __m256i __A) {
 
 __m512i test_mm512_shufflehi_epi16(__m512i __A) {
   // CHECK-LABEL: @test_mm512_shufflehi_epi16
-  // CHECK: shufflevector <32 x i16> %{{.*}}, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 5, i32 4, i32 4, i32 8, i32 9, i32 10, i32 11, i32 13, i32 13, i32 12, i32 12, i32 16, i32 17, i32 18, i32 19, i32 21, i32 21, i32 20, i32 20, i32 24, i32 25, i32 26, i32 27, i32 29, i32 29, i32 28, i32 28>
+  // CHECK: shufflevector <32 x i16> %{{.*}}, <32 x i16> poison, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 5, i32 4, i32 4, i32 8, i32 9, i32 10, i32 11, i32 13, i32 13, i32 12, i32 12, i32 16, i32 17, i32 18, i32 19, i32 21, i32 21, i32 20, i32 20, i32 24, i32 25, i32 26, i32 27, i32 29, i32 29, i32 28, i32 28>
   return _mm512_shufflehi_epi16(__A, 5); 
 }
 
 __m512i test_mm512_mask_shufflehi_epi16(__m512i __W, __mmask32 __U, __m512i __A) {
   // CHECK-LABEL: @test_mm512_mask_shufflehi_epi16
-  // CHECK: shufflevector <32 x i16> %{{.*}}, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 5, i32 4, i32 4, i32 8, i32 9, i32 10, i32 11, i32 13, i32 13, i32 12, i32 12, i32 16, i32 17, i32 18, i32 19, i32 21, i32 21, i32 20, i32 20, i32 24, i32 25, i32 26, i32 27, i32 29, i32 29, i32 28, i32 28>
+  // CHECK: shufflevector <32 x i16> %{{.*}}, <32 x i16> poison, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 5, i32 4, i32 4, i32 8, i32 9, i32 10, i32 11, i32 13, i32 13, i32 12, i32 12, i32 16, i32 17, i32 18, i32 19, i32 21, i32 21, i32 20, i32 20, i32 24, i32 25, i32 26, i32 27, i32 29, i32 29, i32 28, i32 28>
   // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
   return _mm512_mask_shufflehi_epi16(__W, __U, __A, 5); 
 }
 
 __m512i test_mm512_maskz_shufflehi_epi16(__mmask32 __U, __m512i __A) {
   // CHECK-LABEL: @test_mm512_maskz_shufflehi_epi16
-  // CHECK: shufflevector <32 x i16> %{{.*}}, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 5, i32 4, i32 4, i32 8, i32 9, i32 10, i32 11, i32 13, i32 13, i32 12, i32 12, i32 16, i32 17, i32 18, i32 19, i32 21, i32 21, i32 20, i32 20, i32 24, i32 25, i32 26, i32 27, i32 29, i32 29, i32 28, i32 28>
+  // CHECK: shufflevector <32 x i16> %{{.*}}, <32 x i16> poison, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 5, i32 4, i32 4, i32 8, i32 9, i32 10, i32 11, i32 13, i32 13, i32 12, i32 12, i32 16, i32 17, i32 18, i32 19, i32 21, i32 21, i32 20, i32 20, i32 24, i32 25, i32 26, i32 27, i32 29, i32 29, i32 28, i32 28>
   // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
   return _mm512_maskz_shufflehi_epi16(__U, __A, 5); 
 }
 
 __m512i test_mm512_shufflelo_epi16(__m512i __A) {
   // CHECK-LABEL: @test_mm512_shufflelo_epi16
-  // CHECK: shufflevector <32 x i16> %{{.*}}, <32 x i16> undef, <32 x i32> <i32 1, i32 1, i32 0, i32 0, i32 4, i32 5, i32 6, i32 7, i32 9, i32 9, i32 8, i32 8, i32 12, i32 13, i32 14, i32 15, i32 17, i32 17, i32 16, i32 16, i32 20, i32 21, i32 22, i32 23, i32 25, i32 25, i32 24, i32 24, i32 28, i32 29, i32 30, i32 31>
+  // CHECK: shufflevector <32 x i16> %{{.*}}, <32 x i16> poison, <32 x i32> <i32 1, i32 1, i32 0, i32 0, i32 4, i32 5, i32 6, i32 7, i32 9, i32 9, i32 8, i32 8, i32 12, i32 13, i32 14, i32 15, i32 17, i32 17, i32 16, i32 16, i32 20, i32 21, i32 22, i32 23, i32 25, i32 25, i32 24, i32 24, i32 28, i32 29, i32 30, i32 31>
   return _mm512_shufflelo_epi16(__A, 5); 
 }
 
 __m512i test_mm512_mask_shufflelo_epi16(__m512i __W, __mmask32 __U, __m512i __A) {
   // CHECK-LABEL: @test_mm512_mask_shufflelo_epi16
-  // CHECK: shufflevector <32 x i16> %{{.*}}, <32 x i16> undef, <32 x i32> <i32 1, i32 1, i32 0, i32 0, i32 4, i32 5, i32 6, i32 7, i32 9, i32 9, i32 8, i32 8, i32 12, i32 13, i32 14, i32 15, i32 17, i32 17, i32 16, i32 16, i32 20, i32 21, i32 22, i32 23, i32 25, i32 25, i32 24, i32 24, i32 28, i32 29, i32 30, i32 31>
+  // CHECK: shufflevector <32 x i16> %{{.*}}, <32 x i16> poison, <32 x i32> <i32 1, i32 1, i32 0, i32 0, i32 4, i32 5, i32 6, i32 7, i32 9, i32 9, i32 8, i32 8, i32 12, i32 13, i32 14, i32 15, i32 17, i32 17, i32 16, i32 16, i32 20, i32 21, i32 22, i32 23, i32 25, i32 25, i32 24, i32 24, i32 28, i32 29, i32 30, i32 31>
   // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
   return _mm512_mask_shufflelo_epi16(__W, __U, __A, 5); 
 }
 
 __m512i test_mm512_maskz_shufflelo_epi16(__mmask32 __U, __m512i __A) {
   // CHECK-LABEL: @test_mm512_maskz_shufflelo_epi16
-  // CHECK: shufflevector <32 x i16> %{{.*}}, <32 x i16> undef, <32 x i32> <i32 1, i32 1, i32 0, i32 0, i32 4, i32 5, i32 6, i32 7, i32 9, i32 9, i32 8, i32 8, i32 12, i32 13, i32 14, i32 15, i32 17, i32 17, i32 16, i32 16, i32 20, i32 21, i32 22, i32 23, i32 25, i32 25, i32 24, i32 24, i32 28, i32 29, i32 30, i32 31>
+  // CHECK: shufflevector <32 x i16> %{{.*}}, <32 x i16> poison, <32 x i32> <i32 1, i32 1, i32 0, i32 0, i32 4, i32 5, i32 6, i32 7, i32 9, i32 9, i32 8, i32 8, i32 12, i32 13, i32 14, i32 15, i32 17, i32 17, i32 16, i32 16, i32 20, i32 21, i32 22, i32 23, i32 25, i32 25, i32 24, i32 24, i32 28, i32 29, i32 30, i32 31>
   // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
   return _mm512_maskz_shufflelo_epi16(__U, __A, 5); 
 }

diff  --git a/clang/test/CodeGen/X86/avx512dq-builtins.c b/clang/test/CodeGen/X86/avx512dq-builtins.c
index 6ed0eef73797..de4f673d1718 100644
--- a/clang/test/CodeGen/X86/avx512dq-builtins.c
+++ b/clang/test/CodeGen/X86/avx512dq-builtins.c
@@ -1333,80 +1333,80 @@ __m512i test_mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i const* __A) {
 
 __m256 test_mm512_extractf32x8_ps(__m512 __A) {
   // CHECK-LABEL: @test_mm512_extractf32x8_ps
-  // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
   return _mm512_extractf32x8_ps(__A, 1); 
 }
 
 __m256 test_mm512_mask_extractf32x8_ps(__m256 __W, __mmask8 __U, __m512 __A) {
   // CHECK-LABEL: @test_mm512_mask_extractf32x8_ps
-  // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
   // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
   return _mm512_mask_extractf32x8_ps(__W, __U, __A, 1); 
 }
 
 __m256 test_mm512_maskz_extractf32x8_ps(__mmask8 __U, __m512 __A) {
   // CHECK-LABEL: @test_mm512_maskz_extractf32x8_ps
-  // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
   // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
   return _mm512_maskz_extractf32x8_ps(__U, __A, 1); 
 }
 
 __m128d test_mm512_extractf64x2_pd(__m512d __A) {
   // CHECK-LABEL: @test_mm512_extractf64x2_pd
-  // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> undef, <2 x i32> <i32 6, i32 7>
+  // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> poison, <2 x i32> <i32 6, i32 7>
   return _mm512_extractf64x2_pd(__A, 3); 
 }
 
 __m128d test_mm512_mask_extractf64x2_pd(__m128d __W, __mmask8 __U, __m512d __A) {
   // CHECK-LABEL: @test_mm512_mask_extractf64x2_pd
-  // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> undef, <2 x i32> <i32 6, i32 7>
+  // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> poison, <2 x i32> <i32 6, i32 7>
   // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}
   return _mm512_mask_extractf64x2_pd(__W, __U, __A, 3); 
 }
 
 __m128d test_mm512_maskz_extractf64x2_pd(__mmask8 __U, __m512d __A) {
   // CHECK-LABEL: @test_mm512_maskz_extractf64x2_pd
-  // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> undef, <2 x i32> <i32 6, i32 7>
+  // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> poison, <2 x i32> <i32 6, i32 7>
   // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}
   return _mm512_maskz_extractf64x2_pd(__U, __A, 3); 
 }
 
 __m256i test_mm512_extracti32x8_epi32(__m512i __A) {
   // CHECK-LABEL: @test_mm512_extracti32x8_epi32
-  // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
   return _mm512_extracti32x8_epi32(__A, 1); 
 }
 
 __m256i test_mm512_mask_extracti32x8_epi32(__m256i __W, __mmask8 __U, __m512i __A) {
   // CHECK-LABEL: @test_mm512_mask_extracti32x8_epi32
-  // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
   // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
   return _mm512_mask_extracti32x8_epi32(__W, __U, __A, 1); 
 }
 
 __m256i test_mm512_maskz_extracti32x8_epi32(__mmask8 __U, __m512i __A) {
   // CHECK-LABEL: @test_mm512_maskz_extracti32x8_epi32
-  // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
   // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
   return _mm512_maskz_extracti32x8_epi32(__U, __A, 1); 
 }
 
 __m128i test_mm512_extracti64x2_epi64(__m512i __A) {
   // CHECK-LABEL: @test_mm512_extracti64x2_epi64
-  // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> undef, <2 x i32> <i32 6, i32 7>
+  // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> poison, <2 x i32> <i32 6, i32 7>
   return _mm512_extracti64x2_epi64(__A, 3); 
 }
 
 __m128i test_mm512_mask_extracti64x2_epi64(__m128i __W, __mmask8 __U, __m512i __A) {
   // CHECK-LABEL: @test_mm512_mask_extracti64x2_epi64
-  // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> undef, <2 x i32> <i32 6, i32 7>
+  // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> poison, <2 x i32> <i32 6, i32 7>
   // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}
   return _mm512_mask_extracti64x2_epi64(__W, __U, __A, 3); 
 }
 
 __m128i test_mm512_maskz_extracti64x2_epi64(__mmask8 __U, __m512i __A) {
   // CHECK-LABEL: @test_mm512_maskz_extracti64x2_epi64
-  // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> undef, <2 x i32> <i32 6, i32 7>
+  // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> poison, <2 x i32> <i32 6, i32 7>
   // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}
   return _mm512_maskz_extracti64x2_epi64(__U, __A, 3); 
 }

diff  --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c
index a4b23eb1cf5e..81bd5a040ae7 100644
--- a/clang/test/CodeGen/X86/avx512f-builtins.c
+++ b/clang/test/CodeGen/X86/avx512f-builtins.c
@@ -2362,20 +2362,20 @@ __mmask16 test_mm512_mask_cmpunord_ps_mask(__mmask16 k, __m512 a, __m512 b) {
 __m256d test_mm512_extractf64x4_pd(__m512d a)
 {
   // CHECK-LABEL: @test_mm512_extractf64x4_pd
-  // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
   return _mm512_extractf64x4_pd(a, 1);
 }
 
 __m256d test_mm512_mask_extractf64x4_pd(__m256d  __W,__mmask8  __U,__m512d __A){
   // CHECK-LABEL:@test_mm512_mask_extractf64x4_pd
-  // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
   // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
   return _mm512_mask_extractf64x4_pd( __W, __U, __A, 1);
 }
 
 __m256d test_mm512_maskz_extractf64x4_pd(__mmask8  __U,__m512d __A){
   // CHECK-LABEL:@test_mm512_maskz_extractf64x4_pd
-  // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
   // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
   return _mm512_maskz_extractf64x4_pd( __U, __A, 1);
 }
@@ -2383,20 +2383,20 @@ __m256d test_mm512_maskz_extractf64x4_pd(__mmask8  __U,__m512d __A){
 __m128 test_mm512_extractf32x4_ps(__m512 a)
 {
   // CHECK-LABEL: @test_mm512_extractf32x4_ps
-  // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
   return _mm512_extractf32x4_ps(a, 1);
 }
 
 __m128 test_mm512_mask_extractf32x4_ps(__m128 __W, __mmask8  __U,__m512 __A){
   // CHECK-LABEL:@test_mm512_mask_extractf32x4_ps
-  // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
   // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
   return _mm512_mask_extractf32x4_ps( __W, __U, __A, 1);
 }
 
 __m128 test_mm512_maskz_extractf32x4_ps( __mmask8  __U,__m512 __A){
   // CHECK-LABEL:@test_mm512_maskz_extractf32x4_ps
-  // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
   // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
  return _mm512_maskz_extractf32x4_ps(  __U, __A, 1);
 }
@@ -5203,40 +5203,40 @@ __m512i test_mm512_mask2_permutex2var_epi64(__m512i __A, __m512i __I, __mmask8 _
 
 __m512d test_mm512_permute_pd(__m512d __X) {
   // CHECK-LABEL: @test_mm512_permute_pd
-  // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
+  // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
   return _mm512_permute_pd(__X, 2);
 }
 
 __m512d test_mm512_mask_permute_pd(__m512d __W, __mmask8 __U, __m512d __X) {
   // CHECK-LABEL: @test_mm512_mask_permute_pd
-  // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
+  // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
   // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
   return _mm512_mask_permute_pd(__W, __U, __X, 2);
 }
 
 __m512d test_mm512_maskz_permute_pd(__mmask8 __U, __m512d __X) {
   // CHECK-LABEL: @test_mm512_maskz_permute_pd
-  // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
+  // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
   // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
   return _mm512_maskz_permute_pd(__U, __X, 2);
 }
 
 __m512 test_mm512_permute_ps(__m512 __X) {
   // CHECK-LABEL: @test_mm512_permute_ps
-  // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> undef, <16 x i32> <i32 2, i32 0, i32 0, i32 0, i32 6, i32 4, i32 4, i32 4, i32 10, i32 8, i32 8, i32 8, i32 14, i32 12, i32 12, i32 12>
+  // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> poison, <16 x i32> <i32 2, i32 0, i32 0, i32 0, i32 6, i32 4, i32 4, i32 4, i32 10, i32 8, i32 8, i32 8, i32 14, i32 12, i32 12, i32 12>
   return _mm512_permute_ps(__X, 2);
 }
 
 __m512 test_mm512_mask_permute_ps(__m512 __W, __mmask16 __U, __m512 __X) {
   // CHECK-LABEL: @test_mm512_mask_permute_ps
-  // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> undef, <16 x i32> <i32 2, i32 0, i32 0, i32 0, i32 6, i32 4, i32 4, i32 4, i32 10, i32 8, i32 8, i32 8, i32 14, i32 12, i32 12, i32 12>
+  // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> poison, <16 x i32> <i32 2, i32 0, i32 0, i32 0, i32 6, i32 4, i32 4, i32 4, i32 10, i32 8, i32 8, i32 8, i32 14, i32 12, i32 12, i32 12>
   // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
   return _mm512_mask_permute_ps(__W, __U, __X, 2);
 }
 
 __m512 test_mm512_maskz_permute_ps(__mmask16 __U, __m512 __X) {
   // CHECK-LABEL: @test_mm512_maskz_permute_ps
-  // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> undef, <16 x i32> <i32 2, i32 0, i32 0, i32 0, i32 6, i32 4, i32 4, i32 4, i32 10, i32 8, i32 8, i32 8, i32 14, i32 12, i32 12, i32 12>
+  // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> poison, <16 x i32> <i32 2, i32 0, i32 0, i32 0, i32 6, i32 4, i32 4, i32 4, i32 10, i32 8, i32 8, i32 8, i32 14, i32 12, i32 12, i32 12>
   // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
   return _mm512_maskz_permute_ps(__U, __X, 2);
 }
@@ -6773,40 +6773,40 @@ void test_mm512_mask_cvtepi64_storeu_epi16(void *__P, __mmask8 __M, __m512i __A)
 
 __m128i test_mm512_extracti32x4_epi32(__m512i __A) {
   // CHECK-LABEL: @test_mm512_extracti32x4_epi32
-  // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> undef, <4 x i32> <i32 12, i32 13, i32 14, i32 15>
+  // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> poison, <4 x i32> <i32 12, i32 13, i32 14, i32 15>
   return _mm512_extracti32x4_epi32(__A, 3); 
 }
 
 __m128i test_mm512_mask_extracti32x4_epi32(__m128i __W, __mmask8 __U, __m512i __A) {
   // CHECK-LABEL: @test_mm512_mask_extracti32x4_epi32
-  // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> undef, <4 x i32> <i32 12, i32 13, i32 14, i32 15>
+  // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> poison, <4 x i32> <i32 12, i32 13, i32 14, i32 15>
   // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
   return _mm512_mask_extracti32x4_epi32(__W, __U, __A, 3); 
 }
 
 __m128i test_mm512_maskz_extracti32x4_epi32(__mmask8 __U, __m512i __A) {
   // CHECK-LABEL: @test_mm512_maskz_extracti32x4_epi32
-  // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> undef, <4 x i32> <i32 12, i32 13, i32 14, i32 15>
+  // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> poison, <4 x i32> <i32 12, i32 13, i32 14, i32 15>
   // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
   return _mm512_maskz_extracti32x4_epi32(__U, __A, 3); 
 }
 
 __m256i test_mm512_extracti64x4_epi64(__m512i __A) {
   // CHECK-LABEL: @test_mm512_extracti64x4_epi64
-  // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
   return _mm512_extracti64x4_epi64(__A, 1); 
 }
 
 __m256i test_mm512_mask_extracti64x4_epi64(__m256i __W, __mmask8 __U, __m512i __A) {
   // CHECK-LABEL: @test_mm512_mask_extracti64x4_epi64
-  // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
   // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}
   return _mm512_mask_extracti64x4_epi64(__W, __U, __A, 1); 
 }
 
 __m256i test_mm512_maskz_extracti64x4_epi64(__mmask8 __U, __m512i __A) {
   // CHECK-LABEL: @test_mm512_maskz_extracti64x4_epi64
-  // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
   // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}
   return _mm512_maskz_extracti64x4_epi64(__U, __A, 1); 
 }
@@ -8169,40 +8169,40 @@ __m128d test_mm_mask3_fnmsub_round_sd(__m128d __W, __m128d __X, __m128d __Y, __m
 
 __m512d test_mm512_permutex_pd(__m512d __X) {
   // CHECK-LABEL: @test_mm512_permutex_pd
-  // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
+  // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
   return _mm512_permutex_pd(__X, 0);
 }
 
 __m512d test_mm512_mask_permutex_pd(__m512d __W, __mmask8 __U, __m512d __X) {
   // CHECK-LABEL: @test_mm512_mask_permutex_pd
-  // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
+  // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
   // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
   return _mm512_mask_permutex_pd(__W, __U, __X, 0);
 }
 
 __m512d test_mm512_maskz_permutex_pd(__mmask8 __U, __m512d __X) {
   // CHECK-LABEL: @test_mm512_maskz_permutex_pd
-  // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
+  // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
   // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
   return _mm512_maskz_permutex_pd(__U, __X, 0);
 }
 
 __m512i test_mm512_permutex_epi64(__m512i __X) {
   // CHECK-LABEL: @test_mm512_permutex_epi64
-  // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
+  // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
   return _mm512_permutex_epi64(__X, 0);
 }
 
 __m512i test_mm512_mask_permutex_epi64(__m512i __W, __mmask8 __M, __m512i __X) {
   // CHECK-LABEL: @test_mm512_mask_permutex_epi64
-  // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
+  // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
   // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
   return _mm512_mask_permutex_epi64(__W, __M, __X, 0);
 }
 
 __m512i test_mm512_maskz_permutex_epi64(__mmask8 __M, __m512i __X) {
   // CHECK-LABEL: @test_mm512_maskz_permutex_epi64
-  // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
+  // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
   // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
   return _mm512_maskz_permutex_epi64(__M, __X, 0);
 }
@@ -8722,20 +8722,20 @@ __m512 test_mm512_maskz_moveldup_ps(__mmask16 __U, __m512 __A) {
 
 __m512i test_mm512_shuffle_epi32(__m512i __A) {
   // CHECK-LABEL: @test_mm512_shuffle_epi32
-  // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> undef, <16 x i32> <i32 1, i32 0, i32 0, i32 0, i32 5, i32 4, i32 4, i32 4, i32 9, i32 8, i32 8, i32 8, i32 13, i32 12, i32 12, i32 12>
+  // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> poison, <16 x i32> <i32 1, i32 0, i32 0, i32 0, i32 5, i32 4, i32 4, i32 4, i32 9, i32 8, i32 8, i32 8, i32 13, i32 12, i32 12, i32 12>
   return _mm512_shuffle_epi32(__A, 1); 
 }
 
 __m512i test_mm512_mask_shuffle_epi32(__m512i __W, __mmask16 __U, __m512i __A) {
   // CHECK-LABEL: @test_mm512_mask_shuffle_epi32
-  // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> undef, <16 x i32> <i32 1, i32 0, i32 0, i32 0, i32 5, i32 4, i32 4, i32 4, i32 9, i32 8, i32 8, i32 8, i32 13, i32 12, i32 12, i32 12>
+  // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> poison, <16 x i32> <i32 1, i32 0, i32 0, i32 0, i32 5, i32 4, i32 4, i32 4, i32 9, i32 8, i32 8, i32 8, i32 13, i32 12, i32 12, i32 12>
   // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
   return _mm512_mask_shuffle_epi32(__W, __U, __A, 1); 
 }
 
 __m512i test_mm512_maskz_shuffle_epi32(__mmask16 __U, __m512i __A) {
   // CHECK-LABEL: @test_mm512_maskz_shuffle_epi32
-  // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> undef, <16 x i32> <i32 1, i32 0, i32 0, i32 0, i32 5, i32 4, i32 4, i32 4, i32 9, i32 8, i32 8, i32 8, i32 13, i32 12, i32 12, i32 12>
+  // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> poison, <16 x i32> <i32 1, i32 0, i32 0, i32 0, i32 5, i32 4, i32 4, i32 4, i32 9, i32 8, i32 8, i32 8, i32 13, i32 12, i32 12, i32 12>
   // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
   return _mm512_maskz_shuffle_epi32(__U, __A, 1); 
 }

diff  --git a/clang/test/CodeGen/X86/avx512vl-builtins-constrained.c b/clang/test/CodeGen/X86/avx512vl-builtins-constrained.c
index bc5951023629..4e6ac9d367f8 100644
--- a/clang/test/CodeGen/X86/avx512vl-builtins-constrained.c
+++ b/clang/test/CodeGen/X86/avx512vl-builtins-constrained.c
@@ -9,7 +9,7 @@
 __m128 test_mm_mask_cvtph_ps(__m128 __W, __mmask8 __U, __m128i __A) {
   // COMMON-LABEL: @test_mm_mask_cvtph_ps
   // COMMONIR: bitcast <2 x i64> %{{.*}} to <8 x i16>
-  // COMMONIR: shufflevector <8 x i16> %{{.*}}, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  // COMMONIR: shufflevector <8 x i16> %{{.*}}, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   // COMMONIR: bitcast <4 x i16> %{{.*}} to <4 x half>
   // UNCONSTRAINED: fpext <4 x half> %{{.*}} to <4 x float>
   // CONSTRAINED: call <4 x float> @llvm.experimental.constrained.fpext.v4f32.v4f16(<4 x half> %{{.*}}, metadata !"fpexcept.strict") 
@@ -20,7 +20,7 @@ __m128 test_mm_mask_cvtph_ps(__m128 __W, __mmask8 __U, __m128i __A) {
 __m128 test_mm_maskz_cvtph_ps(__mmask8 __U, __m128i __A) {
   // COMMON-LABEL: @test_mm_maskz_cvtph_ps
   // COMMONIR: bitcast <2 x i64> %{{.*}} to <8 x i16>
-  // COMMONIR: shufflevector <8 x i16> %{{.*}}, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  // COMMONIR: shufflevector <8 x i16> %{{.*}}, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   // COMMONIR: bitcast <4 x i16> %{{.*}} to <4 x half>
   // UNCONSTRAINED: fpext <4 x half> %{{.*}} to <4 x float>
   // CONSTRAINED: call <4 x float> @llvm.experimental.constrained.fpext.v4f32.v4f16(<4 x half> %{{.*}}, metadata !"fpexcept.strict") 

diff  --git a/clang/test/CodeGen/X86/avx512vl-builtins.c b/clang/test/CodeGen/X86/avx512vl-builtins.c
index 248cb61d97ae..ef27542876a4 100644
--- a/clang/test/CodeGen/X86/avx512vl-builtins.c
+++ b/clang/test/CodeGen/X86/avx512vl-builtins.c
@@ -7350,56 +7350,56 @@ __m256 test_mm256_maskz_rcp14_ps(__mmask8 __U, __m256 __A) {
 
 __m128d test_mm_mask_permute_pd(__m128d __W, __mmask8 __U, __m128d __X) {
   // CHECK-LABEL: @test_mm_mask_permute_pd
-  // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> undef, <2 x i32> <i32 1, i32 0>
+  // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> poison, <2 x i32> <i32 1, i32 0>
   // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}
   return _mm_mask_permute_pd(__W, __U, __X, 1); 
 }
 
 __m128d test_mm_maskz_permute_pd(__mmask8 __U, __m128d __X) {
   // CHECK-LABEL: @test_mm_maskz_permute_pd
-  // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> undef, <2 x i32> <i32 1, i32 0>
+  // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> poison, <2 x i32> <i32 1, i32 0>
   // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}
   return _mm_maskz_permute_pd(__U, __X, 1); 
 }
 
 __m256d test_mm256_mask_permute_pd(__m256d __W, __mmask8 __U, __m256d __X) {
   // CHECK-LABEL: @test_mm256_mask_permute_pd
-  // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+  // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
   // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
   return _mm256_mask_permute_pd(__W, __U, __X, 5); 
 }
 
 __m256d test_mm256_maskz_permute_pd(__mmask8 __U, __m256d __X) {
   // CHECK-LABEL: @test_mm256_maskz_permute_pd
-  // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+  // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
   // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
   return _mm256_maskz_permute_pd(__U, __X, 5); 
 }
 
 __m128 test_mm_mask_permute_ps(__m128 __W, __mmask8 __U, __m128 __X) {
   // CHECK-LABEL: @test_mm_mask_permute_ps
-  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
   // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
   return _mm_mask_permute_ps(__W, __U, __X, 0x1b); 
 }
 
 __m128 test_mm_maskz_permute_ps(__mmask8 __U, __m128 __X) {
   // CHECK-LABEL: @test_mm_maskz_permute_ps
-  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
   // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
   return _mm_maskz_permute_ps(__U, __X, 0x1b); 
 }
 
 __m256 test_mm256_mask_permute_ps(__m256 __W, __mmask8 __U, __m256 __X) {
   // CHECK-LABEL: @test_mm256_mask_permute_ps
-  // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
+  // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> poison, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
   // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
   return _mm256_mask_permute_ps(__W, __U, __X, 0x1b); 
 }
 
 __m256 test_mm256_maskz_permute_ps(__mmask8 __U, __m256 __X) {
   // CHECK-LABEL: @test_mm256_maskz_permute_ps
-  // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
+  // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> poison, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
   // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
   return _mm256_maskz_permute_ps(__U, __X, 0x1b); 
 }
@@ -9057,40 +9057,40 @@ void test_mm256_mask_cvtepi64_storeu_epi16(void * __P, __mmask8 __M, __m256i __A
 
 __m128 test_mm256_extractf32x4_ps(__m256 __A) {
   // CHECK-LABEL: @test_mm256_extractf32x4_ps
-  // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
   return _mm256_extractf32x4_ps(__A, 1); 
 }
 
 __m128 test_mm256_mask_extractf32x4_ps(__m128 __W, __mmask8 __U, __m256 __A) {
   // CHECK-LABEL: @test_mm256_mask_extractf32x4_ps
-  // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
   // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
   return _mm256_mask_extractf32x4_ps(__W, __U, __A, 1); 
 }
 
 __m128 test_mm256_maskz_extractf32x4_ps(__mmask8 __U, __m256 __A) {
   // CHECK-LABEL: @test_mm256_maskz_extractf32x4_ps
-  // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
   // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
   return _mm256_maskz_extractf32x4_ps(__U, __A, 1); 
 }
 
 __m128i test_mm256_extracti32x4_epi32(__m256i __A) {
   // CHECK-LABEL: @test_mm256_extracti32x4_epi32
-  // CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  // CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
   return _mm256_extracti32x4_epi32(__A, 1); 
 }
 
 __m128i test_mm256_mask_extracti32x4_epi32(__m128i __W, __mmask8 __U, __m256i __A) {
   // CHECK-LABEL: @test_mm256_mask_extracti32x4_epi32
-  // CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  // CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
   // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
   return _mm256_mask_extracti32x4_epi32(__W, __U, __A, 1); 
 }
 
 __m128i test_mm256_maskz_extracti32x4_epi32(__mmask8 __U, __m256i __A) {
   // CHECK-LABEL: @test_mm256_maskz_extracti32x4_epi32
-  // CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  // CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
   // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
   return _mm256_maskz_extracti32x4_epi32(__U, __A, 1); 
 }
@@ -9305,40 +9305,40 @@ __m256i test_mm256_mask_i32gather_epi32(__m256i __v1_old, __mmask8 __mask, __m25
 
 __m256d test_mm256_permutex_pd(__m256d __X) {
   // CHECK-LABEL: @test_mm256_permutex_pd
-  // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> undef, <4 x i32> <i32 3, i32 0, i32 0, i32 0>
+  // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> poison, <4 x i32> <i32 3, i32 0, i32 0, i32 0>
   return _mm256_permutex_pd(__X, 3);
 }
 
 __m256d test_mm256_mask_permutex_pd(__m256d __W, __mmask8 __U, __m256d __X) {
   // CHECK-LABEL: @test_mm256_mask_permutex_pd
-  // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> undef, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
+  // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> poison, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
   // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
   return _mm256_mask_permutex_pd(__W, __U, __X, 1);
 }
 
 __m256d test_mm256_maskz_permutex_pd(__mmask8 __U, __m256d __X) {
   // CHECK-LABEL: @test_mm256_maskz_permutex_pd
-  // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> undef, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
+  // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> poison, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
   // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
   return _mm256_maskz_permutex_pd(__U, __X, 1);
 }
 
 __m256i test_mm256_permutex_epi64(__m256i __X) {
   // CHECK-LABEL: @test_mm256_permutex_epi64
-  // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> undef, <4 x i32> <i32 3, i32 0, i32 0, i32 0>
+  // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> poison, <4 x i32> <i32 3, i32 0, i32 0, i32 0>
   return _mm256_permutex_epi64(__X, 3);
 }
 
 __m256i test_mm256_mask_permutex_epi64(__m256i __W, __mmask8 __M, __m256i __X) {
   // CHECK-LABEL: @test_mm256_mask_permutex_epi64
-  // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> undef, <4 x i32> <i32 3, i32 0, i32 0, i32 0>
+  // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> poison, <4 x i32> <i32 3, i32 0, i32 0, i32 0>
   // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}
   return _mm256_mask_permutex_epi64(__W, __M, __X, 3);
 }
 
 __m256i test_mm256_maskz_permutex_epi64(__mmask8 __M, __m256i __X) {
   // CHECK-LABEL: @test_mm256_maskz_permutex_epi64
-  // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> undef, <4 x i32> <i32 3, i32 0, i32 0, i32 0>
+  // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> poison, <4 x i32> <i32 3, i32 0, i32 0, i32 0>
   // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}
   return _mm256_maskz_permutex_epi64(__M, __X, 3);
 }
@@ -9554,28 +9554,28 @@ __m256 test_mm256_maskz_moveldup_ps(__mmask8 __U, __m256 __A) {
 
 __m128i test_mm_mask_shuffle_epi32(__m128i __W, __mmask8 __U, __m128i __A) {
   // CHECK-LABEL: @test_mm_mask_shuffle_epi32
-  // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
+  // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> poison, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
   // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
   return _mm_mask_shuffle_epi32(__W, __U, __A, 1);
 }
 
 __m128i test_mm_maskz_shuffle_epi32(__mmask8 __U, __m128i __A) {
   // CHECK-LABEL: @test_mm_maskz_shuffle_epi32
-  // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 2, i32 0, i32 0, i32 0>
+  // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> poison, <4 x i32> <i32 2, i32 0, i32 0, i32 0>
   // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
   return _mm_maskz_shuffle_epi32(__U, __A, 2);
 }
 
 __m256i test_mm256_mask_shuffle_epi32(__m256i __W, __mmask8 __U, __m256i __A) {
   // CHECK-LABEL: @test_mm256_mask_shuffle_epi32
-  // CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> undef, <8 x i32> <i32 2, i32 0, i32 0, i32 0, i32 6, i32 4, i32 4, i32 4>
+  // CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> poison, <8 x i32> <i32 2, i32 0, i32 0, i32 0, i32 6, i32 4, i32 4, i32 4>
   // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
   return _mm256_mask_shuffle_epi32(__W, __U, __A, 2);
 }
 
 __m256i test_mm256_maskz_shuffle_epi32(__mmask8 __U, __m256i __A) {
   // CHECK-LABEL: @test_mm256_maskz_shuffle_epi32
-  // CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> undef, <8 x i32> <i32 2, i32 0, i32 0, i32 0, i32 6, i32 4, i32 4, i32 4>
+  // CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> poison, <8 x i32> <i32 2, i32 0, i32 0, i32 0, i32 6, i32 4, i32 4, i32 4>
   // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
   return _mm256_maskz_shuffle_epi32(__U, __A, 2);
 }
@@ -9631,7 +9631,7 @@ __m256 test_mm256_maskz_mov_ps(__mmask8 __U, __m256 __A) {
 __m128 test_mm_mask_cvtph_ps(__m128 __W, __mmask8 __U, __m128i __A) {
   // CHECK-LABEL: @test_mm_mask_cvtph_ps
   // CHECK: bitcast <2 x i64> %{{.*}} to <8 x i16>
-  // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   // CHECK: bitcast <4 x i16> %{{.*}} to <4 x half>
   // CHECK: fpext <4 x half> %{{.*}} to <4 x float>
   // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
@@ -9641,7 +9641,7 @@ __m128 test_mm_mask_cvtph_ps(__m128 __W, __mmask8 __U, __m128i __A) {
 __m128 test_mm_maskz_cvtph_ps(__mmask8 __U, __m128i __A) {
   // CHECK-LABEL: @test_mm_maskz_cvtph_ps
   // CHECK: bitcast <2 x i64> %{{.*}} to <8 x i16>
-  // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   // CHECK: bitcast <4 x i16> %{{.*}} to <4 x half>
   // CHECK: fpext <4 x half> %{{.*}} to <4 x float>
   // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}

diff  --git a/clang/test/CodeGen/X86/avx512vlbw-builtins.c b/clang/test/CodeGen/X86/avx512vlbw-builtins.c
index 36feafd29437..b7c94df09f05 100644
--- a/clang/test/CodeGen/X86/avx512vlbw-builtins.c
+++ b/clang/test/CodeGen/X86/avx512vlbw-builtins.c
@@ -3145,56 +3145,56 @@ __mmask16 test_mm256_movepi16_mask(__m256i __A) {
 
 __m128i test_mm_mask_shufflehi_epi16(__m128i __W, __mmask8 __U, __m128i __A) {
   // CHECK-LABEL: @test_mm_mask_shufflehi_epi16
-  // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 5, i32 4, i32 4>
+  // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 5, i32 4, i32 4>
   // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
   return _mm_mask_shufflehi_epi16(__W, __U, __A, 5); 
 }
 
 __m128i test_mm_maskz_shufflehi_epi16(__mmask8 __U, __m128i __A) {
   // CHECK-LABEL: @test_mm_maskz_shufflehi_epi16
-  // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 5, i32 4, i32 4>
+  // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 5, i32 4, i32 4>
   // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
   return _mm_maskz_shufflehi_epi16(__U, __A, 5); 
 }
 
 __m128i test_mm_mask_shufflelo_epi16(__m128i __W, __mmask8 __U, __m128i __A) {
   // CHECK-LABEL: @test_mm_mask_shufflelo_epi16
-  // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> undef, <8 x i32> <i32 1, i32 1, i32 0, i32 0, i32 4, i32 5, i32 6, i32 7>
+  // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> poison, <8 x i32> <i32 1, i32 1, i32 0, i32 0, i32 4, i32 5, i32 6, i32 7>
   // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
   return _mm_mask_shufflelo_epi16(__W, __U, __A, 5); 
 }
 
 __m128i test_mm_maskz_shufflelo_epi16(__mmask8 __U, __m128i __A) {
   // CHECK-LABEL: @test_mm_maskz_shufflelo_epi16
-  // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> undef, <8 x i32> <i32 1, i32 1, i32 0, i32 0, i32 4, i32 5, i32 6, i32 7>
+  // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> poison, <8 x i32> <i32 1, i32 1, i32 0, i32 0, i32 4, i32 5, i32 6, i32 7>
   // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
   return _mm_maskz_shufflelo_epi16(__U, __A, 5); 
 }
 
 __m256i test_mm256_mask_shufflehi_epi16(__m256i __W, __mmask16 __U, __m256i __A) {
   // CHECK-LABEL: @test_mm256_mask_shufflehi_epi16
-  // CHECK: shufflevector <16 x i16> %{{.*}}, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 5, i32 4, i32 4, i32 8, i32 9, i32 10, i32 11, i32 13, i32 13, i32 12, i32 12>
+  // CHECK: shufflevector <16 x i16> %{{.*}}, <16 x i16> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 5, i32 4, i32 4, i32 8, i32 9, i32 10, i32 11, i32 13, i32 13, i32 12, i32 12>
   // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
   return _mm256_mask_shufflehi_epi16(__W, __U, __A, 5); 
 }
 
 __m256i test_mm256_maskz_shufflehi_epi16(__mmask16 __U, __m256i __A) {
   // CHECK-LABEL: @test_mm256_maskz_shufflehi_epi16
-  // CHECK: shufflevector <16 x i16> %{{.*}}, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 5, i32 4, i32 4, i32 8, i32 9, i32 10, i32 11, i32 13, i32 13, i32 12, i32 12>
+  // CHECK: shufflevector <16 x i16> %{{.*}}, <16 x i16> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 5, i32 4, i32 4, i32 8, i32 9, i32 10, i32 11, i32 13, i32 13, i32 12, i32 12>
   // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
   return _mm256_maskz_shufflehi_epi16(__U, __A, 5); 
 }
 
 __m256i test_mm256_mask_shufflelo_epi16(__m256i __W, __mmask16 __U, __m256i __A) {
   // CHECK-LABEL: @test_mm256_mask_shufflelo_epi16
-  // CHECK: shufflevector <16 x i16> %{{.*}}, <16 x i16> undef, <16 x i32> <i32 1, i32 1, i32 0, i32 0, i32 4, i32 5, i32 6, i32 7, i32 9, i32 9, i32 8, i32 8, i32 12, i32 13, i32 14, i32 15>
+  // CHECK: shufflevector <16 x i16> %{{.*}}, <16 x i16> poison, <16 x i32> <i32 1, i32 1, i32 0, i32 0, i32 4, i32 5, i32 6, i32 7, i32 9, i32 9, i32 8, i32 8, i32 12, i32 13, i32 14, i32 15>
   // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
   return _mm256_mask_shufflelo_epi16(__W, __U, __A, 5); 
 }
 
 __m256i test_mm256_maskz_shufflelo_epi16(__mmask16 __U, __m256i __A) {
   // CHECK-LABEL: @test_mm256_maskz_shufflelo_epi16
-  // CHECK: shufflevector <16 x i16> %{{.*}}, <16 x i16> undef, <16 x i32> <i32 1, i32 1, i32 0, i32 0, i32 4, i32 5, i32 6, i32 7, i32 9, i32 9, i32 8, i32 8, i32 12, i32 13, i32 14, i32 15>
+  // CHECK: shufflevector <16 x i16> %{{.*}}, <16 x i16> poison, <16 x i32> <i32 1, i32 1, i32 0, i32 0, i32 4, i32 5, i32 6, i32 7, i32 9, i32 9, i32 8, i32 8, i32 12, i32 13, i32 14, i32 15>
   // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
   return _mm256_maskz_shufflelo_epi16(__U, __A, 5); 
 }

diff  --git a/clang/test/CodeGen/X86/avx512vldq-builtins.c b/clang/test/CodeGen/X86/avx512vldq-builtins.c
index ddde427592c0..e6a82cc4f080 100644
--- a/clang/test/CodeGen/X86/avx512vldq-builtins.c
+++ b/clang/test/CodeGen/X86/avx512vldq-builtins.c
@@ -1028,40 +1028,40 @@ __m256i test_mm256_maskz_broadcast_i64x2(__mmask8 __M, __m128i const* __A) {
 
 __m128d test_mm256_extractf64x2_pd(__m256d __A) {
   // CHECK-LABEL: @test_mm256_extractf64x2_pd
-  // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> undef, <2 x i32> <i32 2, i32 3>
+  // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> poison, <2 x i32> <i32 2, i32 3>
   return _mm256_extractf64x2_pd(__A, 1); 
 }
 
 __m128d test_mm256_mask_extractf64x2_pd(__m128d __W, __mmask8 __U, __m256d __A) {
   // CHECK-LABEL: @test_mm256_mask_extractf64x2_pd
-  // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> undef, <2 x i32> <i32 2, i32 3>
+  // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> poison, <2 x i32> <i32 2, i32 3>
   // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}
   return _mm256_mask_extractf64x2_pd(__W, __U, __A, 1); 
 }
 
 __m128d test_mm256_maskz_extractf64x2_pd(__mmask8 __U, __m256d __A) {
   // CHECK-LABEL: @test_mm256_maskz_extractf64x2_pd
-  // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> undef, <2 x i32> <i32 2, i32 3>
+  // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> poison, <2 x i32> <i32 2, i32 3>
   // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}
   return _mm256_maskz_extractf64x2_pd(__U, __A, 1); 
 }
 
 __m128i test_mm256_extracti64x2_epi64(__m256i __A) {
   // CHECK-LABEL: @test_mm256_extracti64x2_epi64
-  // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> undef, <2 x i32> <i32 2, i32 3>
+  // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> poison, <2 x i32> <i32 2, i32 3>
   return _mm256_extracti64x2_epi64(__A, 1); 
 }
 
 __m128i test_mm256_mask_extracti64x2_epi64(__m128i __W, __mmask8 __U, __m256i __A) {
   // CHECK-LABEL: @test_mm256_mask_extracti64x2_epi64
-  // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> undef, <2 x i32> <i32 2, i32 3>
+  // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> poison, <2 x i32> <i32 2, i32 3>
   // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}
   return _mm256_mask_extracti64x2_epi64(__W, __U, __A, 1); 
 }
 
 __m128i test_mm256_maskz_extracti64x2_epi64(__mmask8 __U, __m256i __A) {
   // CHECK-LABEL: @test_mm256_maskz_extracti64x2_epi64
-  // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> undef, <2 x i32> <i32 2, i32 3>
+  // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> poison, <2 x i32> <i32 2, i32 3>
   // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}
   return _mm256_maskz_extracti64x2_epi64(__U, __A, 1); 
 }

diff  --git a/clang/test/CodeGen/X86/f16c-builtins-constrained.c b/clang/test/CodeGen/X86/f16c-builtins-constrained.c
index ce84155ad97a..51a9d967ea8f 100644
--- a/clang/test/CodeGen/X86/f16c-builtins-constrained.c
+++ b/clang/test/CodeGen/X86/f16c-builtins-constrained.c
@@ -13,7 +13,7 @@ float test_cvtsh_ss(unsigned short a) {
   // CHECK: insertelement <8 x i16> %{{.*}}, i16 0, i32 5
   // CHECK: insertelement <8 x i16> %{{.*}}, i16 0, i32 6
   // CHECK: insertelement <8 x i16> %{{.*}}, i16 0, i32 7
-  // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   // CHECK: bitcast <4 x i16> %{{.*}} to <4 x half>
   // CHECK: call <4 x float> @llvm.experimental.constrained.fpext.v4f32.v4f16(<4 x half> %{{.*}}, metadata !"fpexcept.strict")
   // CHECK: extractelement <4 x float> %{{.*}}, i32 0
@@ -37,7 +37,7 @@ unsigned short test_cvtss_sh(float a) {
 __m128 test_mm_cvtph_ps(__m128i a) {
   // CHECK-LABEL: test_mm_cvtph_ps
   // CHECK: bitcast <2 x i64> %{{.*}} to <8 x i16>
-  // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   // CHECK: bitcast <4 x i16> %{{.*}} to <4 x half>
   // CHECK: call <4 x float> @llvm.experimental.constrained.fpext.v4f32.v4f16(<4 x half> %{{.*}}, metadata !"fpexcept.strict")
   return _mm_cvtph_ps(a);

diff  --git a/clang/test/CodeGen/X86/f16c-builtins.c b/clang/test/CodeGen/X86/f16c-builtins.c
index 1616cfb30947..bb8c9aa42209 100644
--- a/clang/test/CodeGen/X86/f16c-builtins.c
+++ b/clang/test/CodeGen/X86/f16c-builtins.c
@@ -13,7 +13,7 @@ float test_cvtsh_ss(unsigned short a) {
   // CHECK: insertelement <8 x i16> %{{.*}}, i16 0, i32 5
   // CHECK: insertelement <8 x i16> %{{.*}}, i16 0, i32 6
   // CHECK: insertelement <8 x i16> %{{.*}}, i16 0, i32 7
-  // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   // CHECK: bitcast <4 x i16> %{{.*}} to <4 x half>
   // CHECK: fpext <4 x half> %{{.*}} to <4 x float>
   // CHECK: extractelement <4 x float> %{{.*}}, i32 0
@@ -34,7 +34,7 @@ unsigned short test_cvtss_sh(float a) {
 __m128 test_mm_cvtph_ps(__m128i a) {
   // CHECK-LABEL: test_mm_cvtph_ps
   // CHECK: bitcast <2 x i64> %{{.*}} to <8 x i16>
-  // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   // CHECK: bitcast <4 x i16> %{{.*}} to <4 x half>
   // CHECK: fpext <4 x half> %{{.*}} to <4 x float>
   return _mm_cvtph_ps(a);

diff  --git a/clang/test/CodeGen/X86/sse2-builtins.c b/clang/test/CodeGen/X86/sse2-builtins.c
index 23df78892418..cde561a361f2 100644
--- a/clang/test/CodeGen/X86/sse2-builtins.c
+++ b/clang/test/CodeGen/X86/sse2-builtins.c
@@ -1150,7 +1150,7 @@ __m128i test_mm_setzero_si128() {
 
 __m128i test_mm_shuffle_epi32(__m128i A) {
   // CHECK-LABEL: test_mm_shuffle_epi32
-  // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> zeroinitializer
+  // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> poison, <4 x i32> zeroinitializer
   return _mm_shuffle_epi32(A, 0);
 }
 
@@ -1162,13 +1162,13 @@ __m128d test_mm_shuffle_pd(__m128d A, __m128d B) {
 
 __m128i test_mm_shufflehi_epi16(__m128i A) {
   // CHECK-LABEL: test_mm_shufflehi_epi16
-  // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 4>
+  // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 4>
   return _mm_shufflehi_epi16(A, 0);
 }
 
 __m128i test_mm_shufflelo_epi16(__m128i A) {
   // CHECK-LABEL: test_mm_shufflelo_epi16
-  // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 5, i32 6, i32 7>
+  // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 5, i32 6, i32 7>
   return _mm_shufflelo_epi16(A, 0);
 }
 

diff  --git a/clang/test/CodeGen/arm-mve-intrinsics/vmovl.c b/clang/test/CodeGen/arm-mve-intrinsics/vmovl.c
index 746678f61d57..7fd0fd591a43 100644
--- a/clang/test/CodeGen/arm-mve-intrinsics/vmovl.c
+++ b/clang/test/CodeGen/arm-mve-intrinsics/vmovl.c
@@ -6,7 +6,7 @@
 
 // CHECK-LABEL: @test_vmovlbq_s8(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <16 x i8> [[A:%.*]], <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+// CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <16 x i8> [[A:%.*]], <16 x i8> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
 // CHECK-NEXT:    [[TMP1:%.*]] = sext <8 x i8> [[TMP0]] to <8 x i16>
 // CHECK-NEXT:    ret <8 x i16> [[TMP1]]
 //
@@ -21,7 +21,7 @@ int16x8_t test_vmovlbq_s8(int8x16_t a)
 
 // CHECK-LABEL: @test_vmovlbq_s16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <8 x i16> [[A:%.*]], <8 x i16> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+// CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <8 x i16> [[A:%.*]], <8 x i16> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
 // CHECK-NEXT:    [[TMP1:%.*]] = sext <4 x i16> [[TMP0]] to <4 x i32>
 // CHECK-NEXT:    ret <4 x i32> [[TMP1]]
 //
@@ -36,7 +36,7 @@ int32x4_t test_vmovlbq_s16(int16x8_t a)
 
 // CHECK-LABEL: @test_vmovlbq_u8(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <16 x i8> [[A:%.*]], <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+// CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <16 x i8> [[A:%.*]], <16 x i8> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
 // CHECK-NEXT:    [[TMP1:%.*]] = zext <8 x i8> [[TMP0]] to <8 x i16>
 // CHECK-NEXT:    ret <8 x i16> [[TMP1]]
 //
@@ -51,7 +51,7 @@ uint16x8_t test_vmovlbq_u8(uint8x16_t a)
 
 // CHECK-LABEL: @test_vmovlbq_u16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <8 x i16> [[A:%.*]], <8 x i16> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+// CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <8 x i16> [[A:%.*]], <8 x i16> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
 // CHECK-NEXT:    [[TMP1:%.*]] = zext <4 x i16> [[TMP0]] to <4 x i32>
 // CHECK-NEXT:    ret <4 x i32> [[TMP1]]
 //
@@ -66,7 +66,7 @@ uint32x4_t test_vmovlbq_u16(uint16x8_t a)
 
 // CHECK-LABEL: @test_vmovltq_s8(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <16 x i8> [[A:%.*]], <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+// CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <16 x i8> [[A:%.*]], <16 x i8> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
 // CHECK-NEXT:    [[TMP1:%.*]] = sext <8 x i8> [[TMP0]] to <8 x i16>
 // CHECK-NEXT:    ret <8 x i16> [[TMP1]]
 //
@@ -81,7 +81,7 @@ int16x8_t test_vmovltq_s8(int8x16_t a)
 
 // CHECK-LABEL: @test_vmovltq_s16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <8 x i16> [[A:%.*]], <8 x i16> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+// CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <8 x i16> [[A:%.*]], <8 x i16> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
 // CHECK-NEXT:    [[TMP1:%.*]] = sext <4 x i16> [[TMP0]] to <4 x i32>
 // CHECK-NEXT:    ret <4 x i32> [[TMP1]]
 //
@@ -96,7 +96,7 @@ int32x4_t test_vmovltq_s16(int16x8_t a)
 
 // CHECK-LABEL: @test_vmovltq_u8(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <16 x i8> [[A:%.*]], <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+// CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <16 x i8> [[A:%.*]], <16 x i8> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
 // CHECK-NEXT:    [[TMP1:%.*]] = zext <8 x i8> [[TMP0]] to <8 x i16>
 // CHECK-NEXT:    ret <8 x i16> [[TMP1]]
 //
@@ -111,7 +111,7 @@ uint16x8_t test_vmovltq_u8(uint8x16_t a)
 
 // CHECK-LABEL: @test_vmovltq_u16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <8 x i16> [[A:%.*]], <8 x i16> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+// CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <8 x i16> [[A:%.*]], <8 x i16> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
 // CHECK-NEXT:    [[TMP1:%.*]] = zext <4 x i16> [[TMP0]] to <4 x i32>
 // CHECK-NEXT:    ret <4 x i32> [[TMP1]]
 //

diff  --git a/clang/test/CodeGen/arm-mve-intrinsics/vmovn.c b/clang/test/CodeGen/arm-mve-intrinsics/vmovn.c
index 48935303809f..48c7bd4ce168 100644
--- a/clang/test/CodeGen/arm-mve-intrinsics/vmovn.c
+++ b/clang/test/CodeGen/arm-mve-intrinsics/vmovn.c
@@ -8,7 +8,7 @@
 
 // LE-LABEL: @test_vmovnbq_s16(
 // LE-NEXT:  entry:
-// LE-NEXT:    [[TMP0:%.*]] = shufflevector <16 x i8> [[A:%.*]], <16 x i8> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
+// LE-NEXT:    [[TMP0:%.*]] = shufflevector <16 x i8> [[A:%.*]], <16 x i8> poison, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
 // LE-NEXT:    [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
 // LE-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i16> [[B:%.*]], <8 x i16> [[TMP1]], <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
 // LE-NEXT:    [[TMP3:%.*]] = trunc <16 x i16> [[TMP2]] to <16 x i8>
@@ -16,7 +16,7 @@
 //
 // BE-LABEL: @test_vmovnbq_s16(
 // BE-NEXT:  entry:
-// BE-NEXT:    [[TMP0:%.*]] = shufflevector <16 x i8> [[A:%.*]], <16 x i8> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
+// BE-NEXT:    [[TMP0:%.*]] = shufflevector <16 x i8> [[A:%.*]], <16 x i8> poison, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
 // BE-NEXT:    [[TMP1:%.*]] = call <8 x i16> @llvm.arm.mve.vreinterpretq.v8i16.v16i8(<16 x i8> [[TMP0]])
 // BE-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i16> [[B:%.*]], <8 x i16> [[TMP1]], <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
 // BE-NEXT:    [[TMP3:%.*]] = trunc <16 x i16> [[TMP2]] to <16 x i8>
@@ -33,7 +33,7 @@ int8x16_t test_vmovnbq_s16(int8x16_t a, int16x8_t b)
 
 // LE-LABEL: @test_vmovnbq_s32(
 // LE-NEXT:  entry:
-// LE-NEXT:    [[TMP0:%.*]] = shufflevector <8 x i16> [[A:%.*]], <8 x i16> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
+// LE-NEXT:    [[TMP0:%.*]] = shufflevector <8 x i16> [[A:%.*]], <8 x i16> poison, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
 // LE-NEXT:    [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <4 x i32>
 // LE-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i32> [[B:%.*]], <4 x i32> [[TMP1]], <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
 // LE-NEXT:    [[TMP3:%.*]] = trunc <8 x i32> [[TMP2]] to <8 x i16>
@@ -41,7 +41,7 @@ int8x16_t test_vmovnbq_s16(int8x16_t a, int16x8_t b)
 //
 // BE-LABEL: @test_vmovnbq_s32(
 // BE-NEXT:  entry:
-// BE-NEXT:    [[TMP0:%.*]] = shufflevector <8 x i16> [[A:%.*]], <8 x i16> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
+// BE-NEXT:    [[TMP0:%.*]] = shufflevector <8 x i16> [[A:%.*]], <8 x i16> poison, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
 // BE-NEXT:    [[TMP1:%.*]] = call <4 x i32> @llvm.arm.mve.vreinterpretq.v4i32.v8i16(<8 x i16> [[TMP0]])
 // BE-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i32> [[B:%.*]], <4 x i32> [[TMP1]], <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
 // BE-NEXT:    [[TMP3:%.*]] = trunc <8 x i32> [[TMP2]] to <8 x i16>
@@ -58,7 +58,7 @@ int16x8_t test_vmovnbq_s32(int16x8_t a, int32x4_t b)
 
 // LE-LABEL: @test_vmovnbq_u16(
 // LE-NEXT:  entry:
-// LE-NEXT:    [[TMP0:%.*]] = shufflevector <16 x i8> [[A:%.*]], <16 x i8> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
+// LE-NEXT:    [[TMP0:%.*]] = shufflevector <16 x i8> [[A:%.*]], <16 x i8> poison, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
 // LE-NEXT:    [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
 // LE-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i16> [[B:%.*]], <8 x i16> [[TMP1]], <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
 // LE-NEXT:    [[TMP3:%.*]] = trunc <16 x i16> [[TMP2]] to <16 x i8>
@@ -66,7 +66,7 @@ int16x8_t test_vmovnbq_s32(int16x8_t a, int32x4_t b)
 //
 // BE-LABEL: @test_vmovnbq_u16(
 // BE-NEXT:  entry:
-// BE-NEXT:    [[TMP0:%.*]] = shufflevector <16 x i8> [[A:%.*]], <16 x i8> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
+// BE-NEXT:    [[TMP0:%.*]] = shufflevector <16 x i8> [[A:%.*]], <16 x i8> poison, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
 // BE-NEXT:    [[TMP1:%.*]] = call <8 x i16> @llvm.arm.mve.vreinterpretq.v8i16.v16i8(<16 x i8> [[TMP0]])
 // BE-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i16> [[B:%.*]], <8 x i16> [[TMP1]], <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
 // BE-NEXT:    [[TMP3:%.*]] = trunc <16 x i16> [[TMP2]] to <16 x i8>
@@ -83,7 +83,7 @@ uint8x16_t test_vmovnbq_u16(uint8x16_t a, uint16x8_t b)
 
 // LE-LABEL: @test_vmovnbq_u32(
 // LE-NEXT:  entry:
-// LE-NEXT:    [[TMP0:%.*]] = shufflevector <8 x i16> [[A:%.*]], <8 x i16> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
+// LE-NEXT:    [[TMP0:%.*]] = shufflevector <8 x i16> [[A:%.*]], <8 x i16> poison, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
 // LE-NEXT:    [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <4 x i32>
 // LE-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i32> [[B:%.*]], <4 x i32> [[TMP1]], <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
 // LE-NEXT:    [[TMP3:%.*]] = trunc <8 x i32> [[TMP2]] to <8 x i16>
@@ -91,7 +91,7 @@ uint8x16_t test_vmovnbq_u16(uint8x16_t a, uint16x8_t b)
 //
 // BE-LABEL: @test_vmovnbq_u32(
 // BE-NEXT:  entry:
-// BE-NEXT:    [[TMP0:%.*]] = shufflevector <8 x i16> [[A:%.*]], <8 x i16> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
+// BE-NEXT:    [[TMP0:%.*]] = shufflevector <8 x i16> [[A:%.*]], <8 x i16> poison, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
 // BE-NEXT:    [[TMP1:%.*]] = call <4 x i32> @llvm.arm.mve.vreinterpretq.v4i32.v8i16(<8 x i16> [[TMP0]])
 // BE-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i32> [[B:%.*]], <4 x i32> [[TMP1]], <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
 // BE-NEXT:    [[TMP3:%.*]] = trunc <8 x i32> [[TMP2]] to <8 x i16>

diff  --git a/clang/test/CodeGen/arm-mve-intrinsics/vrev.c b/clang/test/CodeGen/arm-mve-intrinsics/vrev.c
index cec1e7077c44..5cb720dcb67b 100644
--- a/clang/test/CodeGen/arm-mve-intrinsics/vrev.c
+++ b/clang/test/CodeGen/arm-mve-intrinsics/vrev.c
@@ -6,7 +6,7 @@
 
 // CHECK-LABEL: @test_vrev16q_s8(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <16 x i8> [[A:%.*]], <16 x i8> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
+// CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <16 x i8> [[A:%.*]], <16 x i8> poison, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
 // CHECK-NEXT:    ret <16 x i8> [[TMP0]]
 //
 int8x16_t test_vrev16q_s8(int8x16_t a)
@@ -20,7 +20,7 @@ int8x16_t test_vrev16q_s8(int8x16_t a)
 
 // CHECK-LABEL: @test_vrev16q_u8(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <16 x i8> [[A:%.*]], <16 x i8> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
+// CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <16 x i8> [[A:%.*]], <16 x i8> poison, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
 // CHECK-NEXT:    ret <16 x i8> [[TMP0]]
 //
 uint8x16_t test_vrev16q_u8(uint8x16_t a)
@@ -34,7 +34,7 @@ uint8x16_t test_vrev16q_u8(uint8x16_t a)
 
 // CHECK-LABEL: @test_vrev32q_s8(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <16 x i8> [[A:%.*]], <16 x i8> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
+// CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <16 x i8> [[A:%.*]], <16 x i8> poison, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
 // CHECK-NEXT:    ret <16 x i8> [[TMP0]]
 //
 int8x16_t test_vrev32q_s8(int8x16_t a)
@@ -48,7 +48,7 @@ int8x16_t test_vrev32q_s8(int8x16_t a)
 
 // CHECK-LABEL: @test_vrev32q_u8(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <16 x i8> [[A:%.*]], <16 x i8> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
+// CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <16 x i8> [[A:%.*]], <16 x i8> poison, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
 // CHECK-NEXT:    ret <16 x i8> [[TMP0]]
 //
 uint8x16_t test_vrev32q_u8(uint8x16_t a)
@@ -62,7 +62,7 @@ uint8x16_t test_vrev32q_u8(uint8x16_t a)
 
 // CHECK-LABEL: @test_vrev32q_s16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <8 x i16> [[A:%.*]], <8 x i16> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
+// CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <8 x i16> [[A:%.*]], <8 x i16> poison, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
 // CHECK-NEXT:    ret <8 x i16> [[TMP0]]
 //
 int16x8_t test_vrev32q_s16(int16x8_t a)
@@ -76,7 +76,7 @@ int16x8_t test_vrev32q_s16(int16x8_t a)
 
 // CHECK-LABEL: @test_vrev32q_u16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <8 x i16> [[A:%.*]], <8 x i16> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
+// CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <8 x i16> [[A:%.*]], <8 x i16> poison, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
 // CHECK-NEXT:    ret <8 x i16> [[TMP0]]
 //
 uint16x8_t test_vrev32q_u16(uint16x8_t a)
@@ -90,7 +90,7 @@ uint16x8_t test_vrev32q_u16(uint16x8_t a)
 
 // CHECK-LABEL: @test_vrev32q_f16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <8 x half> [[A:%.*]], <8 x half> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
+// CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <8 x half> [[A:%.*]], <8 x half> poison, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
 // CHECK-NEXT:    ret <8 x half> [[TMP0]]
 //
 float16x8_t test_vrev32q_f16(float16x8_t a)
@@ -104,7 +104,7 @@ float16x8_t test_vrev32q_f16(float16x8_t a)
 
 // CHECK-LABEL: @test_vrev64q_s8(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <16 x i8> [[A:%.*]], <16 x i8> undef, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
+// CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <16 x i8> [[A:%.*]], <16 x i8> poison, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
 // CHECK-NEXT:    ret <16 x i8> [[TMP0]]
 //
 int8x16_t test_vrev64q_s8(int8x16_t a)
@@ -118,7 +118,7 @@ int8x16_t test_vrev64q_s8(int8x16_t a)
 
 // CHECK-LABEL: @test_vrev64q_u8(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <16 x i8> [[A:%.*]], <16 x i8> undef, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
+// CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <16 x i8> [[A:%.*]], <16 x i8> poison, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
 // CHECK-NEXT:    ret <16 x i8> [[TMP0]]
 //
 uint8x16_t test_vrev64q_u8(uint8x16_t a)
@@ -132,7 +132,7 @@ uint8x16_t test_vrev64q_u8(uint8x16_t a)
 
 // CHECK-LABEL: @test_vrev64q_s16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <8 x i16> [[A:%.*]], <8 x i16> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
+// CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <8 x i16> [[A:%.*]], <8 x i16> poison, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
 // CHECK-NEXT:    ret <8 x i16> [[TMP0]]
 //
 int16x8_t test_vrev64q_s16(int16x8_t a)
@@ -146,7 +146,7 @@ int16x8_t test_vrev64q_s16(int16x8_t a)
 
 // CHECK-LABEL: @test_vrev64q_u16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <8 x i16> [[A:%.*]], <8 x i16> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
+// CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <8 x i16> [[A:%.*]], <8 x i16> poison, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
 // CHECK-NEXT:    ret <8 x i16> [[TMP0]]
 //
 uint16x8_t test_vrev64q_u16(uint16x8_t a)
@@ -160,7 +160,7 @@ uint16x8_t test_vrev64q_u16(uint16x8_t a)
 
 // CHECK-LABEL: @test_vrev64q_f16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <8 x half> [[A:%.*]], <8 x half> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
+// CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <8 x half> [[A:%.*]], <8 x half> poison, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
 // CHECK-NEXT:    ret <8 x half> [[TMP0]]
 //
 float16x8_t test_vrev64q_f16(float16x8_t a)
@@ -174,7 +174,7 @@ float16x8_t test_vrev64q_f16(float16x8_t a)
 
 // CHECK-LABEL: @test_vrev64q_f32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+// CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
 // CHECK-NEXT:    ret <4 x float> [[TMP0]]
 //
 float32x4_t test_vrev64q_f32(float32x4_t a)
@@ -188,7 +188,7 @@ float32x4_t test_vrev64q_f32(float32x4_t a)
 
 // CHECK-LABEL: @test_vrev64q_s32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+// CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
 // CHECK-NEXT:    ret <4 x i32> [[TMP0]]
 //
 int32x4_t test_vrev64q_s32(int32x4_t a)
@@ -202,7 +202,7 @@ int32x4_t test_vrev64q_s32(int32x4_t a)
 
 // CHECK-LABEL: @test_vrev64q_u32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+// CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
 // CHECK-NEXT:    ret <4 x i32> [[TMP0]]
 //
 uint32x4_t test_vrev64q_u32(uint32x4_t a)

diff  --git a/clang/test/CodeGen/arm64-abi-vector.c b/clang/test/CodeGen/arm64-abi-vector.c
index fd828d99d28e..c911400a0c7b 100644
--- a/clang/test/CodeGen/arm64-abi-vector.c
+++ b/clang/test/CodeGen/arm64-abi-vector.c
@@ -433,7 +433,7 @@ __attribute__((noinline)) double args_vec_3d(int fixed, __double3 c3) {
 // CHECK: args_vec_3d
 // CHECK: [[CAST:%.*]] = bitcast <3 x double>* {{%.*}} to <4 x double>*
 // CHECK: [[LOAD:%.*]] = load <4 x double>, <4 x double>* [[CAST]]
-// CHECK: shufflevector <4 x double> [[LOAD]], <4 x double> undef, <3 x i32> <i32 0, i32 1, i32 2>
+// CHECK: shufflevector <4 x double> [[LOAD]], <4 x double> poison, <3 x i32> <i32 0, i32 1, i32 2>
   double sum = fixed;
   sum = sum + c3.x + c3.y;
   return sum;

diff  --git a/clang/test/CodeGenOpenCL/as_type.cl b/clang/test/CodeGenOpenCL/as_type.cl
index 51e82e3fef8e..95d64101efed 100644
--- a/clang/test/CodeGenOpenCL/as_type.cl
+++ b/clang/test/CodeGenOpenCL/as_type.cl
@@ -6,14 +6,14 @@ typedef __attribute__(( ext_vector_type(16) )) char char16;
 typedef __attribute__(( ext_vector_type(3) )) int int3;
 
 //CHECK: define spir_func <3 x i8> @f1(<4 x i8> %[[x:.*]])
-//CHECK: %[[astype:.*]] = shufflevector <4 x i8> %[[x]], <4 x i8> undef, <3 x i32> <i32 0, i32 1, i32 2>
+//CHECK: %[[astype:.*]] = shufflevector <4 x i8> %[[x]], <4 x i8> poison, <3 x i32> <i32 0, i32 1, i32 2>
 //CHECK: ret <3 x i8> %[[astype]]
 char3 f1(char4 x) {
   return  __builtin_astype(x, char3);
 }
 
 //CHECK: define spir_func <4 x i8> @f2(<3 x i8> %[[x:.*]])
-//CHECK: %[[astype:.*]] = shufflevector <3 x i8> %[[x]], <3 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
+//CHECK: %[[astype:.*]] = shufflevector <3 x i8> %[[x]], <3 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
 //CHECK: ret <4 x i8> %[[astype]]
 char4 f2(char3 x) {
   return __builtin_astype(x, char4);
@@ -21,7 +21,7 @@ char4 f2(char3 x) {
 
 //CHECK: define spir_func <3 x i8> @f3(i32 %[[x:.*]])
 //CHECK: %[[cast:.*]] = bitcast i32 %[[x]] to <4 x i8>
-//CHECK: %[[astype:.*]] = shufflevector <4 x i8> %[[cast]], <4 x i8> undef, <3 x i32> <i32 0, i32 1, i32 2>
+//CHECK: %[[astype:.*]] = shufflevector <4 x i8> %[[cast]], <4 x i8> poison, <3 x i32> <i32 0, i32 1, i32 2>
 //CHECK: ret <3 x i8> %[[astype]]
 char3 f3(int x) {
   return __builtin_astype(x, char3);
@@ -36,7 +36,7 @@ char4 f4(int x) {
 }
 
 //CHECK: define spir_func i32 @f5(<3 x i8> %[[x:.*]])
-//CHECK: %[[shuffle:.*]] = shufflevector <3 x i8> %[[x]], <3 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
+//CHECK: %[[shuffle:.*]] = shufflevector <3 x i8> %[[x]], <3 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
 //CHECK: %[[astype:.*]] = bitcast <4 x i8> %[[shuffle]] to i32
 //CHECK: ret i32 %[[astype]]
 int f5(char3 x) {
@@ -61,7 +61,7 @@ char3 f7(char3 x) {
 
 //CHECK: define spir_func <3 x i32> @f8(<16 x i8> %[[x:.*]])
 //CHECK: %[[cast:.*]] = bitcast <16 x i8> %[[x]] to <4 x i32>
-//CHECK: %[[astype:.*]] = shufflevector <4 x i32> %[[cast]], <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
+//CHECK: %[[astype:.*]] = shufflevector <4 x i32> %[[cast]], <4 x i32> poison, <3 x i32> <i32 0, i32 1, i32 2>
 //CHECK: ret <3 x i32> %[[astype]]
 int3 f8(char16 x) {
   return __builtin_astype(x, int3);
@@ -91,14 +91,14 @@ int ptr_to_int(int *x) {
 //CHECK: define spir_func <3 x i8> @ptr_to_char3(i32* %[[x:.*]])
 //CHECK: %[[cast1:.*]] = ptrtoint i32* %[[x]] to i32
 //CHECK: %[[cast2:.*]] = bitcast i32 %[[cast1]] to <4 x i8>
-//CHECK: %[[astype:.*]] = shufflevector <4 x i8> %[[cast2]], <4 x i8> undef, <3 x i32> <i32 0, i32 1, i32 2>
+//CHECK: %[[astype:.*]] = shufflevector <4 x i8> %[[cast2]], <4 x i8> poison, <3 x i32> <i32 0, i32 1, i32 2>
 //CHECK: ret <3 x i8> %[[astype]]
 char3 ptr_to_char3(int *x) {
   return  __builtin_astype(x, char3);
 }
 
 //CHECK: define spir_func i32* @char3_to_ptr(<3 x i8> %[[x:.*]])
-//CHECK: %[[astype:.*]] = shufflevector <3 x i8> %[[x]], <3 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
+//CHECK: %[[astype:.*]] = shufflevector <3 x i8> %[[x]], <3 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
 //CHECK: %[[cast1:.*]] = bitcast <4 x i8> %[[astype]] to i32
 //CHECK: %[[cast2:.*]] = inttoptr i32 %[[cast1]] to i32*
 //CHECK: ret i32* %[[cast2]]

diff  --git a/clang/test/CodeGenOpenCL/partial_initializer.cl b/clang/test/CodeGenOpenCL/partial_initializer.cl
index 4e8299584135..2bdab2a0147b 100644
--- a/clang/test/CodeGenOpenCL/partial_initializer.cl
+++ b/clang/test/CodeGenOpenCL/partial_initializer.cl
@@ -51,7 +51,7 @@ void f(void) {
 
   // CHECK: store <2 x i32> <i32 1, i32 2>, <2 x i32>* %[[compoundliteral1]], align 8
   // CHECK: %[[v6:.*]] = load <2 x i32>, <2 x i32>* %[[compoundliteral1]], align 8
-  // CHECK: %[[vext:.*]] = shufflevector <2 x i32> %[[v6]], <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+  // CHECK: %[[vext:.*]] = shufflevector <2 x i32> %[[v6]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
   // CHECK: %[[vecinit:.*]] = shufflevector <4 x i32> %[[vext]], <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
   // CHECK: %[[vecinit2:.*]] = insertelement <4 x i32> %[[vecinit]], i32 3, i32 2
   // CHECK: %[[vecinit3:.*]] = insertelement <4 x i32> %[[vecinit2]], i32 4, i32 3

diff  --git a/clang/test/CodeGenOpenCL/preserve_vec3.cl b/clang/test/CodeGenOpenCL/preserve_vec3.cl
index 6efbbb3d5323..aefb248e51f4 100644
--- a/clang/test/CodeGenOpenCL/preserve_vec3.cl
+++ b/clang/test/CodeGenOpenCL/preserve_vec3.cl
@@ -11,14 +11,14 @@ void kernel foo(global float3 *a, global float3 *b) {
 
 void kernel float4_to_float3(global float3 *a, global float4 *b) {
   // CHECK: %[[LOAD_A:.*]] = load <4 x float>, <4 x float> addrspace(1)* %b, align 16
-  // CHECK: %[[ASTYPE:.*]] = shufflevector <4 x float> %[[LOAD_A]], <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
+  // CHECK: %[[ASTYPE:.*]] = shufflevector <4 x float> %[[LOAD_A]], <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
   // CHECK: store <3 x float> %[[ASTYPE:.*]], <3 x float> addrspace(1)* %a, align 16
   *a = __builtin_astype(*b, float3);
 }
 
 void kernel float3_to_float4(global float3 *a, global float4 *b) {
   // CHECK: %[[LOAD_A:.*]] = load <3 x float>, <3 x float> addrspace(1)* %a, align 16
-  // CHECK: %[[ASTYPE:.*]] = shufflevector <3 x float> %[[LOAD_A]], <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
+  // CHECK: %[[ASTYPE:.*]] = shufflevector <3 x float> %[[LOAD_A]], <3 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
   // CHECK: store <4 x float> %[[ASTYPE:.*]], <4 x float> addrspace(1)* %b, align 16
   *b = __builtin_astype(*a, float4);
 }

diff  --git a/clang/test/CodeGenOpenCL/vectorLoadStore.cl b/clang/test/CodeGenOpenCL/vectorLoadStore.cl
index cb35e6f4689b..382a7ca8ac5e 100644
--- a/clang/test/CodeGenOpenCL/vectorLoadStore.cl
+++ b/clang/test/CodeGenOpenCL/vectorLoadStore.cl
@@ -8,7 +8,7 @@ typedef float float4 __attribute((ext_vector_type(4)));
 // Check for optimized vec3 load/store which treats vec3 as vec4.
 void foo(char3 *P, char3 *Q) {
   *P = *Q;
-  // CHECK: %{{.*}} = shufflevector <4 x i8> %{{.*}}, <4 x i8> undef, <3 x i32> <i32 0, i32 1, i32 2>
+  // CHECK: %{{.*}} = shufflevector <4 x i8> %{{.*}}, <4 x i8> poison, <3 x i32> <i32 0, i32 1, i32 2>
 }
 
 // CHECK: define spir_func void @alignment()

diff  --git a/clang/test/CodeGenOpenCL/vector_literals_valid.cl b/clang/test/CodeGenOpenCL/vector_literals_valid.cl
index 249c95cd756d..af571714368e 100644
--- a/clang/test/CodeGenOpenCL/vector_literals_valid.cl
+++ b/clang/test/CodeGenOpenCL/vector_literals_valid.cl
@@ -19,32 +19,32 @@ void vector_literals_valid() {
   int4 a_1_1_1_1 = (int4)(1, 2, c1.s2, c2.s3);
 
   //CHECK: store <2 x i32> <i32 1, i32 2>, <2 x i32>*
-  //CHECK: shufflevector <2 x i32> %{{[0-9]+}}, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+  //CHECK: shufflevector <2 x i32> %{{[0-9]+}}, <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
   //CHECK: shufflevector <4 x i32> %{{.+}}, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
   //CHECK: insertelement <4 x i32> %{{.+}}, i32 3, i32 2
   //CHECK: insertelement <4 x i32> %{{.+}}, i32 4, i32 3
   int4 a_2_1_1 = (int4)((int2)(1, 2), 3, 4);
 
   //CHECK: store <2 x i32> <i32 2, i32 3>, <2 x i32>*
-  //CHECK: shufflevector <2 x i32> %{{[0-9]+}}, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+  //CHECK: shufflevector <2 x i32> %{{[0-9]+}}, <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
   //CHECK: shufflevector <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>, <4 x i32> %{{.+}}, <4 x i32> <i32 0, i32 4, i32 5, i32 undef>
   //CHECK: insertelement <4 x i32> %{{.+}}, i32 4, i32 3
   int4 a_1_2_1 = (int4)(1, (int2)(2, 3), 4);
 
   //CHECK: store <2 x i32> <i32 3, i32 4>, <2 x i32>*
-  //CHECK: shufflevector <2 x i32> %{{[0-9]+}}, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+  //CHECK: shufflevector <2 x i32> %{{[0-9]+}}, <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
   //CHECK: shufflevector <4 x i32> <i32 1, i32 2, i32 undef, i32 undef>, <4 x i32> %{{.+}}, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
   int4 a_1_1_2 = (int4)(1, 2, (int2)(3, 4));
 
   //CHECK: store <2 x i32> <i32 1, i32 2>, <2 x i32>*
-  //CHECK: shufflevector <2 x i32> %{{[0-9]+}}, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+  //CHECK: shufflevector <2 x i32> %{{[0-9]+}}, <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
   //CHECK: shufflevector <4 x i32> %{{.+}}, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
   //CHECK: shufflevector <4 x i32> %{{.+}}, <4 x i32> <i32 3, i32 3, i32 undef, i32 undef>, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
   int4 a_2_2 = (int4)((int2)(1, 2), (int2)(3));
 
   //CHECK: store <4 x i32> <i32 2, i32 3, i32 4, i32 undef>, <4 x i32>*
-  //CHECK: shufflevector <4 x i32> %{{.+}}, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
-  //CHECK: shufflevector <3 x i32> %{{.+}}, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
+  //CHECK: shufflevector <4 x i32> %{{.+}}, <4 x i32> poison, <3 x i32> <i32 0, i32 1, i32 2>
+  //CHECK: shufflevector <3 x i32> %{{.+}}, <3 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
   //CHECK: shufflevector <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>, <4 x i32> %{{.+}}, <4 x i32> <i32 0, i32 4, i32 5, i32 6>
   int4 a_1_3 = (int4)(1, (int3)(2, 3, 4));
 
@@ -52,11 +52,11 @@ void vector_literals_valid() {
   int4 a = (int4)(1);
 
   //CHECK: load <4 x i32>, <4 x i32>* %a
-  //CHECK: shufflevector <4 x i32> %{{[0-9]+}}, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
-  //CHECK: shufflevector <2 x i32> %{{[0-9]+}}, <2 x i32> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  //CHECK: shufflevector <4 x i32> %{{[0-9]+}}, <4 x i32> poison, <2 x i32> <i32 0, i32 1>
+  //CHECK: shufflevector <2 x i32> %{{[0-9]+}}, <2 x i32> poison, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
   //CHECK: shufflevector <8 x i32> <i32 1, i32 2, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>, <8 x i32> %{{.+}}, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 undef, i32 undef, i32 undef, i32 undef>
   //CHECK: load <4 x i32>, <4 x i32>* %a
-  //CHECK: shufflevector <4 x i32> %{{[0-9]+}}, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
+  //CHECK: shufflevector <4 x i32> %{{[0-9]+}}, <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
   //CHECK: shufflevector <8 x i32> %{{.+}}, <8 x i32> %{{.+}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
   int8 b = (int8)(1, 2, a.xy, a);
 

diff  --git a/llvm/lib/Analysis/VectorUtils.cpp b/llvm/lib/Analysis/VectorUtils.cpp
index 90726979ca4a..1de17aac5170 100644
--- a/llvm/lib/Analysis/VectorUtils.cpp
+++ b/llvm/lib/Analysis/VectorUtils.cpp
@@ -829,8 +829,7 @@ static Value *concatenateTwoVectors(IRBuilderBase &Builder, Value *V1,
   if (NumElts1 > NumElts2) {
     // Extend with UNDEFs.
     V2 = Builder.CreateShuffleVector(
-        V2, UndefValue::get(VecTy2),
-        createSequentialMask(0, NumElts2, NumElts1 - NumElts2));
+        V2, createSequentialMask(0, NumElts2, NumElts1 - NumElts2));
   }
 
   return Builder.CreateShuffleVector(

diff  --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 0698c3c3b993..3ea758a48bad 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -6723,8 +6723,7 @@ bool CodeGenPrepare::optimizeShuffleVectorInst(ShuffleVectorInst *SVI) {
       cast<Instruction>(SVI->getOperand(0))->getOperand(1), NewType);
   Value *Insert = Builder.CreateInsertElement(UndefValue::get(NewVecType), BC1,
                                               (uint64_t)0);
-  Value *Shuffle = Builder.CreateShuffleVector(
-      Insert, UndefValue::get(NewVecType), SVI->getShuffleMask());
+  Value *Shuffle = Builder.CreateShuffleVector(Insert, SVI->getShuffleMask());
   Value *BC2 = Builder.CreateBitCast(Shuffle, SVIVecType);
 
   SVI->replaceAllUsesWith(BC2);

diff  --git a/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp b/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp
index 80c5cc7506d4..a068e6669957 100644
--- a/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp
+++ b/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp
@@ -1240,8 +1240,7 @@ bool InterleavedLoadCombineImpl::combine(std::list<VectorInfo> &InterleavedLoad,
       Mask.push_back(i + j * Factor);
 
     Builder.SetInsertPoint(VI.SVI);
-    auto SVI = Builder.CreateShuffleVector(LI, UndefValue::get(LI->getType()),
-                                           Mask, "interleaved.shuffle");
+    auto SVI = Builder.CreateShuffleVector(LI, Mask, "interleaved.shuffle");
     VI.SVI->replaceAllUsesWith(SVI);
     i++;
   }

diff  --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index fe7b7eb8966a..04f8cb51eb11 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -2397,7 +2397,6 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
                          Name.startswith("avx2.pmovzx") ||
                          Name.startswith("avx512.mask.pmovsx") ||
                          Name.startswith("avx512.mask.pmovzx"))) {
-      auto *SrcTy = cast<FixedVectorType>(CI->getArgOperand(0)->getType());
       auto *DstTy = cast<FixedVectorType>(CI->getType());
       unsigned NumDstElts = DstTy->getNumElements();
 
@@ -2406,8 +2405,8 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
       for (unsigned i = 0; i != NumDstElts; ++i)
         ShuffleMask[i] = i;
 
-      Value *SV = Builder.CreateShuffleVector(
-          CI->getArgOperand(0), UndefValue::get(SrcTy), ShuffleMask);
+      Value *SV = Builder.CreateShuffleVector(CI->getArgOperand(0),
+                                              ShuffleMask);
 
       bool DoSext = (StringRef::npos != Name.find("pmovsx"));
       Rep = DoSext ? Builder.CreateSExt(SV, DstTy)
@@ -2434,11 +2433,10 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
                                             PointerType::getUnqual(VT));
       Value *Load = Builder.CreateAlignedLoad(VT, Op, Align(1));
       if (NumSrcElts == 2)
-        Rep = Builder.CreateShuffleVector(
-            Load, UndefValue::get(Load->getType()), ArrayRef<int>{0, 1, 0, 1});
+        Rep = Builder.CreateShuffleVector(Load, ArrayRef<int>{0, 1, 0, 1});
       else
         Rep =
-            Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()),
+            Builder.CreateShuffleVector(Load,
                                         ArrayRef<int>{0, 1, 2, 3, 0, 1, 2, 3});
     } else if (IsX86 && (Name.startswith("avx512.mask.shuf.i") ||
                          Name.startswith("avx512.mask.shuf.f"))) {
@@ -2487,8 +2485,9 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
       Value *Op = CI->getArgOperand(0);
       ElementCount EC = cast<VectorType>(CI->getType())->getElementCount();
       Type *MaskTy = VectorType::get(Type::getInt32Ty(C), EC);
-      Rep = Builder.CreateShuffleVector(Op, UndefValue::get(Op->getType()),
-                                        Constant::getNullValue(MaskTy));
+      SmallVector<int, 8> M;
+      ShuffleVectorInst::getShuffleMask(Constant::getNullValue(MaskTy), M);
+      Rep = Builder.CreateShuffleVector(Op, M);
 
       if (CI->getNumArgOperands() == 3)
         Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
@@ -2581,13 +2580,12 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
       Imm = Imm % Scale;
 
       // Extend the second operand into a vector the size of the destination.
-      Value *UndefV = UndefValue::get(Op1->getType());
       SmallVector<int, 8> Idxs(DstNumElts);
       for (unsigned i = 0; i != SrcNumElts; ++i)
         Idxs[i] = i;
       for (unsigned i = SrcNumElts; i != DstNumElts; ++i)
         Idxs[i] = SrcNumElts;
-      Rep = Builder.CreateShuffleVector(Op1, UndefV, Idxs);
+      Rep = Builder.CreateShuffleVector(Op1, Idxs);
 
       // Insert the second operand into the first operand.
 

diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
index b668c9d1d56f..b7862f6ef9a4 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
@@ -1039,8 +1039,7 @@ static Value *simplifyAMDGCNMemoryIntrinsicDemanded(InstCombiner &IC,
       EltMask.push_back(NewNumElts);
   }
 
-  Value *Shuffle =
-      IC.Builder.CreateShuffleVector(NewCall, UndefValue::get(NewTy), EltMask);
+  Value *Shuffle = IC.Builder.CreateShuffleVector(NewCall, EltMask);
 
   return Shuffle;
 }

diff  --git a/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp
index 650c872182ec..63aee097c0da 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp
@@ -241,7 +241,7 @@ bool AMDGPULowerKernelArguments::runOnFunction(Function &F) {
                                             Arg.getName() + ".load");
       Arg.replaceAllUsesWith(NewVal);
     } else if (IsV3) {
-      Value *Shuf = Builder.CreateShuffleVector(Load, UndefValue::get(V4Ty),
+      Value *Shuf = Builder.CreateShuffleVector(Load,
                                                 ArrayRef<int>{0, 1, 2},
                                                 Arg.getName() + ".load");
       Arg.replaceAllUsesWith(Shuf);

diff  --git a/llvm/lib/Target/AMDGPU/AMDGPURewriteOutArguments.cpp b/llvm/lib/Target/AMDGPU/AMDGPURewriteOutArguments.cpp
index 85d26cd4d0ff..4c199e8b2027 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURewriteOutArguments.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURewriteOutArguments.cpp
@@ -409,8 +409,7 @@ bool AMDGPURewriteOutArguments::runOnFunction(Function &F) {
         if (DL->getTypeSizeInBits(EffectiveEltTy) !=
             DL->getTypeSizeInBits(Val->getType())) {
           assert(isVec3ToVec4Shuffle(EffectiveEltTy, Val->getType()));
-          Val = B.CreateShuffleVector(Val, UndefValue::get(Val->getType()),
-                                      ArrayRef<int>{0, 1, 2});
+          Val = B.CreateShuffleVector(Val, ArrayRef<int>{0, 1, 2});
         }
 
         Val = B.CreateBitCast(Val, EffectiveEltTy);

diff  --git a/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp b/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp
index 4fbe7e4b3931..52b0a1a58c19 100644
--- a/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp
@@ -1104,8 +1104,7 @@ auto HexagonVectorCombine::concat(IRBuilder<> &Builder,
   SMask.resize(Vecs.size() * getSizeOf(Vecs.front()->getType()));
   std::iota(SMask.begin(), SMask.end(), 0);
   Value *Total = Work[OtherW].front();
-  return Builder.CreateShuffleVector(Total, UndefValue::get(Total->getType()),
-                                     SMask);
+  return Builder.CreateShuffleVector(Total, SMask);
 }
 
 auto HexagonVectorCombine::vresize(IRBuilder<> &Builder, Value *Val,

diff  --git a/llvm/lib/Target/X86/X86InterleavedAccess.cpp b/llvm/lib/Target/X86/X86InterleavedAccess.cpp
index 866c53235db4..5546f27b9488 100644
--- a/llvm/lib/Target/X86/X86InterleavedAccess.cpp
+++ b/llvm/lib/Target/X86/X86InterleavedAccess.cpp
@@ -295,8 +295,7 @@ static void reorderSubVector(MVT VT, SmallVectorImpl<Value *> &TransposedMatrix,
 
   if (VecElems == 16) {
     for (unsigned i = 0; i < Stride; i++)
-      TransposedMatrix[i] = Builder.CreateShuffleVector(
-          Vec[i], UndefValue::get(Vec[i]->getType()), VPShuf);
+      TransposedMatrix[i] = Builder.CreateShuffleVector(Vec[i], VPShuf);
     return;
   }
 

diff  --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 3dc8508aa760..8f2e694f3c5b 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1692,8 +1692,7 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
       for (; i != VecNumElts; ++i)
         WidenMask.push_back(UndefMaskElem);
 
-      Value *WidenShuffle = Builder.CreateShuffleVector(
-          SubVec, llvm::UndefValue::get(SubVecTy), WidenMask);
+      Value *WidenShuffle = Builder.CreateShuffleVector(SubVec, WidenMask);
 
       SmallVector<int, 8> Mask;
       for (unsigned i = 0; i != IdxN; ++i)

diff  --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index 5ff4c45f681c..2f456200a002 100644
--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -3197,10 +3197,10 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
            "pclmul 3rd operand must be a constant");
     unsigned Imm = cast<ConstantInt>(I.getArgOperand(2))->getZExtValue();
     Value *Shuf0 =
-        IRB.CreateShuffleVector(getShadow(&I, 0), UndefValue::get(ShadowTy),
+        IRB.CreateShuffleVector(getShadow(&I, 0),
                                 getPclmulMask(Width, Imm & 0x01));
     Value *Shuf1 =
-        IRB.CreateShuffleVector(getShadow(&I, 1), UndefValue::get(ShadowTy),
+        IRB.CreateShuffleVector(getShadow(&I, 1),
                                 getPclmulMask(Width, Imm & 0x10));
     ShadowAndOriginCombiner SOC(this, IRB);
     SOC.Add(Shuf0, getOrigin(&I, 0));

diff  --git a/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp b/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
index a00ed11c8fbb..fb08891bc90c 100644
--- a/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
+++ b/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
@@ -336,7 +336,7 @@ class LowerMatrixIntrinsics {
       Value *Vec = isColumnMajor() ? getColumn(J) : getRow(I);
       Value *Undef = UndefValue::get(Vec->getType());
       return Builder.CreateShuffleVector(
-          Vec, Undef, createSequentialMask(isColumnMajor() ? I : J, NumElts, 0),
+          Vec, createSequentialMask(isColumnMajor() ? I : J, NumElts, 0),
           "block");
     }
   };
@@ -452,7 +452,7 @@ class LowerMatrixIntrinsics {
          MaskStart < cast<FixedVectorType>(VType)->getNumElements();
          MaskStart += SI.getStride()) {
       Value *V = Builder.CreateShuffleVector(
-          MatrixVal, Undef, createSequentialMask(MaskStart, SI.getStride(), 0),
+          MatrixVal, createSequentialMask(MaskStart, SI.getStride(), 0),
           "split");
       SplitVecs.push_back(V);
     }
@@ -943,8 +943,7 @@ class LowerMatrixIntrinsics {
 
     Value *Undef = UndefValue::get(Block->getType());
     Block = Builder.CreateShuffleVector(
-        Block, Undef,
-        createSequentialMask(0, BlockNumElts, NumElts - BlockNumElts));
+        Block, createSequentialMask(0, BlockNumElts, NumElts - BlockNumElts));
 
     // If Col is 7 long and I is 2 and BlockNumElts is 2 the mask is: 0, 1, 7,
     // 8, 4, 5, 6

diff  --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp
index ed68ce6f8282..0805cb52db7b 100644
--- a/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -2207,8 +2207,7 @@ static Value *extractVector(IRBuilderTy &IRB, Value *V, unsigned BeginIndex,
   Mask.reserve(NumElements);
   for (unsigned i = BeginIndex; i != EndIndex; ++i)
     Mask.push_back(i);
-  V = IRB.CreateShuffleVector(V, UndefValue::get(V->getType()), Mask,
-                              Name + ".extract");
+  V = IRB.CreateShuffleVector(V, Mask, Name + ".extract");
   LLVM_DEBUG(dbgs() << "     shuffle: " << *V << "\n");
   return V;
 }
@@ -2241,22 +2240,22 @@ static Value *insertVector(IRBuilderTy &IRB, Value *Old, Value *V,
   // use a shuffle vector to widen it with undef elements, and then
   // a second shuffle vector to select between the loaded vector and the
   // incoming vector.
-  SmallVector<Constant *, 8> Mask;
+  SmallVector<int, 8> Mask;
   Mask.reserve(cast<FixedVectorType>(VecTy)->getNumElements());
   for (unsigned i = 0; i != cast<FixedVectorType>(VecTy)->getNumElements(); ++i)
     if (i >= BeginIndex && i < EndIndex)
-      Mask.push_back(IRB.getInt32(i - BeginIndex));
+      Mask.push_back(i - BeginIndex);
     else
-      Mask.push_back(UndefValue::get(IRB.getInt32Ty()));
-  V = IRB.CreateShuffleVector(V, UndefValue::get(V->getType()),
-                              ConstantVector::get(Mask), Name + ".expand");
+      Mask.push_back(-1);
+  V = IRB.CreateShuffleVector(V, Mask, Name + ".expand");
   LLVM_DEBUG(dbgs() << "    shuffle: " << *V << "\n");
 
-  Mask.clear();
+  SmallVector<Constant *, 8> Mask2;
+  Mask2.reserve(cast<FixedVectorType>(VecTy)->getNumElements());
   for (unsigned i = 0; i != cast<FixedVectorType>(VecTy)->getNumElements(); ++i)
-    Mask.push_back(IRB.getInt1(i >= BeginIndex && i < EndIndex));
+    Mask2.push_back(IRB.getInt1(i >= BeginIndex && i < EndIndex));
 
-  V = IRB.CreateSelect(ConstantVector::get(Mask), V, Old, Name + "blend");
+  V = IRB.CreateSelect(ConstantVector::get(Mask2), V, Old, Name + "blend");
 
   LLVM_DEBUG(dbgs() << "    blend: " << *V << "\n");
   return V;

diff  --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp
index 653680e5dc1e..80ae6b37e132 100644
--- a/llvm/lib/Transforms/Utils/LoopUtils.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp
@@ -958,8 +958,7 @@ llvm::getShuffleReduction(IRBuilderBase &Builder, Value *Src, unsigned Op,
     // Fill the rest of the mask with undef.
     std::fill(&ShuffleMask[i / 2], ShuffleMask.end(), -1);
 
-    Value *Shuf = Builder.CreateShuffleVector(
-        TmpVec, UndefValue::get(TmpVec->getType()), ShuffleMask, "rdx.shuf");
+    Value *Shuf = Builder.CreateShuffleVector(TmpVec, ShuffleMask, "rdx.shuf");
 
     if (Op != Instruction::ICmp && Op != Instruction::FCmp) {
       // The builder propagates its fast-math-flags setting.

diff  --git a/llvm/test/CodeGen/AArch64/aarch64-interleaved-ld-combine.ll b/llvm/test/CodeGen/AArch64/aarch64-interleaved-ld-combine.ll
index 970422164a49..38ccc5788fd7 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-interleaved-ld-combine.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-interleaved-ld-combine.ll
@@ -14,10 +14,10 @@ entry:
 ; CHECK-DAG: [[GEP:%.+]] = getelementptr inbounds <4 x float>, <4 x float>* %ptr, i64 2
 ; CHECK-DAG: [[CAST:%.+]] = bitcast <4 x float>* [[GEP]] to <16 x float>*
 ; CHECK-DAG: [[LOAD:%.+]] = load <16 x float>, <16 x float>* [[CAST]], align 16
-; CHECK-DAG: %{{.* }}= shufflevector <16 x float> [[LOAD]], <16 x float> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
-; CHECK-DAG: %{{.* }}= shufflevector <16 x float> [[LOAD]], <16 x float> undef, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
-; CHECK-DAG: %{{.* }}= shufflevector <16 x float> [[LOAD]], <16 x float> undef, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
-; CHECK-DAG: %{{.* }}= shufflevector <16 x float> [[LOAD]], <16 x float> undef, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
+; CHECK-DAG: %{{.* }}= shufflevector <16 x float> [[LOAD]], <16 x float> poison, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
+; CHECK-DAG: %{{.* }}= shufflevector <16 x float> [[LOAD]], <16 x float> poison, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
+; CHECK-DAG: %{{.* }}= shufflevector <16 x float> [[LOAD]], <16 x float> poison, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
+; CHECK-DAG: %{{.* }}= shufflevector <16 x float> [[LOAD]], <16 x float> poison, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
 ; CHECK: ret void
 
 ;;; Check if it gets lowerd
@@ -60,10 +60,10 @@ entry:
 ; CHECK-DAG: [[GEP:%.+]] = getelementptr inbounds <4 x float>, <4 x float>* %ptr, i64 [[LSHR]]
 ; CHECK-DAG: [[CAST:%.+]] = bitcast <4 x float>* [[GEP]] to <16 x float>*
 ; CHECK-DAG: [[LOAD:%.+]] = load <16 x float>, <16 x float>* [[CAST]], align 16
-; CHECK-DAG: %{{.* }}= shufflevector <16 x float> [[LOAD]], <16 x float> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
-; CHECK-DAG: %{{.* }}= shufflevector <16 x float> [[LOAD]], <16 x float> undef, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
-; CHECK-DAG: %{{.* }}= shufflevector <16 x float> [[LOAD]], <16 x float> undef, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
-; CHECK-DAG: %{{.* }}= shufflevector <16 x float> [[LOAD]], <16 x float> undef, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
+; CHECK-DAG: %{{.* }}= shufflevector <16 x float> [[LOAD]], <16 x float> poison, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
+; CHECK-DAG: %{{.* }}= shufflevector <16 x float> [[LOAD]], <16 x float> poison, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
+; CHECK-DAG: %{{.* }}= shufflevector <16 x float> [[LOAD]], <16 x float> poison, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
+; CHECK-DAG: %{{.* }}= shufflevector <16 x float> [[LOAD]], <16 x float> poison, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
 ; CHECK: ret void
 
 ; AS-LABEL: aarch64_ilc_idx
@@ -122,10 +122,10 @@ entry:
 ; CHECK-DAG: [[GEP:%.+]] = getelementptr %struct.ilc, %struct.ilc* %ptr, i32 0, i32 1, i64 [[LSHR]]
 ; CHECK-DAG: [[CAST:%.+]] = bitcast <4 x float>* [[GEP]] to <16 x float>*
 ; CHECK-DAG: [[LOAD:%.+]] = load <16 x float>, <16 x float>* [[CAST]], align 4
-; CHECK-DAG: %{{.* }}= shufflevector <16 x float> [[LOAD]], <16 x float> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
-; CHECK-DAG: %{{.* }}= shufflevector <16 x float> [[LOAD]], <16 x float> undef, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
-; CHECK-DAG: %{{.* }}= shufflevector <16 x float> [[LOAD]], <16 x float> undef, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
-; CHECK-DAG: %{{.* }}= shufflevector <16 x float> [[LOAD]], <16 x float> undef, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
+; CHECK-DAG: %{{.* }}= shufflevector <16 x float> [[LOAD]], <16 x float> poison, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
+; CHECK-DAG: %{{.* }}= shufflevector <16 x float> [[LOAD]], <16 x float> poison, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
+; CHECK-DAG: %{{.* }}= shufflevector <16 x float> [[LOAD]], <16 x float> poison, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
+; CHECK-DAG: %{{.* }}= shufflevector <16 x float> [[LOAD]], <16 x float> poison, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
 ; CHECK: ret void
 
 ; AS-LABEL: aarch64_ilc_struct
@@ -180,8 +180,8 @@ entry:
 ; CHECK-DAG: [[GEP:%.+]] = getelementptr inbounds <4 x float>, <4 x float>* %ptr, i64 [[LSHR]]
 ; CHECK-DAG: [[CAST:%.+]] = bitcast <4 x float>* [[GEP]] to <8 x float>*
 ; CHECK-DAG: [[LOAD:%.+]] = load <8 x float>, <8 x float>* [[CAST]], align 16
-; CHECK: %{{.* }}= shufflevector <8 x float> [[LOAD]], <8 x float> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
-; CHECK: %{{.* }}= shufflevector <8 x float> [[LOAD]], <8 x float> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; CHECK: %{{.* }}= shufflevector <8 x float> [[LOAD]], <8 x float> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+; CHECK: %{{.* }}= shufflevector <8 x float> [[LOAD]], <8 x float> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
 ; CHECK-DAG: ret void
 
 ; AS-LABEL: aarch64_ilc_idx_ld2
@@ -212,9 +212,9 @@ entry:
 ; CHECK-DAG: [[GEP:%.+]] = getelementptr inbounds <4 x float>, <4 x float>* %ptr, i64 [[LSHR]]
 ; CHECK-DAG: [[CAST:%.+]] = bitcast <4 x float>* [[GEP]] to <12 x float>*
 ; CHECK-DAG: [[LOAD:%.+]] = load <12 x float>, <12 x float>* [[CAST]], align 16
-; CHECK: %{{.* }}= shufflevector <12 x float> [[LOAD]], <12 x float> undef, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
-; CHECK: %{{.* }}= shufflevector <12 x float> [[LOAD]], <12 x float> undef, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
-; CHECK: %{{.* }}= shufflevector <12 x float> [[LOAD]], <12 x float> undef, <4 x i32> <i32 2, i32 5, i32 8, i32 11>
+; CHECK: %{{.* }}= shufflevector <12 x float> [[LOAD]], <12 x float> poison, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
+; CHECK: %{{.* }}= shufflevector <12 x float> [[LOAD]], <12 x float> poison, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
+; CHECK: %{{.* }}= shufflevector <12 x float> [[LOAD]], <12 x float> poison, <4 x i32> <i32 2, i32 5, i32 8, i32 11>
 ; CHECK-DAG: ret void
 
 ; AS-LABEL: aarch64_ilc_idx_ld3
@@ -390,17 +390,17 @@ entry:
 ; CHECK-NEXT: %b0 = bitcast float* %p0 to <7 x float>*
 ; CHECK-NEXT: %b1 = bitcast float* %p1 to <7 x float>*
 ; CHECK-NEXT: %l1 = load <7 x float>, <7 x float>* %b1
-; CHECK-NEXT: %s1 = shufflevector <7 x float> %l1, <7 x float> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+; CHECK-NEXT: %s1 = shufflevector <7 x float> %l1, <7 x float> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
 ; CHECK-NEXT: %l0 = load <7 x float>, <7 x float>* %b0
-; CHECK-NEXT: %s0 = shufflevector <7 x float> %l0, <7 x float> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+; CHECK-NEXT: %s0 = shufflevector <7 x float> %l0, <7 x float> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
 ; CHECK-NEXT: ret void
   %p0 = getelementptr inbounds float, float* %ptr, i32 0
   %p1 = getelementptr inbounds float, float* %ptr, i32 1
   %b0 = bitcast float* %p0 to <7 x float>*
   %b1 = bitcast float* %p1 to <7 x float>*
   %l1 = load <7 x float>, <7 x float>* %b1
-  %s1 = shufflevector <7 x float> %l1, <7 x float> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+  %s1 = shufflevector <7 x float> %l1, <7 x float> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
   %l0 = load <7 x float>, <7 x float>* %b0
-  %s0 = shufflevector <7 x float> %l0, <7 x float> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+  %s0 = shufflevector <7 x float> %l0, <7 x float> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
   ret void
 }

diff  --git a/llvm/test/CodeGen/AMDGPU/lower-kernargs.ll b/llvm/test/CodeGen/AMDGPU/lower-kernargs.ll
index a9ebd798f33a..3d5b61802cf7 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-kernargs.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-kernargs.ll
@@ -310,7 +310,7 @@ define amdgpu_kernel void @kern_v3i32(<3 x i32> %arg0) {
 ; HSA-NEXT:    [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_V3I32_KERNARG_SEGMENT]], i64 0
 ; HSA-NEXT:    [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to <4 x i32> addrspace(4)*
 ; HSA-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32> addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
-; HSA-NEXT:    [[ARG0_LOAD:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
+; HSA-NEXT:    [[ARG0_LOAD:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <3 x i32> <i32 0, i32 1, i32 2>
 ; HSA-NEXT:    store <3 x i32> [[ARG0_LOAD]], <3 x i32> addrspace(1)* undef, align 4
 ; HSA-NEXT:    ret void
 ;
@@ -319,7 +319,7 @@ define amdgpu_kernel void @kern_v3i32(<3 x i32> %arg0) {
 ; MESA-NEXT:    [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_V3I32_KERNARG_SEGMENT]], i64 36
 ; MESA-NEXT:    [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to <4 x i32> addrspace(4)*
 ; MESA-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32> addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
-; MESA-NEXT:    [[ARG0_LOAD:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
+; MESA-NEXT:    [[ARG0_LOAD:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <3 x i32> <i32 0, i32 1, i32 2>
 ; MESA-NEXT:    store <3 x i32> [[ARG0_LOAD]], <3 x i32> addrspace(1)* undef, align 4
 ; MESA-NEXT:    ret void
 ;
@@ -399,7 +399,7 @@ define amdgpu_kernel void @kern_i32_v3i32(i32 %arg0, <3 x i32> %arg1) {
 ; HSA-NEXT:    [[ARG1_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I32_V3I32_KERNARG_SEGMENT]], i64 16
 ; HSA-NEXT:    [[ARG1_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET]] to <4 x i32> addrspace(4)*
 ; HSA-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32> addrspace(4)* [[ARG1_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
-; HSA-NEXT:    [[ARG1_LOAD:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
+; HSA-NEXT:    [[ARG1_LOAD:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <3 x i32> <i32 0, i32 1, i32 2>
 ; HSA-NEXT:    store i32 [[ARG0_LOAD]], i32 addrspace(1)* undef, align 4
 ; HSA-NEXT:    store <3 x i32> [[ARG1_LOAD]], <3 x i32> addrspace(1)* undef, align 4
 ; HSA-NEXT:    ret void
@@ -412,7 +412,7 @@ define amdgpu_kernel void @kern_i32_v3i32(i32 %arg0, <3 x i32> %arg1) {
 ; MESA-NEXT:    [[ARG1_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I32_V3I32_KERNARG_SEGMENT]], i64 52
 ; MESA-NEXT:    [[ARG1_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET]] to <4 x i32> addrspace(4)*
 ; MESA-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32> addrspace(4)* [[ARG1_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
-; MESA-NEXT:    [[ARG1_LOAD:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
+; MESA-NEXT:    [[ARG1_LOAD:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <3 x i32> <i32 0, i32 1, i32 2>
 ; MESA-NEXT:    store i32 [[ARG0_LOAD]], i32 addrspace(1)* undef, align 4
 ; MESA-NEXT:    store <3 x i32> [[ARG1_LOAD]], <3 x i32> addrspace(1)* undef, align 4
 ; MESA-NEXT:    ret void
@@ -431,7 +431,7 @@ define amdgpu_kernel void @kern_struct_a(%struct.a %arg0) {
 ; HSA-NEXT:    [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_STRUCT_A_KERNARG_SEGMENT]], i64 0
 ; HSA-NEXT:    [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to [[STRUCT_A:%.*]] addrspace(4)*
 ; HSA-NEXT:    [[ARG0_LOAD:%.*]] = load [[STRUCT_A]], [[STRUCT_A]] addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
-; HSA-NEXT:    store [[STRUCT_A]] %arg0.load, [[STRUCT_A]] addrspace(1)* undef, align 4
+; HSA-NEXT:    store [[STRUCT_A]] [[ARG0_LOAD]], [[STRUCT_A]] addrspace(1)* undef, align 4
 ; HSA-NEXT:    ret void
 ;
 ; MESA-LABEL: @kern_struct_a(
@@ -439,7 +439,7 @@ define amdgpu_kernel void @kern_struct_a(%struct.a %arg0) {
 ; MESA-NEXT:    [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_STRUCT_A_KERNARG_SEGMENT]], i64 36
 ; MESA-NEXT:    [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to [[STRUCT_A:%.*]] addrspace(4)*
 ; MESA-NEXT:    [[ARG0_LOAD:%.*]] = load [[STRUCT_A]], [[STRUCT_A]] addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
-; MESA-NEXT:    store [[STRUCT_A]] %arg0.load, [[STRUCT_A]] addrspace(1)* undef, align 4
+; MESA-NEXT:    store [[STRUCT_A]] [[ARG0_LOAD]], [[STRUCT_A]] addrspace(1)* undef, align 4
 ; MESA-NEXT:    ret void
 ;
   store %struct.a %arg0, %struct.a addrspace(1)* undef
@@ -452,7 +452,7 @@ define amdgpu_kernel void @kern_struct_b_packed(%struct.b.packed %arg0) #0 {
 ; HSA-NEXT:    [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_STRUCT_B_PACKED_KERNARG_SEGMENT]], i64 0
 ; HSA-NEXT:    [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to [[STRUCT_B_PACKED:%.*]] addrspace(4)*
 ; HSA-NEXT:    [[ARG0_LOAD:%.*]] = load [[STRUCT_B_PACKED]], [[STRUCT_B_PACKED]] addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
-; HSA-NEXT:    store [[STRUCT_B_PACKED]] %arg0.load, [[STRUCT_B_PACKED]] addrspace(1)* undef, align 16
+; HSA-NEXT:    store [[STRUCT_B_PACKED]] [[ARG0_LOAD]], [[STRUCT_B_PACKED]] addrspace(1)* undef, align 16
 ; HSA-NEXT:    ret void
 ;
 ; MESA-LABEL: @kern_struct_b_packed(
@@ -460,7 +460,7 @@ define amdgpu_kernel void @kern_struct_b_packed(%struct.b.packed %arg0) #0 {
 ; MESA-NEXT:    [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_STRUCT_B_PACKED_KERNARG_SEGMENT]], i64 36
 ; MESA-NEXT:    [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to [[STRUCT_B_PACKED:%.*]] addrspace(4)*
 ; MESA-NEXT:    [[ARG0_LOAD:%.*]] = load [[STRUCT_B_PACKED]], [[STRUCT_B_PACKED]] addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
-; MESA-NEXT:    store [[STRUCT_B_PACKED]] %arg0.load, [[STRUCT_B_PACKED]] addrspace(1)* undef, align 16
+; MESA-NEXT:    store [[STRUCT_B_PACKED]] [[ARG0_LOAD]], [[STRUCT_B_PACKED]] addrspace(1)* undef, align 16
 ; MESA-NEXT:    ret void
 ;
   store %struct.b.packed %arg0, %struct.b.packed addrspace(1)* undef

diff  --git a/llvm/test/CodeGen/AMDGPU/rewrite-out-arguments-address-space.ll b/llvm/test/CodeGen/AMDGPU/rewrite-out-arguments-address-space.ll
index 1ab38398acd7..e23ffc96342a 100644
--- a/llvm/test/CodeGen/AMDGPU/rewrite-out-arguments-address-space.ll
+++ b/llvm/test/CodeGen/AMDGPU/rewrite-out-arguments-address-space.ll
@@ -19,7 +19,7 @@ define void @void_one_out_non_private_arg_i32_1_use(i32 addrspace(1)* %val) #0 {
 ; CHECK-LABEL: define private %bitcast_pointer_as1 @bitcast_pointer_as1.body(<3 x i32> addrspace(1)* %out) #0 {
 ; CHECK-NEXT: %load = load volatile <4 x i32>, <4 x i32> addrspace(1)* undef
 ; CHECK-NEXT: %bitcast = bitcast <3 x i32> addrspace(1)* %out to <4 x i32> addrspace(1)*
-; CHECK-NEXT: %1 = shufflevector <4 x i32> %load, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
+; CHECK-NEXT: %1 = shufflevector <4 x i32> %load, <4 x i32> poison, <3 x i32> <i32 0, i32 1, i32 2>
 ; CHECK-NEXT: %2 = insertvalue %bitcast_pointer_as1 undef, <3 x i32> %1, 0
 ; CHECK-NEXT: ret %bitcast_pointer_as1 %2
 

diff  --git a/llvm/test/CodeGen/AMDGPU/rewrite-out-arguments.ll b/llvm/test/CodeGen/AMDGPU/rewrite-out-arguments.ll
index 673fbb991ad1..fdffbcd7d5bc 100644
--- a/llvm/test/CodeGen/AMDGPU/rewrite-out-arguments.ll
+++ b/llvm/test/CodeGen/AMDGPU/rewrite-out-arguments.ll
@@ -568,7 +568,7 @@ define void @preserve_metadata(i32 %arg0, i32* %val) #0 !kernel_arg_access_qual
 ; CHECK-LABEL: define private %bitcast_pointer_v4i32_v3i32 @bitcast_pointer_v4i32_v3i32.body(<3 x i32>* %out) #0 {
 ; CHECK-NEXT: %load = load volatile <4 x i32>, <4 x i32> addrspace(1)* undef
 ; CHECK-NEXT: %bitcast = bitcast <3 x i32>* %out to <4 x i32>*
-; CHECK-NEXT: %1 = shufflevector <4 x i32> %load, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
+; CHECK-NEXT: %1 = shufflevector <4 x i32> %load, <4 x i32> poison, <3 x i32> <i32 0, i32 1, i32 2>
 ; CHECK-NEXT: %2 = insertvalue %bitcast_pointer_v4i32_v3i32 undef, <3 x i32> %1, 0
 ; CHECK-NEXT: ret %bitcast_pointer_v4i32_v3i32 %2
 
@@ -587,7 +587,7 @@ define void @bitcast_pointer_v4i32_v3i32(<3 x i32>* %out) #0 {
 ; CHECK-LABEL: define private %bitcast_pointer_v4i32_v3f32 @bitcast_pointer_v4i32_v3f32.body(<3 x float>* %out) #0 {
 ; CHECK-NEXT: %load = load volatile <4 x i32>, <4 x i32> addrspace(1)* undef
 ; CHECK-NEXT: %bitcast = bitcast <3 x float>* %out to <4 x i32>*
-; CHECK-NEXT: %1 = shufflevector <4 x i32> %load, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
+; CHECK-NEXT: %1 = shufflevector <4 x i32> %load, <4 x i32> poison, <3 x i32> <i32 0, i32 1, i32 2>
 ; CHECK-NEXT: %2 = bitcast <3 x i32> %1 to <3 x float>
 ; CHECK-NEXT: %3 = insertvalue %bitcast_pointer_v4i32_v3f32 undef, <3 x float> %2, 0
 ; CHECK-NEXT: ret %bitcast_pointer_v4i32_v3f32 %3
@@ -645,9 +645,9 @@ define void @bitcast_pointer_f16_i32(i32* %out) #0 {
 %struct.v4f32 = type { <4 x float> }
 
 ; CHECK-LABEL: define private %bitcast_struct_v3f32_v3f32 @bitcast_struct_v3f32_v3f32.body(%struct.v3f32* %out, <3 x float> %value) #0 {
-; CHECK-NEXT: %extractVec = shufflevector <3 x float> %value, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
+; CHECK-NEXT: %extractVec = shufflevector <3 x float> %value, <3 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
 ; CHECK-NEXT: %cast = bitcast %struct.v3f32* %out to <4 x float>*
-; CHECK-NEXT: %1 = shufflevector <4 x float> %extractVec, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
+; CHECK-NEXT: %1 = shufflevector <4 x float> %extractVec, <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
 ; CHECK-NEXT: %2 = insertvalue %struct.v3f32 undef, <3 x float> %1, 0
 ; CHECK-NEXT: %3 = insertvalue %bitcast_struct_v3f32_v3f32 undef, %struct.v3f32 %2, 0
 ; CHECK-NEXT: ret %bitcast_struct_v3f32_v3f32 %3
@@ -658,16 +658,16 @@ define void @bitcast_pointer_f16_i32(i32* %out) #0 {
 ; CHECK-NEXT: store %struct.v3f32 %4, %struct.v3f32* %0, align 16
 ; CHECK-NEXT: ret void
 define void @bitcast_struct_v3f32_v3f32(%struct.v3f32* %out, <3 x float> %value) #0 {
-  %extractVec = shufflevector <3 x float> %value, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
+  %extractVec = shufflevector <3 x float> %value, <3 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
   %cast = bitcast %struct.v3f32* %out to <4 x float>*
   store <4 x float> %extractVec, <4 x float>* %cast, align 16
   ret void
 }
 
 ; CHECK-LABEL: define private %bitcast_struct_v3f32_v3i32 @bitcast_struct_v3f32_v3i32.body(%struct.v3f32* %out, <3 x i32> %value) #0 {
-; CHECK-NEXT: %extractVec = shufflevector <3 x i32> %value, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
+; CHECK-NEXT: %extractVec = shufflevector <3 x i32> %value, <3 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
 ; CHECK-NEXT: %cast = bitcast %struct.v3f32* %out to <4 x i32>*
-; CHECK-NEXT: %1 = shufflevector <4 x i32> %extractVec, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
+; CHECK-NEXT: %1 = shufflevector <4 x i32> %extractVec, <4 x i32> poison, <3 x i32> <i32 0, i32 1, i32 2>
 ; CHECK-NEXT: %2 = bitcast <3 x i32> %1 to <3 x float>
 ; CHECK-NEXT: %3 = insertvalue %struct.v3f32 undef, <3 x float> %2, 0
 ; CHECK-NEXT: %4 = insertvalue %bitcast_struct_v3f32_v3i32 undef, %struct.v3f32 %3, 0
@@ -678,7 +678,7 @@ define void @bitcast_struct_v3f32_v3f32(%struct.v3f32* %out, <3 x float> %value)
 ; CHECK-NEXT: %4 = extractvalue %bitcast_struct_v3f32_v3i32 %3, 0
 ; CHECK-NEXT: store %struct.v3f32 %4, %struct.v3f32* %0, align 16
 define void @bitcast_struct_v3f32_v3i32(%struct.v3f32* %out, <3 x i32> %value) #0 {
-  %extractVec = shufflevector <3 x i32> %value, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
+  %extractVec = shufflevector <3 x i32> %value, <3 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
   %cast = bitcast %struct.v3f32* %out to <4 x i32>*
   store <4 x i32> %extractVec, <4 x i32>* %cast, align 16
   ret void
@@ -709,7 +709,7 @@ define void @bitcast_struct_v3f32_v4i32(%struct.v3f32* %out, <4 x i32> %value) #
 ; CHECK-LABEL: define private %bitcast_struct_v4f32_v3f32 @bitcast_struct_v4f32_v3f32.body(%struct.v4f32* %out, <3 x float> %value) #0 {
 ; CHECK-LABEL: define void @bitcast_struct_v4f32_v3f32(%struct.v4f32* %0, <3 x float> %1) #2 {
 define void @bitcast_struct_v4f32_v3f32(%struct.v4f32* %out, <3 x float> %value) #0 {
-  %extractVec = shufflevector <3 x float> %value, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
+  %extractVec = shufflevector <3 x float> %value, <3 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
   %cast = bitcast %struct.v4f32* %out to <4 x float>*
   store <4 x float> %extractVec, <4 x float>* %cast, align 16
   ret void
@@ -728,7 +728,7 @@ define void @bitcast_struct_v3f32_v2f32(%struct.v3f32* %out, <2 x float> %value)
 ; CHECK-LABEL: define void @bitcast_struct_v3f32_f32_v3f32(%struct.v3f32.f32* %out, <3 x float> %value) #0 {
 ; CHECK-NOT: call
 define void @bitcast_struct_v3f32_f32_v3f32(%struct.v3f32.f32* %out, <3 x float> %value) #0 {
-  %extractVec = shufflevector <3 x float> %value, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
+  %extractVec = shufflevector <3 x float> %value, <3 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
   %cast = bitcast %struct.v3f32.f32* %out to <4 x float>*
   store <4 x float> %extractVec, <4 x float>* %cast, align 16
   ret void
@@ -766,7 +766,7 @@ define void @bitcast_array_v4i32_v4f32([4 x i32]* %out, [4 x float] %value) #0 {
 ; CHECK-LABEL: define private %multi_return_bitcast_struct_v3f32_v3f32 @multi_return_bitcast_struct_v3f32_v3f32.body(i1 %cond, %struct.v3f32* %out, <3 x float> %value) #0 {
 ; CHECK: ret0:
 ; CHECK: %cast0 = bitcast %struct.v3f32* %out to <4 x float>*
-; CHECK: %0 = shufflevector <4 x float> %extractVec, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
+; CHECK: %0 = shufflevector <4 x float> %extractVec, <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
 ; CHECK: %1 = insertvalue %struct.v3f32 undef, <3 x float> %0, 0
 ; CHECK: %2 = insertvalue %multi_return_bitcast_struct_v3f32_v3f32 undef, %struct.v3f32 %1, 0
 ; CHECK: ret %multi_return_bitcast_struct_v3f32_v3f32 %2
@@ -780,7 +780,7 @@ entry:
   br i1 %cond, label %ret0, label %ret1
 
 ret0:
-  %extractVec = shufflevector <3 x float> %value, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
+  %extractVec = shufflevector <3 x float> %value, <3 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
   %cast0 = bitcast %struct.v3f32* %out to <4 x float>*
   store <4 x float> %extractVec, <4 x float>* %cast0, align 16
   ret void

diff  --git a/llvm/test/CodeGen/Generic/expand-experimental-reductions.ll b/llvm/test/CodeGen/Generic/expand-experimental-reductions.ll
index 99aa181bd70f..0202193bdd5b 100644
--- a/llvm/test/CodeGen/Generic/expand-experimental-reductions.ll
+++ b/llvm/test/CodeGen/Generic/expand-experimental-reductions.ll
@@ -23,7 +23,7 @@ declare i8 @llvm.vector.reduce.and.i8.v3i8(<3 x i8>)
 define i64 @add_i64(<2 x i64> %vec) {
 ; CHECK-LABEL: @add_i64(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
+; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> poison, <2 x i32> <i32 1, i32 undef>
 ; CHECK-NEXT:    [[BIN_RDX:%.*]] = add <2 x i64> [[VEC]], [[RDX_SHUF]]
 ; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x i64> [[BIN_RDX]], i32 0
 ; CHECK-NEXT:    ret i64 [[TMP0]]
@@ -36,7 +36,7 @@ entry:
 define i64 @mul_i64(<2 x i64> %vec) {
 ; CHECK-LABEL: @mul_i64(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
+; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> poison, <2 x i32> <i32 1, i32 undef>
 ; CHECK-NEXT:    [[BIN_RDX:%.*]] = mul <2 x i64> [[VEC]], [[RDX_SHUF]]
 ; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x i64> [[BIN_RDX]], i32 0
 ; CHECK-NEXT:    ret i64 [[TMP0]]
@@ -49,7 +49,7 @@ entry:
 define i64 @and_i64(<2 x i64> %vec) {
 ; CHECK-LABEL: @and_i64(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
+; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> poison, <2 x i32> <i32 1, i32 undef>
 ; CHECK-NEXT:    [[BIN_RDX:%.*]] = and <2 x i64> [[VEC]], [[RDX_SHUF]]
 ; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x i64> [[BIN_RDX]], i32 0
 ; CHECK-NEXT:    ret i64 [[TMP0]]
@@ -62,7 +62,7 @@ entry:
 define i64 @or_i64(<2 x i64> %vec) {
 ; CHECK-LABEL: @or_i64(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
+; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> poison, <2 x i32> <i32 1, i32 undef>
 ; CHECK-NEXT:    [[BIN_RDX:%.*]] = or <2 x i64> [[VEC]], [[RDX_SHUF]]
 ; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x i64> [[BIN_RDX]], i32 0
 ; CHECK-NEXT:    ret i64 [[TMP0]]
@@ -75,7 +75,7 @@ entry:
 define i64 @xor_i64(<2 x i64> %vec) {
 ; CHECK-LABEL: @xor_i64(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
+; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> poison, <2 x i32> <i32 1, i32 undef>
 ; CHECK-NEXT:    [[BIN_RDX:%.*]] = xor <2 x i64> [[VEC]], [[RDX_SHUF]]
 ; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x i64> [[BIN_RDX]], i32 0
 ; CHECK-NEXT:    ret i64 [[TMP0]]
@@ -88,9 +88,9 @@ entry:
 define float @fadd_f32(<4 x float> %vec) {
 ; CHECK-LABEL: @fadd_f32(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[VEC:%.*]], <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[VEC:%.*]], <4 x float> poison, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[BIN_RDX:%.*]] = fadd fast <4 x float> [[VEC]], [[RDX_SHUF]]
-; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]]
 ; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0
 ; CHECK-NEXT:    [[BIN_RDX3:%.*]] = fadd fast float 0.000000e+00, [[TMP0]]
@@ -104,9 +104,9 @@ entry:
 define float @fadd_f32_accum(float %accum, <4 x float> %vec) {
 ; CHECK-LABEL: @fadd_f32_accum(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[VEC:%.*]], <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[VEC:%.*]], <4 x float> poison, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[BIN_RDX:%.*]] = fadd fast <4 x float> [[VEC]], [[RDX_SHUF]]
-; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]]
 ; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0
 ; CHECK-NEXT:    [[BIN_RDX3:%.*]] = fadd fast float [[ACCUM:%.*]], [[TMP0]]
@@ -156,9 +156,9 @@ entry:
 define float @fmul_f32(<4 x float> %vec) {
 ; CHECK-LABEL: @fmul_f32(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[VEC:%.*]], <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[VEC:%.*]], <4 x float> poison, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[BIN_RDX:%.*]] = fmul fast <4 x float> [[VEC]], [[RDX_SHUF]]
-; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[BIN_RDX2:%.*]] = fmul fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]]
 ; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0
 ; CHECK-NEXT:    [[BIN_RDX3:%.*]] = fmul fast float 1.000000e+00, [[TMP0]]
@@ -172,9 +172,9 @@ entry:
 define float @fmul_f32_accum(float %accum, <4 x float> %vec) {
 ; CHECK-LABEL: @fmul_f32_accum(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[VEC:%.*]], <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[VEC:%.*]], <4 x float> poison, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[BIN_RDX:%.*]] = fmul fast <4 x float> [[VEC]], [[RDX_SHUF]]
-; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[BIN_RDX2:%.*]] = fmul fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]]
 ; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0
 ; CHECK-NEXT:    [[BIN_RDX3:%.*]] = fmul fast float [[ACCUM:%.*]], [[TMP0]]
@@ -224,7 +224,7 @@ entry:
 define i64 @smax_i64(<2 x i64> %vec) {
 ; CHECK-LABEL: @smax_i64(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
+; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> poison, <2 x i32> <i32 1, i32 undef>
 ; CHECK-NEXT:    [[RDX_MINMAX_CMP:%.*]] = icmp sgt <2 x i64> [[VEC]], [[RDX_SHUF]]
 ; CHECK-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select <2 x i1> [[RDX_MINMAX_CMP]], <2 x i64> [[VEC]], <2 x i64> [[RDX_SHUF]]
 ; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x i64> [[RDX_MINMAX_SELECT]], i32 0
@@ -238,7 +238,7 @@ entry:
 define i64 @smin_i64(<2 x i64> %vec) {
 ; CHECK-LABEL: @smin_i64(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
+; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> poison, <2 x i32> <i32 1, i32 undef>
 ; CHECK-NEXT:    [[RDX_MINMAX_CMP:%.*]] = icmp slt <2 x i64> [[VEC]], [[RDX_SHUF]]
 ; CHECK-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select <2 x i1> [[RDX_MINMAX_CMP]], <2 x i64> [[VEC]], <2 x i64> [[RDX_SHUF]]
 ; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x i64> [[RDX_MINMAX_SELECT]], i32 0
@@ -252,7 +252,7 @@ entry:
 define i64 @umax_i64(<2 x i64> %vec) {
 ; CHECK-LABEL: @umax_i64(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
+; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> poison, <2 x i32> <i32 1, i32 undef>
 ; CHECK-NEXT:    [[RDX_MINMAX_CMP:%.*]] = icmp ugt <2 x i64> [[VEC]], [[RDX_SHUF]]
 ; CHECK-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select <2 x i1> [[RDX_MINMAX_CMP]], <2 x i64> [[VEC]], <2 x i64> [[RDX_SHUF]]
 ; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x i64> [[RDX_MINMAX_SELECT]], i32 0
@@ -266,7 +266,7 @@ entry:
 define i64 @umin_i64(<2 x i64> %vec) {
 ; CHECK-LABEL: @umin_i64(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
+; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> poison, <2 x i32> <i32 1, i32 undef>
 ; CHECK-NEXT:    [[RDX_MINMAX_CMP:%.*]] = icmp ult <2 x i64> [[VEC]], [[RDX_SHUF]]
 ; CHECK-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select <2 x i1> [[RDX_MINMAX_CMP]], <2 x i64> [[VEC]], <2 x i64> [[RDX_SHUF]]
 ; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x i64> [[RDX_MINMAX_SELECT]], i32 0

diff  --git a/llvm/test/Instrumentation/MemorySanitizer/clmul.ll b/llvm/test/Instrumentation/MemorySanitizer/clmul.ll
index 772d19c66457..987609841c88 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/clmul.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/clmul.ll
@@ -20,8 +20,8 @@ entry:
 ; CHECK-LABEL: @clmul00
 ; CHECK: %[[S0:.*]] = load <2 x i64>, <2 x i64>* {{.*}}@__msan_param_tls
 ; CHECK: %[[S1:.*]] = load <2 x i64>, <2 x i64>* {{.*}}@__msan_param_tls
-; CHECK: %[[SHUF0:.*]] = shufflevector <2 x i64> %[[S0]], <2 x i64> undef, <2 x i32> zeroinitializer
-; CHECK: %[[SHUF1:.*]] = shufflevector <2 x i64> %[[S1]], <2 x i64> undef, <2 x i32> zeroinitializer
+; CHECK: %[[SHUF0:.*]] = shufflevector <2 x i64> %[[S0]], <2 x i64> poison, <2 x i32> zeroinitializer
+; CHECK: %[[SHUF1:.*]] = shufflevector <2 x i64> %[[S1]], <2 x i64> poison, <2 x i32> zeroinitializer
 ; CHECK: %[[SRET:.*]] = or <2 x i64> %[[SHUF0]], %[[SHUF1]]
 ; CHECK: store <2 x i64> %[[SRET]], <2 x i64>* {{.*}}@__msan_retval_tls
 
@@ -34,8 +34,8 @@ entry:
 ; CHECK-LABEL: @clmul10
 ; CHECK: %[[S0:.*]] = load <2 x i64>, <2 x i64>* {{.*}}@__msan_param_tls
 ; CHECK: %[[S1:.*]] = load <2 x i64>, <2 x i64>* {{.*}}@__msan_param_tls
-; CHECK: %[[SHUF0:.*]] = shufflevector <2 x i64> %[[S0]], <2 x i64> undef, <2 x i32> zeroinitializer
-; CHECK: %[[SHUF1:.*]] = shufflevector <2 x i64> %[[S1]], <2 x i64> undef, <2 x i32> <i32 1, i32 1>
+; CHECK: %[[SHUF0:.*]] = shufflevector <2 x i64> %[[S0]], <2 x i64> poison, <2 x i32> zeroinitializer
+; CHECK: %[[SHUF1:.*]] = shufflevector <2 x i64> %[[S1]], <2 x i64> poison, <2 x i32> <i32 1, i32 1>
 ; CHECK: %[[SRET:.*]] = or <2 x i64> %[[SHUF0]], %[[SHUF1]]
 ; CHECK: store <2 x i64> %[[SRET]], <2 x i64>* {{.*}}@__msan_retval_tls
 
@@ -48,8 +48,8 @@ entry:
 ; CHECK-LABEL: @clmul11_256
 ; CHECK: %[[S0:.*]] = load <4 x i64>, <4 x i64>* {{.*}}@__msan_param_tls
 ; CHECK: %[[S1:.*]] = load <4 x i64>, <4 x i64>* {{.*}}@__msan_param_tls
-; CHECK: %[[SHUF0:.*]] = shufflevector <4 x i64> %[[S0]], <4 x i64> undef, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
-; CHECK: %[[SHUF1:.*]] = shufflevector <4 x i64> %[[S1]], <4 x i64> undef, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
+; CHECK: %[[SHUF0:.*]] = shufflevector <4 x i64> %[[S0]], <4 x i64> poison, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
+; CHECK: %[[SHUF1:.*]] = shufflevector <4 x i64> %[[S1]], <4 x i64> poison, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
 ; CHECK: %[[SRET:.*]] = or <4 x i64> %[[SHUF0]], %[[SHUF1]]
 ; CHECK: store <4 x i64> %[[SRET]], <4 x i64>* {{.*}}@__msan_retval_tls
 
@@ -62,8 +62,8 @@ entry:
 ; CHECK-LABEL: @clmul01_512
 ; CHECK: %[[S0:.*]] = load <8 x i64>, <8 x i64>* {{.*}}@__msan_param_tls
 ; CHECK: %[[S1:.*]] = load <8 x i64>, <8 x i64>* {{.*}}@__msan_param_tls
-; CHECK: %[[SHUF0:.*]] = shufflevector <8 x i64> %[[S0]], <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
-; CHECK: %[[SHUF1:.*]] = shufflevector <8 x i64> %[[S1]], <8 x i64> undef, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
+; CHECK: %[[SHUF0:.*]] = shufflevector <8 x i64> %[[S0]], <8 x i64> poison, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
+; CHECK: %[[SHUF1:.*]] = shufflevector <8 x i64> %[[S1]], <8 x i64> poison, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
 ; CHECK: %[[SRET:.*]] = or <8 x i64> %[[SHUF0]], %[[SHUF1]]
 ; ORIGIN: %[[FLAT:.*]] = bitcast <8 x i64> %[[SHUF1]] to i512
 ; ORIGIN: %[[I:.*]] = icmp ne i512 %[[FLAT]], 0

diff  --git a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-demanded-vector-elts-inseltpoison.ll b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-demanded-vector-elts-inseltpoison.ll
index a8b4601cc1c9..e2a8b27d21d7 100644
--- a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-demanded-vector-elts-inseltpoison.ll
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-demanded-vector-elts-inseltpoison.ll
@@ -105,7 +105,7 @@ define amdgpu_ps <2 x float> @extract_elt0_elt1_buffer_load_v4f32(<4 x i32> inre
 
 ; CHECK-LABEL: @extract_elt1_elt2_buffer_load_v4f32(
 ; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
-; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 1, i32 2>
+; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 1, i32 2>
 ; CHECK-NEXT: ret <2 x float> %shuf
 define amdgpu_ps <2 x float> @extract_elt1_elt2_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
   %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
@@ -191,7 +191,7 @@ define amdgpu_ps { float, float, float } @extract_elt0_elt1_elt2_buffer_load_v4f
 ; CHECK-LABEL: @extract_elt0_elt1_elt2_buffer_load_v4f32_3(
 ; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
 ; CHECK-NEXT: %ins1 = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 0, i32 2>
-; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 undef, i32 1>
+; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 undef, i32 1>
 ; CHECK-NEXT: %ret = fadd <2 x float> %ins1, %shuf
 define amdgpu_ps <2 x float> @extract_elt0_elt1_elt2_buffer_load_v4f32_3(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
   %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
@@ -207,7 +207,7 @@ define amdgpu_ps <2 x float> @extract_elt0_elt1_elt2_buffer_load_v4f32_3(<4 x i3
 ; CHECK-LABEL: @extract_elt0_elt1_elt2_buffer_load_v4f32_4(
 ; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
 ; CHECK-NEXT: %ins1 = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 0, i32 2>
-; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 1, i32 undef>
+; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 1, i32 undef>
 ; CHECK-NEXT: %ret = fadd <2 x float> %ins1, %shuf
 ; CHECK-NEXT: ret <2 x float> %ret
 define amdgpu_ps <2 x float> @extract_elt0_elt1_elt2_buffer_load_v4f32_4(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
@@ -224,7 +224,7 @@ define amdgpu_ps <2 x float> @extract_elt0_elt1_elt2_buffer_load_v4f32_4(<4 x i3
 ; CHECK-LABEL: @extract_elt0_elt1_elt2_buffer_load_v4f32_5(
 ; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
 ; CHECK-NEXT: %ins1 = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 2, i32 2>
-; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 0, i32 1>
 ; CHECK-NEXT: %ret = fadd <2 x float> %ins1, %shuf
 define amdgpu_ps <2 x float> @extract_elt0_elt1_elt2_buffer_load_v4f32_5(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
   %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
@@ -341,7 +341,7 @@ define amdgpu_ps <2 x float> @extract_elt0_elt1_buffer_load_format_v4f32(<4 x i3
 ; The initial insertion point is at the extractelement
 ; CHECK-LABEL: @extract01_bitcast_buffer_load_format_v4f32(
 ; CHECK-NEXT: %tmp = call <2 x float> @llvm.amdgcn.buffer.load.format.v2f32(<4 x i32> undef, i32 %arg, i32 16, i1 false, i1 false)
-; CHECK-NEXT: %1 = shufflevector <2 x float> %tmp, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+; CHECK-NEXT: %1 = shufflevector <2 x float> %tmp, <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
 ; CHECK-NEXT: %tmp1 = bitcast <4 x float> %1 to <2 x double>
 ; CHECK-NEXT: %tmp2 = extractelement <2 x double> %tmp1, i32 0
 ; CHECK-NEXT: ret double %tmp2
@@ -1209,7 +1209,7 @@ define amdgpu_ps <2 x float> @extract_elt0_elt1_raw_buffer_load_format_v4f32(<4
 
 ; CHECK-LABEL: @extract_elt1_elt2_raw_buffer_load_format_v4f32(
 ; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
-; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 1, i32 2>
+; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 1, i32 2>
 ; CHECK-NEXT: ret <2 x float> %shuf
 define amdgpu_ps <2 x float> @extract_elt1_elt2_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
   %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
@@ -1809,7 +1809,7 @@ define amdgpu_ps <2 x float> @extract_elt0_elt1_struct_buffer_load_format_v4f32(
 
 ; CHECK-LABEL: @extract_elt1_elt2_struct_buffer_load_format_v4f32(
 ; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
-; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 1, i32 2>
+; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 1, i32 2>
 ; CHECK-NEXT: ret <2 x float> %shuf
 define amdgpu_ps <2 x float> @extract_elt1_elt2_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
   %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
@@ -2029,7 +2029,7 @@ define amdgpu_ps <2 x float> @extract_elt0_elt1_raw_tbuffer_load_v4f32(<4 x i32>
 
 ; CHECK-LABEL: @extract_elt1_elt2_raw_tbuffer_load_v4f32(
 ; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.raw.tbuffer.load.v3f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
-; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 1, i32 2>
+; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 1, i32 2>
 ; CHECK-NEXT: ret <2 x float> %shuf
 define amdgpu_ps <2 x float> @extract_elt1_elt2_raw_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
   %data = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
@@ -2292,7 +2292,7 @@ define amdgpu_ps <2 x float> @extract_elt0_elt1_struct_tbuffer_load_v4f32(<4 x i
 
 ; CHECK-LABEL: @extract_elt1_elt2_struct_tbuffer_load_v4f32(
 ; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.struct.tbuffer.load.v3f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
-; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 1, i32 2>
+; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 1, i32 2>
 ; CHECK-NEXT: ret <2 x float> %shuf
 define amdgpu_ps <2 x float> @extract_elt1_elt2_struct_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
   %data = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
@@ -2511,7 +2511,7 @@ define amdgpu_ps <2 x float> @extract_elt0_elt1_tbuffer_load_v4f32(<4 x i32> inr
 
 ; CHECK-LABEL: @extract_elt1_elt2_tbuffer_load_v4f32(
 ; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.tbuffer.load.v3f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 0, i32 0, i32 14, i32 4, i1 false, i1 false)
-; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 1, i32 2>
+; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 1, i32 2>
 ; CHECK-NEXT: ret <2 x float> %shuf
 define amdgpu_ps <2 x float> @extract_elt1_elt2_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1) #0 {
   %data = call <4 x float> @llvm.amdgcn.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 0, i32 0, i32 14, i32 4, i1 false, i1 false)
@@ -2798,7 +2798,7 @@ define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_dmask_0001_image_sample_1d_
 
 ; CHECK-LABEL: @extract_elt0_elt1_elt2_dmask_0011_image_sample_1d_v4f32_f32(
 ; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32(i32 3, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
-; CHECK-NEXT: %shuf = shufflevector <2 x float> %data, <2 x float> undef, <3 x i32> <i32 0, i32 1, i32 undef>
+; CHECK-NEXT: %shuf = shufflevector <2 x float> %data, <2 x float> poison, <3 x i32> <i32 0, i32 1, i32 undef>
 ; CHECK-NEXT: ret <3 x float> %shuf
 define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_dmask_0011_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
   %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 3, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
@@ -2808,7 +2808,7 @@ define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_dmask_0011_image_sample_1d_
 
 ; CHECK-LABEL: @extract_elt0_elt1_elt2_dmask_0101_image_sample_1d_v4f32_f32(
 ; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32(i32 5, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
-; CHECK-NEXT: %shuf = shufflevector <2 x float> %data, <2 x float> undef, <3 x i32> <i32 0, i32 1, i32 undef>
+; CHECK-NEXT: %shuf = shufflevector <2 x float> %data, <2 x float> poison, <3 x i32> <i32 0, i32 1, i32 undef>
 ; CHECK-NEXT: ret <3 x float> %shuf
 define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_dmask_0101_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
   %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 5, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
@@ -2993,7 +2993,7 @@ define amdgpu_ps half @extract_elt1_image_sample_cd_cl_1d_v4f16_f32_f32(float %d
 
 ; CHECK-LABEL: @extract_elt_to3_image_sample_cd_cl_1d_v4f16_f32_f32(
 ; CHECK-NEXT: %data = call <3 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v3f16.f32.f32(i32 7, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
-; CHECK-NEXT: %res = shufflevector <3 x half> %data, <3 x half> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
+; CHECK-NEXT: %res = shufflevector <3 x half> %data, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
 ; CHECK-NEXT: ret <4 x half> %res
 define amdgpu_ps <4 x half> @extract_elt_to3_image_sample_cd_cl_1d_v4f16_f32_f32(float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
   %data = call <4 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v4f16.f32.f32(i32 15, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
@@ -3003,7 +3003,7 @@ define amdgpu_ps <4 x half> @extract_elt_to3_image_sample_cd_cl_1d_v4f16_f32_f32
 
 ; CHECK-LABEL: @extract_elt_to2_image_sample_cd_cl_1d_v4f16_f32_f32(
 ; CHECK-NEXT: %data = call <2 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v2f16.f32.f32(i32 3, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
-; CHECK-NEXT: %res = shufflevector <2 x half> %data, <2 x half> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+; CHECK-NEXT: %res = shufflevector <2 x half> %data, <2 x half> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
 ; CHECK-NEXT: ret <4 x half> %res
 define amdgpu_ps <4 x half> @extract_elt_to2_image_sample_cd_cl_1d_v4f16_f32_f32(float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
   %data = call <4 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v4f16.f32.f32(i32 15, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)

diff  --git a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-demanded-vector-elts.ll b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-demanded-vector-elts.ll
index f8e7789d5f02..855d396ed63d 100644
--- a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-demanded-vector-elts.ll
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-demanded-vector-elts.ll
@@ -99,27 +99,27 @@ define amdgpu_ps float @extract_elt3_buffer_load_v4f32(<4 x i32> inreg %rsrc, i3
 ; CHECK-NEXT: ret <2 x float>
 define amdgpu_ps <2 x float> @extract_elt0_elt1_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
   %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
-  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 0, i32 1>
+  %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 0, i32 1>
   ret <2 x float> %shuf
 }
 
 ; CHECK-LABEL: @extract_elt1_elt2_buffer_load_v4f32(
 ; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
-; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 1, i32 2>
+; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 1, i32 2>
 ; CHECK-NEXT: ret <2 x float> %shuf
 define amdgpu_ps <2 x float> @extract_elt1_elt2_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
   %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
-  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 1, i32 2>
+  %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 1, i32 2>
   ret <2 x float> %shuf
 }
 
 ; CHECK-LABEL: @extract_elt2_elt3_buffer_load_v4f32(
 ; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
-; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 2, i32 3>
+; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 2, i32 3>
 ; CHECK-NEXT: ret <2 x float> %shuf
 define amdgpu_ps <2 x float> @extract_elt2_elt3_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
   %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
-  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 2, i32 3>
+  %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 2, i32 3>
   ret <2 x float> %shuf
 }
 
@@ -128,27 +128,27 @@ define amdgpu_ps <2 x float> @extract_elt2_elt3_buffer_load_v4f32(<4 x i32> inre
 ; CHECK-NEXT: ret <3 x float> %data
 define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
   %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
-  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
+  %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
   ret <3 x float> %shuf
 }
 
 ; CHECK-LABEL: @extract_elt1_elt2_elt3_buffer_load_v4f32(
 ; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
-; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 1, i32 2, i32 3>
+; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 1, i32 2, i32 3>
 ; CHECK-NEXT: ret <3 x float> %shuf
 define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
   %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
-  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 1, i32 2, i32 3>
+  %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 1, i32 2, i32 3>
   ret <3 x float> %shuf
 }
 
 ; CHECK-LABEL: @extract_elt0_elt2_elt3_buffer_load_v4f32(
 ; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
-; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 2, i32 3>
+; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 2, i32 3>
 ; CHECK-NEXT: ret <3 x float> %shuf
 define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
   %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
-  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 2, i32 3>
+  %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 2, i32 3>
   ret <3 x float> %shuf
 }
 
@@ -191,7 +191,7 @@ define amdgpu_ps { float, float, float } @extract_elt0_elt1_elt2_buffer_load_v4f
 ; CHECK-LABEL: @extract_elt0_elt1_elt2_buffer_load_v4f32_3(
 ; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
 ; CHECK-NEXT: %ins1 = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 0, i32 2>
-; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 undef, i32 1>
+; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 undef, i32 1>
 ; CHECK-NEXT: %ret = fadd <2 x float> %ins1, %shuf
 define amdgpu_ps <2 x float> @extract_elt0_elt1_elt2_buffer_load_v4f32_3(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
   %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
@@ -199,7 +199,7 @@ define amdgpu_ps <2 x float> @extract_elt0_elt1_elt2_buffer_load_v4f32_3(<4 x i3
   %elt2 = extractelement <4 x float> %data, i32 2
   %ins0 = insertelement <2 x float> undef, float %elt0, i32 0
   %ins1 = insertelement <2 x float> %ins0, float %elt2, i32 1
-  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 4, i32 1>
+  %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 4, i32 1>
   %ret = fadd <2 x float> %ins1, %shuf
   ret <2 x float> %ret
 }
@@ -207,7 +207,7 @@ define amdgpu_ps <2 x float> @extract_elt0_elt1_elt2_buffer_load_v4f32_3(<4 x i3
 ; CHECK-LABEL: @extract_elt0_elt1_elt2_buffer_load_v4f32_4(
 ; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
 ; CHECK-NEXT: %ins1 = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 0, i32 2>
-; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 1, i32 undef>
+; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 1, i32 undef>
 ; CHECK-NEXT: %ret = fadd <2 x float> %ins1, %shuf
 ; CHECK-NEXT: ret <2 x float> %ret
 define amdgpu_ps <2 x float> @extract_elt0_elt1_elt2_buffer_load_v4f32_4(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
@@ -224,7 +224,7 @@ define amdgpu_ps <2 x float> @extract_elt0_elt1_elt2_buffer_load_v4f32_4(<4 x i3
 ; CHECK-LABEL: @extract_elt0_elt1_elt2_buffer_load_v4f32_5(
 ; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
 ; CHECK-NEXT: %ins1 = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 2, i32 2>
-; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 0, i32 1>
 ; CHECK-NEXT: %ret = fadd <2 x float> %ins1, %shuf
 define amdgpu_ps <2 x float> @extract_elt0_elt1_elt2_buffer_load_v4f32_5(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
   %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
@@ -270,17 +270,17 @@ define amdgpu_ps float @extract_elt2_buffer_load_v3f32(<4 x i32> inreg %rsrc, i3
 ; CHECK-NEXT: ret <2 x float>
 define amdgpu_ps <2 x float> @extract_elt0_elt1_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
   %data = call <3 x float> @llvm.amdgcn.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
-  %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 0, i32 1>
+  %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 0, i32 1>
   ret <2 x float> %shuf
 }
 
 ; CHECK-LABEL: @extract_elt1_elt2_buffer_load_v3f32(
 ; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
-; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 1, i32 2>
+; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 1, i32 2>
 ; CHECK-NEXT: ret <2 x float> %shuf
 define amdgpu_ps <2 x float> @extract_elt1_elt2_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
   %data = call <3 x float> @llvm.amdgcn.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
-  %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 1, i32 2>
+  %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 1, i32 2>
   ret <2 x float> %shuf
 }
 
@@ -325,7 +325,7 @@ define amdgpu_ps float @extract_elt0_buffer_load_format_v2f32(<4 x i32> inreg %r
 ; CHECK-NEXT: ret <2 x float> %data
 define amdgpu_ps <2 x float> @extract_elt0_elt1_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
   %data = call <3 x float> @llvm.amdgcn.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
-  %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 0, i32 1>
+  %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 0, i32 1>
   ret <2 x float> %shuf
 }
 
@@ -334,14 +334,14 @@ define amdgpu_ps <2 x float> @extract_elt0_elt1_buffer_load_format_v3f32(<4 x i3
 ; CHECK-NEXT: ret <2 x float> %data
 define amdgpu_ps <2 x float> @extract_elt0_elt1_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
   %data = call <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
-  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 0, i32 1>
+  %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 0, i32 1>
   ret <2 x float> %shuf
 }
 
 ; The initial insertion point is at the extractelement
 ; CHECK-LABEL: @extract01_bitcast_buffer_load_format_v4f32(
 ; CHECK-NEXT: %tmp = call <2 x float> @llvm.amdgcn.buffer.load.format.v2f32(<4 x i32> undef, i32 %arg, i32 16, i1 false, i1 false)
-; CHECK-NEXT: %1 = shufflevector <2 x float> %tmp, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+; CHECK-NEXT: %1 = shufflevector <2 x float> %tmp, <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
 ; CHECK-NEXT: %tmp1 = bitcast <4 x float> %1 to <2 x double>
 ; CHECK-NEXT: %tmp2 = extractelement <2 x double> %tmp1, i32 0
 ; CHECK-NEXT: ret double %tmp2
@@ -480,7 +480,7 @@ define amdgpu_ps float @extract_elt3_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc
 ; CHECK-NEXT: ret <2 x float>
 define amdgpu_ps <2 x float> @extract_elt0_elt1_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
   %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
-  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 0, i32 1>
+  %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 0, i32 1>
   ret <2 x float> %shuf
 }
 
@@ -490,7 +490,7 @@ define amdgpu_ps <2 x float> @extract_elt0_elt1_raw_buffer_load_v4f32(<4 x i32>
 ; CHECK-NEXT: ret <2 x float> %data
 define amdgpu_ps <2 x float> @extract_elt1_elt2_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
   %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
-  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 1, i32 2>
+  %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 1, i32 2>
   ret <2 x float> %shuf
 }
 
@@ -500,7 +500,7 @@ define amdgpu_ps <2 x float> @extract_elt1_elt2_raw_buffer_load_v4f32(<4 x i32>
 ; CHECK-NEXT: ret <2 x float> %data
 define amdgpu_ps <2 x float> @extract_elt2_elt3_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
   %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
-  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 2, i32 3>
+  %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 2, i32 3>
   ret <2 x float> %shuf
 }
 
@@ -509,7 +509,7 @@ define amdgpu_ps <2 x float> @extract_elt2_elt3_raw_buffer_load_v4f32(<4 x i32>
 ; CHECK-NEXT: ret <3 x float> %data
 define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
   %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
-  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
+  %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
   ret <3 x float> %shuf
 }
 
@@ -519,17 +519,17 @@ define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_raw_buffer_load_v4f32(<4 x
 ; CHECK-NEXT: ret <3 x float> %data
 define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
   %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
-  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 1, i32 2, i32 3>
+  %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 1, i32 2, i32 3>
   ret <3 x float> %shuf
 }
 
 ; CHECK-LABEL: @extract_elt0_elt2_elt3_raw_buffer_load_v4f32(
 ; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
-; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 2, i32 3>
+; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 2, i32 3>
 ; CHECK-NEXT: ret <3 x float> %shuf
 define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
   %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
-  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 2, i32 3>
+  %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 2, i32 3>
   ret <3 x float> %shuf
 }
 
@@ -567,7 +567,7 @@ define amdgpu_ps float @extract_elt2_raw_buffer_load_v3f32(<4 x i32> inreg %rsrc
 ; CHECK-NEXT: ret <2 x float>
 define amdgpu_ps <2 x float> @extract_elt0_elt1_raw_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
   %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
-  %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 0, i32 1>
+  %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 0, i32 1>
   ret <2 x float> %shuf
 }
 
@@ -577,7 +577,7 @@ define amdgpu_ps <2 x float> @extract_elt0_elt1_raw_buffer_load_v3f32(<4 x i32>
 ; CHECK-NEXT: ret <2 x float> %data
 define amdgpu_ps <2 x float> @extract_elt1_elt2_raw_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
   %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
-  %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 1, i32 2>
+  %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 1, i32 2>
   ret <2 x float> %shuf
 }
 
@@ -674,7 +674,7 @@ define amdgpu_ps half @extract_elt3_raw_buffer_load_v4f16(<4 x i32> inreg %rsrc,
 ; CHECK-NEXT: ret <2 x half>
 define amdgpu_ps <2 x half> @extract_elt0_elt1_raw_buffer_load_v4f16(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
   %data = call <4 x half> @llvm.amdgcn.raw.buffer.load.v4f16(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
-  %shuf = shufflevector <4 x half> %data, <4 x half> undef, <2 x i32> <i32 0, i32 1>
+  %shuf = shufflevector <4 x half> %data, <4 x half> poison, <2 x i32> <i32 0, i32 1>
   ret <2 x half> %shuf
 }
 
@@ -737,7 +737,7 @@ define amdgpu_ps i8 @extract_elt3_raw_buffer_load_v4i8(<4 x i32> inreg %rsrc, i3
 ; CHECK-NEXT: ret <2 x i8>
 define amdgpu_ps <2 x i8> @extract_elt0_elt1_raw_buffer_load_v4i8(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
   %data = call <4 x i8> @llvm.amdgcn.raw.buffer.load.v4i8(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
-  %shuf = shufflevector <4 x i8> %data, <4 x i8> undef, <2 x i32> <i32 0, i32 1>
+  %shuf = shufflevector <4 x i8> %data, <4 x i8> poison, <2 x i32> <i32 0, i32 1>
   ret <2 x i8> %shuf
 }
 
@@ -837,7 +837,7 @@ define amdgpu_ps float @extract_elt3_s_buffer_load_v4f32(<4 x i32> inreg %rsrc,
 ; CHECK-NEXT: ret <2 x float>
 define amdgpu_ps <2 x float> @extract_elt0_elt1_s_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
   %data = call <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 0)
-  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 0, i32 1>
+  %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 0, i32 1>
   ret <2 x float> %shuf
 }
 
@@ -847,7 +847,7 @@ define amdgpu_ps <2 x float> @extract_elt0_elt1_s_buffer_load_v4f32(<4 x i32> in
 ; CHECK-NEXT: ret <2 x float> %data
 define amdgpu_ps <2 x float> @extract_elt1_elt2_s_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
   %data = call <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 0)
-  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 1, i32 2>
+  %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 1, i32 2>
   ret <2 x float> %shuf
 }
 
@@ -857,7 +857,7 @@ define amdgpu_ps <2 x float> @extract_elt1_elt2_s_buffer_load_v4f32(<4 x i32> in
 ; CHECK-NEXT: ret <2 x float> %data
 define amdgpu_ps <2 x float> @extract_elt2_elt3_s_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
   %data = call <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 0)
-  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 2, i32 3>
+  %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 2, i32 3>
   ret <2 x float> %shuf
 }
 
@@ -866,17 +866,17 @@ define amdgpu_ps <2 x float> @extract_elt2_elt3_s_buffer_load_v4f32(<4 x i32> in
 ; CHECK-NEXT: ret <3 x float> %data
 define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_s_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
   %data = call <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 0)
-  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
+  %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
   ret <3 x float> %shuf
 }
 
 ; CHECK-LABEL: @extract_elt0_elt2_elt3_s_buffer_load_v4f32(
 ; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 0)
-; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 2, i32 3>
+; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 2, i32 3>
 ; CHECK-NEXT: ret <3 x float> %shuf
 define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_s_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
   %data = call <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 0)
-  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 2, i32 3>
+  %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 2, i32 3>
   ret <3 x float> %shuf
 }
 
@@ -914,7 +914,7 @@ define amdgpu_ps float @extract_elt2_s_buffer_load_v3f32(<4 x i32> inreg %rsrc,
 ; CHECK-NEXT: ret <2 x float>
 define amdgpu_ps <2 x float> @extract_elt0_elt1_s_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
   %data = call <3 x float> @llvm.amdgcn.s.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 0)
-  %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 0, i32 1>
+  %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 0, i32 1>
   ret <2 x float> %shuf
 }
 
@@ -924,7 +924,7 @@ define amdgpu_ps <2 x float> @extract_elt0_elt1_s_buffer_load_v3f32(<4 x i32> in
 ; CHECK-NEXT: ret <2 x float> %data
 define amdgpu_ps <2 x float> @extract_elt1_elt2_s_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
   %data = call <3 x float> @llvm.amdgcn.s.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 0)
-  %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 1, i32 2>
+  %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 1, i32 2>
   ret <2 x float> %shuf
 }
 
@@ -932,11 +932,11 @@ define amdgpu_ps <2 x float> @extract_elt1_elt2_s_buffer_load_v3f32(<4 x i32> in
 ; to vec4 anyway during lowering.
 ; CHECK-LABEL: @extract_elt1_elt2_elt3_s_buffer_load_v4f32(
 ; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 0)
-; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 1, i32 2, i32 3>
+; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 1, i32 2, i32 3>
 ; CHECK-NEXT: ret <3 x float> %shuf
 define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_s_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
   %data = call <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 0)
-  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 1, i32 2, i32 3>
+  %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 1, i32 2, i32 3>
   ret <3 x float> %shuf
 }
 
@@ -1032,7 +1032,7 @@ define amdgpu_ps half @extract_elt3_s_buffer_load_v4f16(<4 x i32> inreg %rsrc, i
 ; CHECK-NEXT: ret <2 x half>
 define amdgpu_ps <2 x half> @extract_elt0_elt1_s_buffer_load_v4f16(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
   %data = call <4 x half> @llvm.amdgcn.s.buffer.load.v4f16(<4 x i32> %rsrc, i32 %ofs, i32 0)
-  %shuf = shufflevector <4 x half> %data, <4 x half> undef, <2 x i32> <i32 0, i32 1>
+  %shuf = shufflevector <4 x half> %data, <4 x half> poison, <2 x i32> <i32 0, i32 1>
   ret <2 x half> %shuf
 }
 
@@ -1095,7 +1095,7 @@ define amdgpu_ps i8 @extract_elt3_s_buffer_load_v4i8(<4 x i32> inreg %rsrc, i32
 ; CHECK-NEXT: ret <2 x i8>
 define amdgpu_ps <2 x i8> @extract_elt0_elt1_s_buffer_load_v4i8(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
   %data = call <4 x i8> @llvm.amdgcn.s.buffer.load.v4i8(<4 x i32> %rsrc, i32 %ofs, i32 0)
-  %shuf = shufflevector <4 x i8> %data, <4 x i8> undef, <2 x i32> <i32 0, i32 1>
+  %shuf = shufflevector <4 x i8> %data, <4 x i8> poison, <2 x i32> <i32 0, i32 1>
   ret <2 x i8> %shuf
 }
 
@@ -1203,27 +1203,27 @@ define amdgpu_ps float @extract_elt3_raw_buffer_load_format_v4f32(<4 x i32> inre
 ; CHECK-NEXT: ret <2 x float>
 define amdgpu_ps <2 x float> @extract_elt0_elt1_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
   %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
-  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 0, i32 1>
+  %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 0, i32 1>
   ret <2 x float> %shuf
 }
 
 ; CHECK-LABEL: @extract_elt1_elt2_raw_buffer_load_format_v4f32(
 ; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
-; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 1, i32 2>
+; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 1, i32 2>
 ; CHECK-NEXT: ret <2 x float> %shuf
 define amdgpu_ps <2 x float> @extract_elt1_elt2_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
   %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
-  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 1, i32 2>
+  %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 1, i32 2>
   ret <2 x float> %shuf
 }
 
 ; CHECK-LABEL: @extract_elt2_elt3_raw_buffer_load_format_v4f32(
 ; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
-; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 2, i32 3>
+; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 2, i32 3>
 ; CHECK-NEXT: ret <2 x float> %shuf
 define amdgpu_ps <2 x float> @extract_elt2_elt3_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
   %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
-  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 2, i32 3>
+  %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 2, i32 3>
   ret <2 x float> %shuf
 }
 
@@ -1232,27 +1232,27 @@ define amdgpu_ps <2 x float> @extract_elt2_elt3_raw_buffer_load_format_v4f32(<4
 ; CHECK-NEXT: ret <3 x float> %data
 define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
   %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
-  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
+  %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
   ret <3 x float> %shuf
 }
 
 ; CHECK-LABEL: @extract_elt1_elt2_elt3_raw_buffer_load_format_v4f32(
 ; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
-; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 1, i32 2, i32 3>
+; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 1, i32 2, i32 3>
 ; CHECK-NEXT: ret <3 x float> %shuf
 define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
   %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
-  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 1, i32 2, i32 3>
+  %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 1, i32 2, i32 3>
   ret <3 x float> %shuf
 }
 
 ; CHECK-LABEL: @extract_elt0_elt2_elt3_raw_buffer_load_format_v4f32(
 ; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
-; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 2, i32 3>
+; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 2, i32 3>
 ; CHECK-NEXT: ret <3 x float> %shuf
 define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
   %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
-  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 2, i32 3>
+  %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 2, i32 3>
   ret <3 x float> %shuf
 }
 
@@ -1290,17 +1290,17 @@ define amdgpu_ps float @extract_elt2_raw_buffer_load_format_v3f32(<4 x i32> inre
 ; CHECK-NEXT: ret <2 x float>
 define amdgpu_ps <2 x float> @extract_elt0_elt1_raw_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
   %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
-  %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 0, i32 1>
+  %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 0, i32 1>
   ret <2 x float> %shuf
 }
 
 ; CHECK-LABEL: @extract_elt1_elt2_raw_buffer_load_format_v3f32(
 ; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
-; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 1, i32 2>
+; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 1, i32 2>
 ; CHECK-NEXT: ret <2 x float> %shuf
 define amdgpu_ps <2 x float> @extract_elt1_elt2_raw_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
   %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
-  %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 1, i32 2>
+  %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 1, i32 2>
   ret <2 x float> %shuf
 }
 
@@ -1438,7 +1438,7 @@ define amdgpu_ps float @extract_elt3_struct_buffer_load_v4f32(<4 x i32> inreg %r
 ; CHECK-NEXT: ret <2 x float>
 define amdgpu_ps <2 x float> @extract_elt0_elt1_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
   %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
-  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 0, i32 1>
+  %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 0, i32 1>
   ret <2 x float> %shuf
 }
 
@@ -1448,7 +1448,7 @@ define amdgpu_ps <2 x float> @extract_elt0_elt1_struct_buffer_load_v4f32(<4 x i3
 ; CHECK-NEXT: ret <2 x float> %data
 define amdgpu_ps <2 x float> @extract_elt1_elt2_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
   %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
-  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 1, i32 2>
+  %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 1, i32 2>
   ret <2 x float> %shuf
 }
 
@@ -1458,7 +1458,7 @@ define amdgpu_ps <2 x float> @extract_elt1_elt2_struct_buffer_load_v4f32(<4 x i3
 ; CHECK-NEXT: ret <2 x float> %data
 define amdgpu_ps <2 x float> @extract_elt2_elt3_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
   %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
-  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 2, i32 3>
+  %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 2, i32 3>
   ret <2 x float> %shuf
 }
 
@@ -1467,7 +1467,7 @@ define amdgpu_ps <2 x float> @extract_elt2_elt3_struct_buffer_load_v4f32(<4 x i3
 ; CHECK-NEXT: ret <3 x float> %data
 define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
   %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
-  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
+  %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
   ret <3 x float> %shuf
 }
 
@@ -1477,17 +1477,17 @@ define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_struct_buffer_load_v4f32(<4
 ; CHECK-NEXT: ret <3 x float> %data
 define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
   %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
-  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 1, i32 2, i32 3>
+  %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 1, i32 2, i32 3>
   ret <3 x float> %shuf
 }
 
 ; CHECK-LABEL: @extract_elt0_elt2_elt3_struct_buffer_load_v4f32(
 ; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
-; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 2, i32 3>
+; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 2, i32 3>
 ; CHECK-NEXT: ret <3 x float> %shuf
 define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
   %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
-  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 2, i32 3>
+  %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 2, i32 3>
   ret <3 x float> %shuf
 }
 
@@ -1525,7 +1525,7 @@ define amdgpu_ps float @extract_elt2_struct_buffer_load_v3f32(<4 x i32> inreg %r
 ; CHECK-NEXT: ret <2 x float>
 define amdgpu_ps <2 x float> @extract_elt0_elt1_struct_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
   %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
-  %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 0, i32 1>
+  %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 0, i32 1>
   ret <2 x float> %shuf
 }
 
@@ -1535,7 +1535,7 @@ define amdgpu_ps <2 x float> @extract_elt0_elt1_struct_buffer_load_v3f32(<4 x i3
 ; CHECK-NEXT: ret <2 x float> %data
 define amdgpu_ps <2 x float> @extract_elt1_elt2_struct_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
   %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
-  %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 1, i32 2>
+  %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 1, i32 2>
   ret <2 x float> %shuf
 }
 
@@ -1632,7 +1632,7 @@ define amdgpu_ps half @extract_elt3_struct_buffer_load_v4f16(<4 x i32> inreg %rs
 ; CHECK-NEXT: ret <2 x half>
 define amdgpu_ps <2 x half> @extract_elt0_elt1_struct_buffer_load_v4f16(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
   %data = call <4 x half> @llvm.amdgcn.struct.buffer.load.v4f16(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
-  %shuf = shufflevector <4 x half> %data, <4 x half> undef, <2 x i32> <i32 0, i32 1>
+  %shuf = shufflevector <4 x half> %data, <4 x half> poison, <2 x i32> <i32 0, i32 1>
   ret <2 x half> %shuf
 }
 
@@ -1695,7 +1695,7 @@ define amdgpu_ps i8 @extract_elt3_struct_buffer_load_v4i8(<4 x i32> inreg %rsrc,
 ; CHECK-NEXT: ret <2 x i8>
 define amdgpu_ps <2 x i8> @extract_elt0_elt1_struct_buffer_load_v4i8(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
   %data = call <4 x i8> @llvm.amdgcn.struct.buffer.load.v4i8(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
-  %shuf = shufflevector <4 x i8> %data, <4 x i8> undef, <2 x i32> <i32 0, i32 1>
+  %shuf = shufflevector <4 x i8> %data, <4 x i8> poison, <2 x i32> <i32 0, i32 1>
   ret <2 x i8> %shuf
 }
 
@@ -1803,27 +1803,27 @@ define amdgpu_ps float @extract_elt3_struct_buffer_load_format_v4f32(<4 x i32> i
 ; CHECK-NEXT: ret <2 x float>
 define amdgpu_ps <2 x float> @extract_elt0_elt1_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
   %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
-  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 0, i32 1>
+  %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 0, i32 1>
   ret <2 x float> %shuf
 }
 
 ; CHECK-LABEL: @extract_elt1_elt2_struct_buffer_load_format_v4f32(
 ; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
-; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 1, i32 2>
+; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 1, i32 2>
 ; CHECK-NEXT: ret <2 x float> %shuf
 define amdgpu_ps <2 x float> @extract_elt1_elt2_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
   %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
-  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 1, i32 2>
+  %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 1, i32 2>
   ret <2 x float> %shuf
 }
 
 ; CHECK-LABEL: @extract_elt2_elt3_struct_buffer_load_format_v4f32(
 ; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
-; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 2, i32 3>
+; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 2, i32 3>
 ; CHECK-NEXT: ret <2 x float> %shuf
 define amdgpu_ps <2 x float> @extract_elt2_elt3_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
   %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
-  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 2, i32 3>
+  %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 2, i32 3>
   ret <2 x float> %shuf
 }
 
@@ -1832,27 +1832,27 @@ define amdgpu_ps <2 x float> @extract_elt2_elt3_struct_buffer_load_format_v4f32(
 ; CHECK-NEXT: ret <3 x float> %data
 define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
   %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
-  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
+  %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
   ret <3 x float> %shuf
 }
 
 ; CHECK-LABEL: @extract_elt1_elt2_elt3_struct_buffer_load_format_v4f32(
 ; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
-; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 1, i32 2, i32 3>
+; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 1, i32 2, i32 3>
 ; CHECK-NEXT: ret <3 x float> %shuf
 define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
   %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
-  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 1, i32 2, i32 3>
+  %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 1, i32 2, i32 3>
   ret <3 x float> %shuf
 }
 
 ; CHECK-LABEL: @extract_elt0_elt2_elt3_struct_buffer_load_format_v4f32(
 ; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
-; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 2, i32 3>
+; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 2, i32 3>
 ; CHECK-NEXT: ret <3 x float> %shuf
 define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
   %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
-  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 2, i32 3>
+  %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 2, i32 3>
   ret <3 x float> %shuf
 }
 
@@ -1890,17 +1890,17 @@ define amdgpu_ps float @extract_elt2_struct_buffer_load_format_v3f32(<4 x i32> i
 ; CHECK-NEXT: ret <2 x float>
 define amdgpu_ps <2 x float> @extract_elt0_elt1_struct_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
   %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
-  %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 0, i32 1>
+  %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 0, i32 1>
   ret <2 x float> %shuf
 }
 
 ; CHECK-LABEL: @extract_elt1_elt2_struct_buffer_load_format_v3f32(
 ; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
-; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 1, i32 2>
+; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 1, i32 2>
 ; CHECK-NEXT: ret <2 x float> %shuf
 define amdgpu_ps <2 x float> @extract_elt1_elt2_struct_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
   %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
-  %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 1, i32 2>
+  %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 1, i32 2>
   ret <2 x float> %shuf
 }
 
@@ -2023,27 +2023,27 @@ define amdgpu_ps float @extract_elt3_raw_tbuffer_load_v4f32(<4 x i32> inreg %rsr
 ; CHECK-NEXT: ret <2 x float>
 define amdgpu_ps <2 x float> @extract_elt0_elt1_raw_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
   %data = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
-  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 0, i32 1>
+  %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 0, i32 1>
   ret <2 x float> %shuf
 }
 
 ; CHECK-LABEL: @extract_elt1_elt2_raw_tbuffer_load_v4f32(
 ; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.raw.tbuffer.load.v3f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
-; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 1, i32 2>
+; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 1, i32 2>
 ; CHECK-NEXT: ret <2 x float> %shuf
 define amdgpu_ps <2 x float> @extract_elt1_elt2_raw_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
   %data = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
-  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 1, i32 2>
+  %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 1, i32 2>
   ret <2 x float> %shuf
 }
 
 ; CHECK-LABEL: @extract_elt2_elt3_raw_tbuffer_load_v4f32(
 ; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
-; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 2, i32 3>
+; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 2, i32 3>
 ; CHECK-NEXT: ret <2 x float> %shuf
 define amdgpu_ps <2 x float> @extract_elt2_elt3_raw_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
   %data = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
-  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 2, i32 3>
+  %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 2, i32 3>
   ret <2 x float> %shuf
 }
 
@@ -2052,27 +2052,27 @@ define amdgpu_ps <2 x float> @extract_elt2_elt3_raw_tbuffer_load_v4f32(<4 x i32>
 ; CHECK-NEXT: ret <3 x float> %data
 define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_raw_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
   %data = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
-  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
+  %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
   ret <3 x float> %shuf
 }
 
 ; CHECK-LABEL: @extract_elt1_elt2_elt3_raw_tbuffer_load_v4f32(
 ; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
-; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 1, i32 2, i32 3>
+; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 1, i32 2, i32 3>
 ; CHECK-NEXT: ret <3 x float> %shuf
 define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_raw_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
   %data = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
-  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 1, i32 2, i32 3>
+  %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 1, i32 2, i32 3>
   ret <3 x float> %shuf
 }
 
 ; CHECK-LABEL: @extract_elt0_elt2_elt3_raw_tbuffer_load_v4f32(
 ; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
-; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 2, i32 3>
+; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 2, i32 3>
 ; CHECK-NEXT: ret <3 x float> %shuf
 define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_raw_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
   %data = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
-  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 2, i32 3>
+  %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 2, i32 3>
   ret <3 x float> %shuf
 }
 
@@ -2110,17 +2110,17 @@ define amdgpu_ps float @extract_elt2_raw_tbuffer_load_v3f32(<4 x i32> inreg %rsr
 ; CHECK-NEXT: ret <2 x float>
 define amdgpu_ps <2 x float> @extract_elt0_elt1_raw_tbuffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
   %data = call <3 x float> @llvm.amdgcn.raw.tbuffer.load.v3f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
-  %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 0, i32 1>
+  %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 0, i32 1>
   ret <2 x float> %shuf
 }
 
 ; CHECK-LABEL: @extract_elt1_elt2_raw_tbuffer_load_v3f32(
 ; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.raw.tbuffer.load.v3f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
-; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 1, i32 2>
+; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 1, i32 2>
 ; CHECK-NEXT: ret <2 x float> %shuf
 define amdgpu_ps <2 x float> @extract_elt1_elt2_raw_tbuffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 {
   %data = call <3 x float> @llvm.amdgcn.raw.tbuffer.load.v3f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0)
-  %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 1, i32 2>
+  %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 1, i32 2>
   ret <2 x float> %shuf
 }
 
@@ -2286,27 +2286,27 @@ define amdgpu_ps float @extract_elt3_struct_tbuffer_load_v4f32(<4 x i32> inreg %
 ; CHECK-NEXT: ret <2 x float>
 define amdgpu_ps <2 x float> @extract_elt0_elt1_struct_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
   %data = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
-  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 0, i32 1>
+  %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 0, i32 1>
   ret <2 x float> %shuf
 }
 
 ; CHECK-LABEL: @extract_elt1_elt2_struct_tbuffer_load_v4f32(
 ; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.struct.tbuffer.load.v3f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
-; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 1, i32 2>
+; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 1, i32 2>
 ; CHECK-NEXT: ret <2 x float> %shuf
 define amdgpu_ps <2 x float> @extract_elt1_elt2_struct_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
   %data = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
-  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 1, i32 2>
+  %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 1, i32 2>
   ret <2 x float> %shuf
 }
 
 ; CHECK-LABEL: @extract_elt2_elt3_struct_tbuffer_load_v4f32(
 ; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
-; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 2, i32 3>
+; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 2, i32 3>
 ; CHECK-NEXT: ret <2 x float> %shuf
 define amdgpu_ps <2 x float> @extract_elt2_elt3_struct_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
   %data = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
-  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 2, i32 3>
+  %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 2, i32 3>
   ret <2 x float> %shuf
 }
 
@@ -2315,27 +2315,27 @@ define amdgpu_ps <2 x float> @extract_elt2_elt3_struct_tbuffer_load_v4f32(<4 x i
 ; CHECK-NEXT: ret <3 x float> %data
 define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_struct_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
   %data = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
-  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
+  %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
   ret <3 x float> %shuf
 }
 
 ; CHECK-LABEL: @extract_elt1_elt2_elt3_struct_tbuffer_load_v4f32(
 ; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
-; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 1, i32 2, i32 3>
+; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 1, i32 2, i32 3>
 ; CHECK-NEXT: ret <3 x float> %shuf
 define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_struct_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
   %data = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
-  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 1, i32 2, i32 3>
+  %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 1, i32 2, i32 3>
   ret <3 x float> %shuf
 }
 
 ; CHECK-LABEL: @extract_elt0_elt2_elt3_struct_tbuffer_load_v4f32(
 ; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
-; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 2, i32 3>
+; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 2, i32 3>
 ; CHECK-NEXT: ret <3 x float> %shuf
 define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_struct_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
   %data = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
-  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 2, i32 3>
+  %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 2, i32 3>
   ret <3 x float> %shuf
 }
 
@@ -2373,17 +2373,17 @@ define amdgpu_ps float @extract_elt2_struct_tbuffer_load_v3f32(<4 x i32> inreg %
 ; CHECK-NEXT: ret <2 x float>
 define amdgpu_ps <2 x float> @extract_elt0_elt1_struct_tbuffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
   %data = call <3 x float> @llvm.amdgcn.struct.tbuffer.load.v3f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
-  %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 0, i32 1>
+  %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 0, i32 1>
   ret <2 x float> %shuf
 }
 
 ; CHECK-LABEL: @extract_elt1_elt2_struct_tbuffer_load_v3f32(
 ; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.struct.tbuffer.load.v3f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
-; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 1, i32 2>
+; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 1, i32 2>
 ; CHECK-NEXT: ret <2 x float> %shuf
 define amdgpu_ps <2 x float> @extract_elt1_elt2_struct_tbuffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 {
   %data = call <3 x float> @llvm.amdgcn.struct.tbuffer.load.v3f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0)
-  %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 1, i32 2>
+  %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 1, i32 2>
   ret <2 x float> %shuf
 }
 
@@ -2505,27 +2505,27 @@ define amdgpu_ps float @extract_elt3_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i
 ; CHECK-NEXT: ret <2 x float>
 define amdgpu_ps <2 x float> @extract_elt0_elt1_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1) #0 {
   %data = call <4 x float> @llvm.amdgcn.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 0, i32 0, i32 14, i32 4, i1 false, i1 false)
-  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 0, i32 1>
+  %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 0, i32 1>
   ret <2 x float> %shuf
 }
 
 ; CHECK-LABEL: @extract_elt1_elt2_tbuffer_load_v4f32(
 ; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.tbuffer.load.v3f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 0, i32 0, i32 14, i32 4, i1 false, i1 false)
-; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 1, i32 2>
+; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 1, i32 2>
 ; CHECK-NEXT: ret <2 x float> %shuf
 define amdgpu_ps <2 x float> @extract_elt1_elt2_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1) #0 {
   %data = call <4 x float> @llvm.amdgcn.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 0, i32 0, i32 14, i32 4, i1 false, i1 false)
-  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 1, i32 2>
+  %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 1, i32 2>
   ret <2 x float> %shuf
 }
 
 ; CHECK-LABEL: @extract_elt2_elt3_tbuffer_load_v4f32(
 ; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 0, i32 0, i32 14, i32 4, i1 false, i1 false)
-; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 2, i32 3>
+; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 2, i32 3>
 ; CHECK-NEXT: ret <2 x float> %shuf
 define amdgpu_ps <2 x float> @extract_elt2_elt3_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1) #0 {
   %data = call <4 x float> @llvm.amdgcn.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 0, i32 0, i32 14, i32 4, i1 false, i1 false)
-  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 2, i32 3>
+  %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 2, i32 3>
   ret <2 x float> %shuf
 }
 
@@ -2534,27 +2534,27 @@ define amdgpu_ps <2 x float> @extract_elt2_elt3_tbuffer_load_v4f32(<4 x i32> inr
 ; CHECK-NEXT: ret <3 x float> %data
 define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1) #0 {
   %data = call <4 x float> @llvm.amdgcn.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 0, i32 0, i32 14, i32 4, i1 false, i1 false)
-  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
+  %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
   ret <3 x float> %shuf
 }
 
 ; CHECK-LABEL: @extract_elt1_elt2_elt3_tbuffer_load_v4f32(
 ; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 0, i32 0, i32 14, i32 4, i1 false, i1 false)
-; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 1, i32 2, i32 3>
+; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 1, i32 2, i32 3>
 ; CHECK-NEXT: ret <3 x float> %shuf
 define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1) #0 {
   %data = call <4 x float> @llvm.amdgcn.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 0, i32 0, i32 14, i32 4, i1 false, i1 false)
-  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 1, i32 2, i32 3>
+  %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 1, i32 2, i32 3>
   ret <3 x float> %shuf
 }
 
 ; CHECK-LABEL: @extract_elt0_elt2_elt3_tbuffer_load_v4f32(
 ; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 0, i32 0, i32 14, i32 4, i1 false, i1 false)
-; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 2, i32 3>
+; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 2, i32 3>
 ; CHECK-NEXT: ret <3 x float> %shuf
 define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1) #0 {
   %data = call <4 x float> @llvm.amdgcn.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 0, i32 0, i32 14, i32 4, i1 false, i1 false)
-  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 2, i32 3>
+  %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 2, i32 3>
   ret <3 x float> %shuf
 }
 
@@ -2592,17 +2592,17 @@ define amdgpu_ps float @extract_elt2_tbuffer_load_v3f32(<4 x i32> inreg %rsrc, i
 ; CHECK-NEXT: ret <2 x float>
 define amdgpu_ps <2 x float> @extract_elt0_elt1_tbuffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1) #0 {
   %data = call <3 x float> @llvm.amdgcn.tbuffer.load.v3f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 0, i32 0, i32 14, i32 4, i1 false, i1 false)
-  %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 0, i32 1>
+  %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 0, i32 1>
   ret <2 x float> %shuf
 }
 
 ; CHECK-LABEL: @extract_elt1_elt2_tbuffer_load_v3f32(
 ; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.tbuffer.load.v3f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 0, i32 0, i32 14, i32 4, i1 false, i1 false)
-; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 1, i32 2>
+; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 1, i32 2>
 ; CHECK-NEXT: ret <2 x float> %shuf
 define amdgpu_ps <2 x float> @extract_elt1_elt2_tbuffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1) #0 {
   %data = call <3 x float> @llvm.amdgcn.tbuffer.load.v3f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 0, i32 0, i32 14, i32 4, i1 false, i1 false)
-  %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 1, i32 2>
+  %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 1, i32 2>
   ret <2 x float> %shuf
 }
 
@@ -2755,7 +2755,7 @@ define amdgpu_ps float @extract_elt0_dmask_0111_image_sample_1d_v4f32_f32(float
 ; CHECK-NEXT: ret <2 x float> %1
 define amdgpu_ps <2 x float> @extract_elt0_elt1_dmask_0001_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
   %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 1, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
-  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 0, i32 1>
+  %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 0, i32 1>
   ret <2 x float> %shuf
 }
 
@@ -2764,7 +2764,7 @@ define amdgpu_ps <2 x float> @extract_elt0_elt1_dmask_0001_image_sample_1d_v4f32
 ; CHECK-NEXT: ret <2 x float> %data
 define amdgpu_ps <2 x float> @extract_elt0_elt1_dmask_0011_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
   %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 3, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
-  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 0, i32 1>
+  %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 0, i32 1>
   ret <2 x float> %shuf
 }
 
@@ -2773,7 +2773,7 @@ define amdgpu_ps <2 x float> @extract_elt0_elt1_dmask_0011_image_sample_1d_v4f32
 ; CHECK-NEXT: ret <2 x float> %data
 define amdgpu_ps <2 x float> @extract_elt0_elt1_dmask_0111_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
   %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 7, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
-  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 0, i32 1>
+  %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 0, i32 1>
   ret <2 x float> %shuf
 }
 
@@ -2782,7 +2782,7 @@ define amdgpu_ps <2 x float> @extract_elt0_elt1_dmask_0111_image_sample_1d_v4f32
 ; CHECK-NEXT: ret <2 x float> %data
 define amdgpu_ps <2 x float> @extract_elt0_elt1_dmask_0101_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
   %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 5, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
-  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 0, i32 1>
+  %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 0, i32 1>
   ret <2 x float> %shuf
 }
 
@@ -2792,27 +2792,27 @@ define amdgpu_ps <2 x float> @extract_elt0_elt1_dmask_0101_image_sample_1d_v4f32
 ; CHECK-NEXT: ret <3 x float> %1
 define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_dmask_0001_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
   %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 1, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
-  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
+  %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
   ret <3 x float> %shuf
 }
 
 ; CHECK-LABEL: @extract_elt0_elt1_elt2_dmask_0011_image_sample_1d_v4f32_f32(
 ; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32(i32 3, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
-; CHECK-NEXT: %shuf = shufflevector <2 x float> %data, <2 x float> undef, <3 x i32> <i32 0, i32 1, i32 undef>
+; CHECK-NEXT: %shuf = shufflevector <2 x float> %data, <2 x float> poison, <3 x i32> <i32 0, i32 1, i32 undef>
 ; CHECK-NEXT: ret <3 x float> %shuf
 define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_dmask_0011_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
   %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 3, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
-  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
+  %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
   ret <3 x float> %shuf
 }
 
 ; CHECK-LABEL: @extract_elt0_elt1_elt2_dmask_0101_image_sample_1d_v4f32_f32(
 ; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32(i32 5, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
-; CHECK-NEXT: %shuf = shufflevector <2 x float> %data, <2 x float> undef, <3 x i32> <i32 0, i32 1, i32 undef>
+; CHECK-NEXT: %shuf = shufflevector <2 x float> %data, <2 x float> poison, <3 x i32> <i32 0, i32 1, i32 undef>
 ; CHECK-NEXT: ret <3 x float> %shuf
 define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_dmask_0101_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
   %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 5, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
-  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
+  %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
   ret <3 x float> %shuf
 }
 
@@ -2821,7 +2821,7 @@ define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_dmask_0101_image_sample_1d_
 ; CHECK-NEXT: ret <3 x float> %data
 define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_dmask_0111_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
   %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 7, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
-  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
+  %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
   ret <3 x float> %shuf
 }
 
@@ -2830,7 +2830,7 @@ define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_dmask_0111_image_sample_1d_
 ; CHECK-NEXT: ret <3 x float> %data
 define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_dmask_1111_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
   %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
-  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
+  %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
   ret <3 x float> %shuf
 }
 
@@ -2924,7 +2924,7 @@ declare <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f32(i32, float, flo
 ; CHECK-NEXT: ret <2 x float> %data
 define amdgpu_ps <2 x float> @extract_elt1_elt2_dmask_1101_image_sample_b_cl_1d_v4f32_f32_f32(float %bias, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
   %data = call <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f32(i32 13, float %bias, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
-  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 1, i32 2>
+  %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 1, i32 2>
   ret <2 x float> %shuf
 }
 
@@ -2939,7 +2939,7 @@ declare <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f32(i32, float,
 ; CHECK-NEXT: ret <2 x float> %data
 define amdgpu_ps <2 x float> @extract_elt1_elt3_image_sample_lz_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
   %data = call <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f32(i32 15, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
-  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 1, i32 3>
+  %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 1, i32 3>
   ret <2 x float> %shuf
 }
 
@@ -2954,7 +2954,7 @@ declare <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f32(i32, float, <8 x i
 ; CHECK-NEXT: ret <3 x float> %data
 define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_image_sample_cd_1d_v4f32_f32_f32(float %dsdh, float %dsdv, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
   %data = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f32.f32(i32 15, float %dsdh, float %dsdv, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
-  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 1, i32 2, i32 3>
+  %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 1, i32 2, i32 3>
   ret <3 x float> %shuf
 }
 
@@ -2993,31 +2993,31 @@ define amdgpu_ps half @extract_elt1_image_sample_cd_cl_1d_v4f16_f32_f32(float %d
 
 ; CHECK-LABEL: @extract_elt_to3_image_sample_cd_cl_1d_v4f16_f32_f32(
 ; CHECK-NEXT: %data = call <3 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v3f16.f32.f32(i32 7, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
-; CHECK-NEXT: %res = shufflevector <3 x half> %data, <3 x half> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
+; CHECK-NEXT: %res = shufflevector <3 x half> %data, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
 ; CHECK-NEXT: ret <4 x half> %res
 define amdgpu_ps <4 x half> @extract_elt_to3_image_sample_cd_cl_1d_v4f16_f32_f32(float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
   %data = call <4 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v4f16.f32.f32(i32 15, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
-  %res = shufflevector <4 x half> %data, <4 x half> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+  %res = shufflevector <4 x half> %data, <4 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
   ret <4 x half> %res
 }
 
 ; CHECK-LABEL: @extract_elt_to2_image_sample_cd_cl_1d_v4f16_f32_f32(
 ; CHECK-NEXT: %data = call <2 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v2f16.f32.f32(i32 3, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
-; CHECK-NEXT: %res = shufflevector <2 x half> %data, <2 x half> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+; CHECK-NEXT: %res = shufflevector <2 x half> %data, <2 x half> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
 ; CHECK-NEXT: ret <4 x half> %res
 define amdgpu_ps <4 x half> @extract_elt_to2_image_sample_cd_cl_1d_v4f16_f32_f32(float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
   %data = call <4 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v4f16.f32.f32(i32 15, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
-  %res = shufflevector <4 x half> %data, <4 x half> undef, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+  %res = shufflevector <4 x half> %data, <4 x half> poison, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
   ret <4 x half> %res
 }
 
 ; CHECK-LABEL: @extract_elt_to1_image_sample_cd_cl_1d_v4f16_f32_f32(
 ; CHECK-NEXT: %data = call half @llvm.amdgcn.image.sample.cd.cl.1d.f16.f32.f32(i32 1, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
-; CHECK-NEXT: %res = insertelement <4 x half> undef, half %data, i64 0
+; CHECK-NEXT: %res = insertelement <4 x half> poison, half %data, i64 0
 ; CHECK-NEXT: ret <4 x half> %res
 define amdgpu_ps <4 x half> @extract_elt_to1_image_sample_cd_cl_1d_v4f16_f32_f32(float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
   %data = call <4 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v4f16.f32.f32(i32 15, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
-  %res = shufflevector <4 x half> %data, <4 x half> undef, <4 x i32> <i32 0, i32 4, i32 5, i32 6>
+  %res = shufflevector <4 x half> %data, <4 x half> poison, <4 x i32> <i32 0, i32 4, i32 5, i32 6>
   ret <4 x half> %res
 }
 

diff  --git a/llvm/test/Transforms/InstCombine/canonicalize-vector-insert.ll b/llvm/test/Transforms/InstCombine/canonicalize-vector-insert.ll
index 413d8155787f..e504d3d0bb1b 100644
--- a/llvm/test/Transforms/InstCombine/canonicalize-vector-insert.ll
+++ b/llvm/test/Transforms/InstCombine/canonicalize-vector-insert.ll
@@ -30,7 +30,7 @@ define <8 x i32> @trivial_nop(<8 x i32> %vec, <8 x i32> %subvec) {
 
 define <8 x i32> @valid_insertion_a(<8 x i32> %vec, <2 x i32> %subvec) {
 ; CHECK-LABEL: @valid_insertion_a(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i32> [[SUBVEC:%.*]], <2 x i32> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i32> [[SUBVEC:%.*]], <2 x i32> poison, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[VEC:%.*]], <8 x i32> <i32 0, i32 1, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
 ; CHECK-NEXT:    ret <8 x i32> [[TMP2]]
 ;
@@ -40,7 +40,7 @@ define <8 x i32> @valid_insertion_a(<8 x i32> %vec, <2 x i32> %subvec) {
 
 define <8 x i32> @valid_insertion_b(<8 x i32> %vec, <2 x i32> %subvec) {
 ; CHECK-LABEL: @valid_insertion_b(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i32> [[SUBVEC:%.*]], <2 x i32> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i32> [[SUBVEC:%.*]], <2 x i32> poison, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i32> [[VEC:%.*]], <8 x i32> [[TMP1]], <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 4, i32 5, i32 6, i32 7>
 ; CHECK-NEXT:    ret <8 x i32> [[TMP2]]
 ;
@@ -50,7 +50,7 @@ define <8 x i32> @valid_insertion_b(<8 x i32> %vec, <2 x i32> %subvec) {
 
 define <8 x i32> @valid_insertion_c(<8 x i32> %vec, <2 x i32> %subvec) {
 ; CHECK-LABEL: @valid_insertion_c(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i32> [[SUBVEC:%.*]], <2 x i32> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i32> [[SUBVEC:%.*]], <2 x i32> poison, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i32> [[VEC:%.*]], <8 x i32> [[TMP1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 6, i32 7>
 ; CHECK-NEXT:    ret <8 x i32> [[TMP2]]
 ;
@@ -60,7 +60,7 @@ define <8 x i32> @valid_insertion_c(<8 x i32> %vec, <2 x i32> %subvec) {
 
 define <8 x i32> @valid_insertion_d(<8 x i32> %vec, <2 x i32> %subvec) {
 ; CHECK-LABEL: @valid_insertion_d(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i32> [[SUBVEC:%.*]], <2 x i32> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i32> [[SUBVEC:%.*]], <2 x i32> poison, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i32> [[VEC:%.*]], <8 x i32> [[TMP1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
 ; CHECK-NEXT:    ret <8 x i32> [[TMP2]]
 ;
@@ -70,7 +70,7 @@ define <8 x i32> @valid_insertion_d(<8 x i32> %vec, <2 x i32> %subvec) {
 
 define <8 x i32> @valid_insertion_e(<8 x i32> %vec, <4 x i32> %subvec) {
 ; CHECK-LABEL: @valid_insertion_e(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[SUBVEC:%.*]], <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[SUBVEC:%.*]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[VEC:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
 ; CHECK-NEXT:    ret <8 x i32> [[TMP2]]
 ;
@@ -80,7 +80,7 @@ define <8 x i32> @valid_insertion_e(<8 x i32> %vec, <4 x i32> %subvec) {
 
 define <8 x i32> @valid_insertion_f(<8 x i32> %vec, <4 x i32> %subvec) {
 ; CHECK-LABEL: @valid_insertion_f(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[SUBVEC:%.*]], <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[SUBVEC:%.*]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i32> [[VEC:%.*]], <8 x i32> [[TMP1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
 ; CHECK-NEXT:    ret <8 x i32> [[TMP2]]
 ;
@@ -90,7 +90,7 @@ define <8 x i32> @valid_insertion_f(<8 x i32> %vec, <4 x i32> %subvec) {
 
 define <8 x i32> @valid_insertion_g(<8 x i32> %vec, <3 x i32> %subvec) {
 ; CHECK-LABEL: @valid_insertion_g(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <3 x i32> [[SUBVEC:%.*]], <3 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <3 x i32> [[SUBVEC:%.*]], <3 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[VEC:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 11, i32 12, i32 13, i32 14, i32 15>
 ; CHECK-NEXT:    ret <8 x i32> [[TMP2]]
 ;
@@ -100,7 +100,7 @@ define <8 x i32> @valid_insertion_g(<8 x i32> %vec, <3 x i32> %subvec) {
 
 define <8 x i32> @valid_insertion_h(<8 x i32> %vec, <3 x i32> %subvec) {
 ; CHECK-LABEL: @valid_insertion_h(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <3 x i32> [[SUBVEC:%.*]], <3 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <3 x i32> [[SUBVEC:%.*]], <3 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i32> [[VEC:%.*]], <8 x i32> [[TMP1]], <8 x i32> <i32 0, i32 1, i32 2, i32 8, i32 9, i32 10, i32 6, i32 7>
 ; CHECK-NEXT:    ret <8 x i32> [[TMP2]]
 ;

diff  --git a/llvm/test/Transforms/InterleavedAccess/AArch64/interleaved-accesses-inseltpoison.ll b/llvm/test/Transforms/InterleavedAccess/AArch64/interleaved-accesses-inseltpoison.ll
index ee47da779386..1b5e3ea0c656 100644
--- a/llvm/test/Transforms/InterleavedAccess/AArch64/interleaved-accesses-inseltpoison.ll
+++ b/llvm/test/Transforms/InterleavedAccess/AArch64/interleaved-accesses-inseltpoison.ll
@@ -609,10 +609,10 @@ define void @load_factor2_wide3(<24 x i32>* %ptr) {
 ; NEON-NEXT:       [[TMP11:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[LDN2]], 1
 ; NEON-NEXT:       [[TMP12:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[LDN2]], 0
 ; NEON-NEXT:       [[TMP13:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP7]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; NEON-NEXT:       [[TMP14:%.*]] = shufflevector <4 x i32> [[TMP11]], <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
+; NEON-NEXT:       [[TMP14:%.*]] = shufflevector <4 x i32> [[TMP11]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
 ; NEON-NEXT:       [[TMP15:%.*]] = shufflevector <8 x i32> [[TMP13]], <8 x i32> [[TMP14]], <12 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
 ; NEON-NEXT:       [[TMP16:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP8]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; NEON-NEXT:       [[TMP17:%.*]] = shufflevector <4 x i32> [[TMP12]], <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
+; NEON-NEXT:       [[TMP17:%.*]] = shufflevector <4 x i32> [[TMP12]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
 ; NEON-NEXT:       [[TMP18:%.*]] = shufflevector <8 x i32> [[TMP16]], <8 x i32> [[TMP17]], <12 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
 ; NEON-NEXT:       ret void
 ; NO_NEON-LABEL: @load_factor2_wide3(

diff  --git a/llvm/test/Transforms/InterleavedAccess/AArch64/interleaved-accesses.ll b/llvm/test/Transforms/InterleavedAccess/AArch64/interleaved-accesses.ll
index c8efb41ce737..1b5e3ea0c656 100644
--- a/llvm/test/Transforms/InterleavedAccess/AArch64/interleaved-accesses.ll
+++ b/llvm/test/Transforms/InterleavedAccess/AArch64/interleaved-accesses.ll
@@ -16,8 +16,8 @@ define void @load_factor2(<16 x i8>* %ptr) {
 ; NO_NEON:         ret void
 ;
   %interleaved.vec = load <16 x i8>, <16 x i8>* %ptr, align 4
-  %v0 = shufflevector <16 x i8> %interleaved.vec, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
-  %v1 = shufflevector <16 x i8> %interleaved.vec, <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+  %v0 = shufflevector <16 x i8> %interleaved.vec, <16 x i8> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+  %v1 = shufflevector <16 x i8> %interleaved.vec, <16 x i8> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
   ret void
 }
 
@@ -34,9 +34,9 @@ define void @load_factor3(<12 x i32>* %ptr) {
 ; NO_NEON:         ret void
 ;
   %interleaved.vec = load <12 x i32>, <12 x i32>* %ptr, align 4
-  %v0 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> undef, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
-  %v1 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> undef, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
-  %v2 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> undef, <4 x i32> <i32 2, i32 5, i32 8, i32 11>
+  %v0 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> poison, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
+  %v1 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> poison, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
+  %v2 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> poison, <4 x i32> <i32 2, i32 5, i32 8, i32 11>
   ret void
 }
 
@@ -54,10 +54,10 @@ define void @load_factor4(<16 x i32>* %ptr) {
 ; NO_NEON:         ret void
 ;
   %interleaved.vec = load <16 x i32>, <16 x i32>* %ptr, align 4
-  %v0 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
-  %v1 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> undef, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
-  %v2 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> undef, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
-  %v3 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> undef, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
+  %v0 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> poison, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
+  %v1 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> poison, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
+  %v2 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> poison, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
+  %v3 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> poison, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
   ret void
 }
 
@@ -90,7 +90,7 @@ define void @store_factor3(<12 x i32>* %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x
 ; NO_NEON:         ret void
 ;
   %s0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-  %s1 = shufflevector <4 x i32> %v2, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
+  %s1 = shufflevector <4 x i32> %v2, <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
   %interleaved.vec = shufflevector <8 x i32> %s0, <8 x i32> %s1, <12 x i32> <i32 0, i32 4, i32 8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
   store <12 x i32> %interleaved.vec, <12 x i32>* %ptr, align 4
   ret void
@@ -130,8 +130,8 @@ define void @load_ptrvec_factor2(<4 x i32*>* %ptr) {
 ; NO_NEON:         ret void
 ;
   %interleaved.vec = load <4 x i32*>, <4 x i32*>* %ptr, align 4
-  %v0 = shufflevector <4 x i32*> %interleaved.vec, <4 x i32*> undef, <2 x i32> <i32 0, i32 2>
-  %v1 = shufflevector <4 x i32*> %interleaved.vec, <4 x i32*> undef, <2 x i32> <i32 1, i32 3>
+  %v0 = shufflevector <4 x i32*> %interleaved.vec, <4 x i32*> poison, <2 x i32> <i32 0, i32 2>
+  %v1 = shufflevector <4 x i32*> %interleaved.vec, <4 x i32*> poison, <2 x i32> <i32 1, i32 3>
   ret void
 }
 
@@ -151,9 +151,9 @@ define void @load_ptrvec_factor3(<6 x i32*>* %ptr) {
 ; NO_NEON:         ret void
 ;
   %interleaved.vec = load <6 x i32*>, <6 x i32*>* %ptr, align 4
-  %v0 = shufflevector <6 x i32*> %interleaved.vec, <6 x i32*> undef, <2 x i32> <i32 0, i32 3>
-  %v1 = shufflevector <6 x i32*> %interleaved.vec, <6 x i32*> undef, <2 x i32> <i32 1, i32 4>
-  %v2 = shufflevector <6 x i32*> %interleaved.vec, <6 x i32*> undef, <2 x i32> <i32 2, i32 5>
+  %v0 = shufflevector <6 x i32*> %interleaved.vec, <6 x i32*> poison, <2 x i32> <i32 0, i32 3>
+  %v1 = shufflevector <6 x i32*> %interleaved.vec, <6 x i32*> poison, <2 x i32> <i32 1, i32 4>
+  %v2 = shufflevector <6 x i32*> %interleaved.vec, <6 x i32*> poison, <2 x i32> <i32 2, i32 5>
   ret void
 }
 
@@ -175,10 +175,10 @@ define void @load_ptrvec_factor4(<8 x i32*>* %ptr) {
 ; NO_NEON:         ret void
 ;
   %interleaved.vec = load <8 x i32*>, <8 x i32*>* %ptr, align 4
-  %v0 = shufflevector <8 x i32*> %interleaved.vec, <8 x i32*> undef, <2 x i32> <i32 0, i32 4>
-  %v1 = shufflevector <8 x i32*> %interleaved.vec, <8 x i32*> undef, <2 x i32> <i32 1, i32 5>
-  %v2 = shufflevector <8 x i32*> %interleaved.vec, <8 x i32*> undef, <2 x i32> <i32 2, i32 6>
-  %v3 = shufflevector <8 x i32*> %interleaved.vec, <8 x i32*> undef, <2 x i32> <i32 3, i32 7>
+  %v0 = shufflevector <8 x i32*> %interleaved.vec, <8 x i32*> poison, <2 x i32> <i32 0, i32 4>
+  %v1 = shufflevector <8 x i32*> %interleaved.vec, <8 x i32*> poison, <2 x i32> <i32 1, i32 5>
+  %v2 = shufflevector <8 x i32*> %interleaved.vec, <8 x i32*> poison, <2 x i32> <i32 2, i32 6>
+  %v3 = shufflevector <8 x i32*> %interleaved.vec, <8 x i32*> poison, <2 x i32> <i32 3, i32 7>
   ret void
 }
 
@@ -215,7 +215,7 @@ define void @store_ptrvec_factor3(<6 x i32*>* %ptr, <2 x i32*> %v0, <2 x i32*> %
 ; NO_NEON:         ret void
 ;
   %s0 = shufflevector <2 x i32*> %v0, <2 x i32*> %v1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-  %s1 = shufflevector <2 x i32*> %v2, <2 x i32*> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+  %s1 = shufflevector <2 x i32*> %v2, <2 x i32*> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
   %interleaved.vec = shufflevector <4 x i32*> %s0, <4 x i32*> %s1, <6 x i32> <i32 0, i32 2, i32 4, i32 1, i32 3, i32 5>
   store <6 x i32*> %interleaved.vec, <6 x i32*>* %ptr, align 4
   ret void
@@ -255,8 +255,8 @@ define void @load_undef_mask_factor2(<8 x i32>* %ptr) {
 ; NO_NEON:         ret void
 ;
   %interleaved.vec = load <8 x i32>, <8 x i32>* %ptr, align 4
-  %v0 = shufflevector <8 x i32> %interleaved.vec, <8 x i32> undef, <4 x i32> <i32 undef, i32 2, i32 undef, i32 6>
-  %v1 = shufflevector <8 x i32> %interleaved.vec, <8 x i32> undef, <4 x i32> <i32 undef, i32 3, i32 undef, i32 7>
+  %v0 = shufflevector <8 x i32> %interleaved.vec, <8 x i32> poison, <4 x i32> <i32 undef, i32 2, i32 undef, i32 6>
+  %v1 = shufflevector <8 x i32> %interleaved.vec, <8 x i32> poison, <4 x i32> <i32 undef, i32 3, i32 undef, i32 7>
   ret void
 }
 
@@ -273,9 +273,9 @@ define void @load_undef_mask_factor3(<12 x i32>* %ptr) {
 ; NO_NEON:         ret void
 ;
   %interleaved.vec = load <12 x i32>, <12 x i32>* %ptr, align 4
-  %v0 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> undef, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
-  %v1 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> undef, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
-  %v2 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> undef, <4 x i32> <i32 2, i32 undef, i32 undef, i32 undef>
+  %v0 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> poison, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
+  %v1 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> poison, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
+  %v2 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> poison, <4 x i32> <i32 2, i32 undef, i32 undef, i32 undef>
   ret void
 }
 
@@ -293,10 +293,10 @@ define void @load_undef_mask_factor4(<16 x i32>* %ptr) {
 ; NO_NEON:         ret void
 ;
   %interleaved.vec = load <16 x i32>, <16 x i32>* %ptr, align 4
-  %v0 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> undef, <4 x i32> <i32 0, i32 4, i32 undef, i32 undef>
-  %v1 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> undef, <4 x i32> <i32 1, i32 5, i32 undef, i32 undef>
-  %v2 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> undef, <4 x i32> <i32 2, i32 6, i32 undef, i32 undef>
-  %v3 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> undef, <4 x i32> <i32 3, i32 7, i32 undef, i32 undef>
+  %v0 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> poison, <4 x i32> <i32 0, i32 4, i32 undef, i32 undef>
+  %v1 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> poison, <4 x i32> <i32 1, i32 5, i32 undef, i32 undef>
+  %v2 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> poison, <4 x i32> <i32 2, i32 6, i32 undef, i32 undef>
+  %v3 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> poison, <4 x i32> <i32 3, i32 7, i32 undef, i32 undef>
   ret void
 }
 
@@ -329,7 +329,7 @@ define void @store_undef_mask_factor3(<12 x i32>* %ptr, <4 x i32> %v0, <4 x i32>
 ; NO_NEON:         ret void
 ;
   %s0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-  %s1 = shufflevector <4 x i32> %v2, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
+  %s1 = shufflevector <4 x i32> %v2, <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
   %interleaved.vec = shufflevector <8 x i32> %s0, <8 x i32> %s1, <12 x i32> <i32 0, i32 4, i32 undef, i32 1, i32 undef, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
   store <12 x i32> %interleaved.vec, <12 x i32>* %ptr, align 4
   ret void
@@ -364,7 +364,7 @@ define void @load_illegal_factor2(<3 x float>* %ptr) nounwind {
 ; NO_NEON:         ret void
 ;
   %interleaved.vec = load <3 x float>, <3 x float>* %ptr, align 16
-  %v0 = shufflevector <3 x float> %interleaved.vec, <3 x float> undef, <3 x i32> <i32 0, i32 2, i32 undef>
+  %v0 = shufflevector <3 x float> %interleaved.vec, <3 x float> poison, <3 x i32> <i32 0, i32 2, i32 undef>
   ret void
 }
 
@@ -376,7 +376,7 @@ define void @store_illegal_factor2(<3 x float>* %ptr, <3 x float> %v0) nounwind
 ; NO_NEON-NOT:     @llvm.aarch64.neon
 ; NO_NEON:         ret void
 ;
-  %interleaved.vec = shufflevector <3 x float> %v0, <3 x float> undef, <3 x i32> <i32 0, i32 2, i32 undef>
+  %interleaved.vec = shufflevector <3 x float> %v0, <3 x float> poison, <3 x i32> <i32 0, i32 2, i32 undef>
   store <3 x float> %interleaved.vec, <3 x float>* %ptr, align 16
   ret void
 }
@@ -586,8 +586,8 @@ define void @load_factor2_wide2(<16 x i32>* %ptr) {
 ; NO_NEON:         ret void
 ;
   %interleaved.vec = load <16 x i32>, <16 x i32>* %ptr, align 4
-  %v0 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
-  %v1 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+  %v0 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+  %v1 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
   ret void
 }
 
@@ -609,10 +609,10 @@ define void @load_factor2_wide3(<24 x i32>* %ptr) {
 ; NEON-NEXT:       [[TMP11:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[LDN2]], 1
 ; NEON-NEXT:       [[TMP12:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[LDN2]], 0
 ; NEON-NEXT:       [[TMP13:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP7]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; NEON-NEXT:       [[TMP14:%.*]] = shufflevector <4 x i32> [[TMP11]], <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
+; NEON-NEXT:       [[TMP14:%.*]] = shufflevector <4 x i32> [[TMP11]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
 ; NEON-NEXT:       [[TMP15:%.*]] = shufflevector <8 x i32> [[TMP13]], <8 x i32> [[TMP14]], <12 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
 ; NEON-NEXT:       [[TMP16:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP8]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; NEON-NEXT:       [[TMP17:%.*]] = shufflevector <4 x i32> [[TMP12]], <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
+; NEON-NEXT:       [[TMP17:%.*]] = shufflevector <4 x i32> [[TMP12]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
 ; NEON-NEXT:       [[TMP18:%.*]] = shufflevector <8 x i32> [[TMP16]], <8 x i32> [[TMP17]], <12 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
 ; NEON-NEXT:       ret void
 ; NO_NEON-LABEL: @load_factor2_wide3(
@@ -620,8 +620,8 @@ define void @load_factor2_wide3(<24 x i32>* %ptr) {
 ; NO_NEON:         ret void
 ;
   %interleaved.vec = load <24 x i32>, <24 x i32>* %ptr, align 4
-  %v0 = shufflevector <24 x i32> %interleaved.vec, <24 x i32> undef, <12 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22>
-  %v1 = shufflevector <24 x i32> %interleaved.vec, <24 x i32> undef, <12 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23>
+  %v0 = shufflevector <24 x i32> %interleaved.vec, <24 x i32> poison, <12 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22>
+  %v1 = shufflevector <24 x i32> %interleaved.vec, <24 x i32> poison, <12 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23>
   ret void
 }
 
@@ -648,9 +648,9 @@ define void @load_factor3_wide(<24 x i32>* %ptr) {
 ; NO_NEON:         ret void
 ;
   %interleaved.vec = load <24 x i32>, <24 x i32>* %ptr, align 4
-  %v0 = shufflevector <24 x i32> %interleaved.vec, <24 x i32> undef, <8 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21>
-  %v1 = shufflevector <24 x i32> %interleaved.vec, <24 x i32> undef, <8 x i32> <i32 1, i32 4, i32 7, i32 10, i32 13, i32 16, i32 19, i32 22>
-  %v2 = shufflevector <24 x i32> %interleaved.vec, <24 x i32> undef, <8 x i32> <i32 2, i32 5, i32 8, i32 11, i32 14, i32 17, i32 20, i32 23>
+  %v0 = shufflevector <24 x i32> %interleaved.vec, <24 x i32> poison, <8 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21>
+  %v1 = shufflevector <24 x i32> %interleaved.vec, <24 x i32> poison, <8 x i32> <i32 1, i32 4, i32 7, i32 10, i32 13, i32 16, i32 19, i32 22>
+  %v2 = shufflevector <24 x i32> %interleaved.vec, <24 x i32> poison, <8 x i32> <i32 2, i32 5, i32 8, i32 11, i32 14, i32 17, i32 20, i32 23>
   ret void
 }
 
@@ -680,10 +680,10 @@ define void @load_factor4_wide(<32 x i32>* %ptr) {
 ; NO_NEON:         ret void
 ;
   %interleaved.vec = load <32 x i32>, <32 x i32>* %ptr, align 4
-  %v0 = shufflevector <32 x i32> %interleaved.vec, <32 x i32> undef, <8 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28>
-  %v1 = shufflevector <32 x i32> %interleaved.vec, <32 x i32> undef, <8 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29>
-  %v2 = shufflevector <32 x i32> %interleaved.vec, <32 x i32> undef, <8 x i32> <i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30>
-  %v3 = shufflevector <32 x i32> %interleaved.vec, <32 x i32> undef, <8 x i32> <i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 31>
+  %v0 = shufflevector <32 x i32> %interleaved.vec, <32 x i32> poison, <8 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28>
+  %v1 = shufflevector <32 x i32> %interleaved.vec, <32 x i32> poison, <8 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29>
+  %v2 = shufflevector <32 x i32> %interleaved.vec, <32 x i32> poison, <8 x i32> <i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30>
+  %v3 = shufflevector <32 x i32> %interleaved.vec, <32 x i32> poison, <8 x i32> <i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 31>
   ret void
 }
 
@@ -727,7 +727,7 @@ define void @store_factor3_wide(<24 x i32>* %ptr, <8 x i32> %v0, <8 x i32> %v1,
 ; NO_NEON:         ret void
 ;
   %s0 = shufflevector <8 x i32> %v0, <8 x i32> %v1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-  %s1 = shufflevector <8 x i32> %v2, <8 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %s1 = shufflevector <8 x i32> %v2, <8 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
   %interleaved.vec = shufflevector <16 x i32> %s0, <16 x i32> %s1, <24 x i32> <i32 0, i32 8, i32 16, i32 1, i32 9, i32 17, i32 2, i32 10, i32 18, i32 3, i32 11, i32 19, i32 4, i32 12, i32 20, i32 5, i32 13, i32 21, i32 6, i32 14, i32 22, i32 7, i32 15, i32 23>
   store <24 x i32> %interleaved.vec, <24 x i32>* %ptr, align 4
   ret void
@@ -770,8 +770,8 @@ define void @load_factor2_fp128(<4 x fp128>* %ptr) {
 ; NO_NEON:         ret void
 ;
   %interleaved.vec = load <4 x fp128>, <4 x fp128>* %ptr, align 16
-  %v0 = shufflevector <4 x fp128> %interleaved.vec, <4 x fp128> undef, <2 x i32> <i32 0, i32 2>
-  %v1 = shufflevector <4 x fp128> %interleaved.vec, <4 x fp128> undef, <2 x i32> <i32 1, i32 3>
+  %v0 = shufflevector <4 x fp128> %interleaved.vec, <4 x fp128> poison, <2 x i32> <i32 0, i32 2>
+  %v1 = shufflevector <4 x fp128> %interleaved.vec, <4 x fp128> poison, <2 x i32> <i32 1, i32 3>
   ret void
 }
 
@@ -794,8 +794,8 @@ define <4 x i1> @load_large_vector(<12 x i64 *>* %p) {
 ; NO_NEON:         ret
 ;
   %l = load <12 x i64 *>, <12 x i64 *>* %p
-  %s1 = shufflevector <12 x i64 *> %l, <12 x i64 *> undef, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
-  %s2 = shufflevector <12 x i64 *> %l, <12 x i64 *> undef, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
+  %s1 = shufflevector <12 x i64 *> %l, <12 x i64 *> poison, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
+  %s2 = shufflevector <12 x i64 *> %l, <12 x i64 *> poison, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
   %ret = icmp ne <4 x i64 *> %s1, %s2
   ret <4 x i1> %ret
 }

diff  --git a/llvm/test/Transforms/InterleavedAccess/ARM/interleaved-accesses-inseltpoison.ll b/llvm/test/Transforms/InterleavedAccess/ARM/interleaved-accesses-inseltpoison.ll
index 05d228d933c1..2526d26271b6 100644
--- a/llvm/test/Transforms/InterleavedAccess/ARM/interleaved-accesses-inseltpoison.ll
+++ b/llvm/test/Transforms/InterleavedAccess/ARM/interleaved-accesses-inseltpoison.ll
@@ -1086,10 +1086,10 @@ define void @load_factor2_wide3(<24 x i32>* %ptr) {
 ; CHECK-NEON-NEXT:    [[TMP11:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN2]], 1
 ; CHECK-NEON-NEXT:    [[TMP12:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN2]], 0
 ; CHECK-NEON-NEXT:    [[TMP13:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP7]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEON-NEXT:    [[TMP14:%.*]] = shufflevector <4 x i32> [[TMP11]], <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEON-NEXT:    [[TMP14:%.*]] = shufflevector <4 x i32> [[TMP11]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
 ; CHECK-NEON-NEXT:    [[TMP15:%.*]] = shufflevector <8 x i32> [[TMP13]], <8 x i32> [[TMP14]], <12 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
 ; CHECK-NEON-NEXT:    [[TMP16:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP8]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEON-NEXT:    [[TMP17:%.*]] = shufflevector <4 x i32> [[TMP12]], <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEON-NEXT:    [[TMP17:%.*]] = shufflevector <4 x i32> [[TMP12]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
 ; CHECK-NEON-NEXT:    [[TMP18:%.*]] = shufflevector <8 x i32> [[TMP16]], <8 x i32> [[TMP17]], <12 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
 ; CHECK-NEON-NEXT:    ret void
 ;
@@ -1107,10 +1107,10 @@ define void @load_factor2_wide3(<24 x i32>* %ptr) {
 ; CHECK-MVE-NEXT:    [[TMP8:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN2]], 1
 ; CHECK-MVE-NEXT:    [[TMP9:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN2]], 0
 ; CHECK-MVE-NEXT:    [[TMP10:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP5]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-MVE-NEXT:    [[TMP11:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-MVE-NEXT:    [[TMP11:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
 ; CHECK-MVE-NEXT:    [[TMP12:%.*]] = shufflevector <8 x i32> [[TMP10]], <8 x i32> [[TMP11]], <12 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
 ; CHECK-MVE-NEXT:    [[TMP13:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP6]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-MVE-NEXT:    [[TMP14:%.*]] = shufflevector <4 x i32> [[TMP9]], <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-MVE-NEXT:    [[TMP14:%.*]] = shufflevector <4 x i32> [[TMP9]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
 ; CHECK-MVE-NEXT:    [[TMP15:%.*]] = shufflevector <8 x i32> [[TMP13]], <8 x i32> [[TMP14]], <12 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
 ; CHECK-MVE-NEXT:    ret void
 ;

diff  --git a/llvm/test/Transforms/InterleavedAccess/ARM/interleaved-accesses.ll b/llvm/test/Transforms/InterleavedAccess/ARM/interleaved-accesses.ll
index cc41135d55da..67a2cbb8412f 100644
--- a/llvm/test/Transforms/InterleavedAccess/ARM/interleaved-accesses.ll
+++ b/llvm/test/Transforms/InterleavedAccess/ARM/interleaved-accesses.ll
@@ -1086,10 +1086,10 @@ define void @load_factor2_wide3(<24 x i32>* %ptr) {
 ; CHECK-NEON-NEXT:    [[TMP11:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN2]], 1
 ; CHECK-NEON-NEXT:    [[TMP12:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN2]], 0
 ; CHECK-NEON-NEXT:    [[TMP13:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP7]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEON-NEXT:    [[TMP14:%.*]] = shufflevector <4 x i32> [[TMP11]], <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEON-NEXT:    [[TMP14:%.*]] = shufflevector <4 x i32> [[TMP11]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
 ; CHECK-NEON-NEXT:    [[TMP15:%.*]] = shufflevector <8 x i32> [[TMP13]], <8 x i32> [[TMP14]], <12 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
 ; CHECK-NEON-NEXT:    [[TMP16:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP8]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEON-NEXT:    [[TMP17:%.*]] = shufflevector <4 x i32> [[TMP12]], <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEON-NEXT:    [[TMP17:%.*]] = shufflevector <4 x i32> [[TMP12]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
 ; CHECK-NEON-NEXT:    [[TMP18:%.*]] = shufflevector <8 x i32> [[TMP16]], <8 x i32> [[TMP17]], <12 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
 ; CHECK-NEON-NEXT:    ret void
 ;
@@ -1107,10 +1107,10 @@ define void @load_factor2_wide3(<24 x i32>* %ptr) {
 ; CHECK-MVE-NEXT:    [[TMP8:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN2]], 1
 ; CHECK-MVE-NEXT:    [[TMP9:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN2]], 0
 ; CHECK-MVE-NEXT:    [[TMP10:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP5]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-MVE-NEXT:    [[TMP11:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-MVE-NEXT:    [[TMP11:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
 ; CHECK-MVE-NEXT:    [[TMP12:%.*]] = shufflevector <8 x i32> [[TMP10]], <8 x i32> [[TMP11]], <12 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
 ; CHECK-MVE-NEXT:    [[TMP13:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP6]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-MVE-NEXT:    [[TMP14:%.*]] = shufflevector <4 x i32> [[TMP9]], <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-MVE-NEXT:    [[TMP14:%.*]] = shufflevector <4 x i32> [[TMP9]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
 ; CHECK-MVE-NEXT:    [[TMP15:%.*]] = shufflevector <8 x i32> [[TMP13]], <8 x i32> [[TMP14]], <12 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
 ; CHECK-MVE-NEXT:    ret void
 ;

diff  --git a/llvm/test/Transforms/InterleavedAccess/X86/interleavedStore-inseltpoison.ll b/llvm/test/Transforms/InterleavedAccess/X86/interleavedStore-inseltpoison.ll
index a32c125ec7d1..53d6606477b9 100644
--- a/llvm/test/Transforms/InterleavedAccess/X86/interleavedStore-inseltpoison.ll
+++ b/llvm/test/Transforms/InterleavedAccess/X86/interleavedStore-inseltpoison.ll
@@ -117,11 +117,11 @@ define void @interleaved_store_vf16_i8_stride3(<16 x i8> %a, <16 x i8> %b, <16 x
 ; CHECK-NEXT:    [[TMP11:%.*]] = shufflevector <16 x i8> [[TMP8]], <16 x i8> [[TMP9]], <16 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20>
 ; CHECK-NEXT:    [[TMP12:%.*]] = shufflevector <16 x i8> [[TMP9]], <16 x i8> [[TMP10]], <16 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20>
 ; CHECK-NEXT:    [[TMP13:%.*]] = shufflevector <16 x i8> [[TMP10]], <16 x i8> [[TMP8]], <16 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20>
-; CHECK-NEXT:    [[TMP14:%.*]] = shufflevector <16 x i8> [[TMP11]], <16 x i8> undef, <16 x i32> <i32 0, i32 11, i32 6, i32 1, i32 12, i32 7, i32 2, i32 13, i32 8, i32 3, i32 14, i32 9, i32 4, i32 15, i32 10, i32 5>
-; CHECK-NEXT:    [[TMP15:%.*]] = shufflevector <16 x i8> [[TMP12]], <16 x i8> undef, <16 x i32> <i32 0, i32 11, i32 6, i32 1, i32 12, i32 7, i32 2, i32 13, i32 8, i32 3, i32 14, i32 9, i32 4, i32 15, i32 10, i32 5>
-; CHECK-NEXT:    [[TMP16:%.*]] = shufflevector <16 x i8> [[TMP13]], <16 x i8> undef, <16 x i32> <i32 0, i32 11, i32 6, i32 1, i32 12, i32 7, i32 2, i32 13, i32 8, i32 3, i32 14, i32 9, i32 4, i32 15, i32 10, i32 5>
+; CHECK-NEXT:    [[TMP14:%.*]] = shufflevector <16 x i8> [[TMP11]], <16 x i8> poison, <16 x i32> <i32 0, i32 11, i32 6, i32 1, i32 12, i32 7, i32 2, i32 13, i32 8, i32 3, i32 14, i32 9, i32 4, i32 15, i32 10, i32 5>
+; CHECK-NEXT:    [[TMP15:%.*]] = shufflevector <16 x i8> [[TMP12]], <16 x i8> poison, <16 x i32> <i32 0, i32 11, i32 6, i32 1, i32 12, i32 7, i32 2, i32 13, i32 8, i32 3, i32 14, i32 9, i32 4, i32 15, i32 10, i32 5>
+; CHECK-NEXT:    [[TMP16:%.*]] = shufflevector <16 x i8> [[TMP13]], <16 x i8> poison, <16 x i32> <i32 0, i32 11, i32 6, i32 1, i32 12, i32 7, i32 2, i32 13, i32 8, i32 3, i32 14, i32 9, i32 4, i32 15, i32 10, i32 5>
 ; CHECK-NEXT:    [[TMP17:%.*]] = shufflevector <16 x i8> [[TMP14]], <16 x i8> [[TMP15]], <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
-; CHECK-NEXT:    [[TMP18:%.*]] = shufflevector <16 x i8> [[TMP16]], <16 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP18:%.*]] = shufflevector <16 x i8> [[TMP16]], <16 x i8> poison, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[TMP19:%.*]] = shufflevector <32 x i8> [[TMP17]], <32 x i8> [[TMP18]], <48 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47>
 ; CHECK-NEXT:    store <48 x i8> [[TMP19]], <48 x i8>* [[P:%.*]], align 1
 ; CHECK-NEXT:    ret void
@@ -152,7 +152,7 @@ define void @interleaved_store_vf32_i8_stride3(<32 x i8> %a, <32 x i8> %b, <32 x
 ; CHECK-NEXT:    [[TMP15:%.*]] = shufflevector <32 x i8> [[TMP13]], <32 x i8> [[TMP11]], <32 x i32> <i32 0, i32 11, i32 6, i32 1, i32 12, i32 7, i32 2, i32 13, i32 8, i32 3, i32 14, i32 9, i32 4, i32 15, i32 10, i32 5, i32 48, i32 59, i32 54, i32 49, i32 60, i32 55, i32 50, i32 61, i32 56, i32 51, i32 62, i32 57, i32 52, i32 63, i32 58, i32 53>
 ; CHECK-NEXT:    [[TMP16:%.*]] = shufflevector <32 x i8> [[TMP12]], <32 x i8> [[TMP13]], <32 x i32> <i32 16, i32 27, i32 22, i32 17, i32 28, i32 23, i32 18, i32 29, i32 24, i32 19, i32 30, i32 25, i32 20, i32 31, i32 26, i32 21, i32 48, i32 59, i32 54, i32 49, i32 60, i32 55, i32 50, i32 61, i32 56, i32 51, i32 62, i32 57, i32 52, i32 63, i32 58, i32 53>
 ; CHECK-NEXT:    [[TMP17:%.*]] = shufflevector <32 x i8> [[TMP14]], <32 x i8> [[TMP15]], <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
-; CHECK-NEXT:    [[TMP18:%.*]] = shufflevector <32 x i8> [[TMP16]], <32 x i8> undef, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP18:%.*]] = shufflevector <32 x i8> [[TMP16]], <32 x i8> poison, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[TMP19:%.*]] = shufflevector <64 x i8> [[TMP17]], <64 x i8> [[TMP18]], <96 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63, i32 64, i32 65, i32 66, i32 67, i32 68, i32 69, i32 70, i32 71, i32 72, i32 73, i32 74, i32 75, i32 76, i32 77, i32 78, i32 79, i32 80, i32 81, i32 82, i32 83, i32 84, i32 85, i32 86, i32 87, i32 88, i32 89, i32 90, i32 91, i32 92, i32 93, i32 94, i32 95>
 ; CHECK-NEXT:    store <96 x i8> [[TMP19]], <96 x i8>* [[P:%.*]], align 1
 ; CHECK-NEXT:    ret void
@@ -189,7 +189,7 @@ define void @interleaved_store_vf64_i8_stride3(<64 x i8> %a, <64 x i8> %b, <64 x
 ; CHECK-NEXT:    [[TMP21:%.*]] = shufflevector <32 x i8> [[TMP16]], <32 x i8> [[TMP17]], <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
 ; CHECK-NEXT:    [[TMP22:%.*]] = shufflevector <32 x i8> [[TMP18]], <32 x i8> [[TMP19]], <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
 ; CHECK-NEXT:    [[TMP23:%.*]] = shufflevector <64 x i8> [[TMP20]], <64 x i8> [[TMP21]], <128 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63, i32 64, i32 65, i32 66, i32 67, i32 68, i32 69, i32 70, i32 71, i32 72, i32 73, i32 74, i32 75, i32 76, i32 77, i32 78, i32 79, i32 80, i32 81, i32 82, i32 83, i32 84, i32 85, i32 86, i32 87, i32 88, i32 89, i32 90, i32 91, i32 92, i32 93, i32 94, i32 95, i32 96, i32 97, i32 98, i32 99, i32 100, i32 101, i32 102, i32 103, i32 104, i32 105, i32 106, i32 107, i32 108, i32 109, i32 110, i32 111, i32 112, i32 113, i32 114, i32 115, i32 116, i32 117, i32 118, i32 119, i32 120, i32 121, i32 122, i32 123, i32 124, i32 125, i32 126, i32 127>
-; CHECK-NEXT:    [[TMP24:%.*]] = shufflevector <64 x i8> [[TMP22]], <64 x i8> undef, <128 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP24:%.*]] = shufflevector <64 x i8> [[TMP22]], <64 x i8> poison, <128 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[TMP25:%.*]] = shufflevector <128 x i8> [[TMP23]], <128 x i8> [[TMP24]], <192 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63, i32 64, i32 65, i32 66, i32 67, i32 68, i32 69, i32 70, i32 71, i32 72, i32 73, i32 74, i32 75, i32 76, i32 77, i32 78, i32 79, i32 80, i32 81, i32 82, i32 83, i32 84, i32 85, i32 86, i32 87, i32 88, i32 89, i32 90, i32 91, i32 92, i32 93, i32 94, i32 95, i32 96, i32 97, i32 98, i32 99, i32 100, i32 101, i32 102, i32 103, i32 104, i32 105, i32 106, i32 107, i32 108, i32 109, i32 110, i32 111, i32 112, i32 113, i32 114, i32 115, i32 116, i32 117, i32 118, i32 119, i32 120, i32 121, i32 122, i32 123, i32 124, i32 125, i32 126, i32 127, i32 128, i32 129, i32 130, i32 131, i32 132, i32 133, i32 134, i32 135, i32 136, i32 137, i32 138, i32 139, i32 140, i32 141, i32 142, i32 143, i32 144, i32 145, i32 146, i32 147, i32 148, i32 149, i32 150, i32 151, i32 152, i32 153, i32 154, i32 155, i32 156, i32 157, i32 158, i32 159, i32 160, i32 161, i32 162, i32 163, i32 164, i32 165, i32 166, i32 167, i32 168, i32 169, i32 170, i32 171, i32 172, i32 173, i32 174, i32 175, i32 176, i32 177, i32 178, i32 179, i32 180, i32 181, i32 182, i32 183, i32 184, i32 185, i32 186, i32 187, i32 188, i32 189, i32 190, i32 191>
 ; CHECK-NEXT:    store <192 x i8> [[TMP25]], <192 x i8>* [[P:%.*]], align 1
 ; CHECK-NEXT:    ret void

diff  --git a/llvm/test/Transforms/InterleavedAccess/X86/interleavedStore.ll b/llvm/test/Transforms/InterleavedAccess/X86/interleavedStore.ll
index ec8f4d6e9355..1a11b191e5d4 100644
--- a/llvm/test/Transforms/InterleavedAccess/X86/interleavedStore.ll
+++ b/llvm/test/Transforms/InterleavedAccess/X86/interleavedStore.ll
@@ -117,11 +117,11 @@ define void @interleaved_store_vf16_i8_stride3(<16 x i8> %a, <16 x i8> %b, <16 x
 ; CHECK-NEXT:    [[TMP11:%.*]] = shufflevector <16 x i8> [[TMP8]], <16 x i8> [[TMP9]], <16 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20>
 ; CHECK-NEXT:    [[TMP12:%.*]] = shufflevector <16 x i8> [[TMP9]], <16 x i8> [[TMP10]], <16 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20>
 ; CHECK-NEXT:    [[TMP13:%.*]] = shufflevector <16 x i8> [[TMP10]], <16 x i8> [[TMP8]], <16 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20>
-; CHECK-NEXT:    [[TMP14:%.*]] = shufflevector <16 x i8> [[TMP11]], <16 x i8> undef, <16 x i32> <i32 0, i32 11, i32 6, i32 1, i32 12, i32 7, i32 2, i32 13, i32 8, i32 3, i32 14, i32 9, i32 4, i32 15, i32 10, i32 5>
-; CHECK-NEXT:    [[TMP15:%.*]] = shufflevector <16 x i8> [[TMP12]], <16 x i8> undef, <16 x i32> <i32 0, i32 11, i32 6, i32 1, i32 12, i32 7, i32 2, i32 13, i32 8, i32 3, i32 14, i32 9, i32 4, i32 15, i32 10, i32 5>
-; CHECK-NEXT:    [[TMP16:%.*]] = shufflevector <16 x i8> [[TMP13]], <16 x i8> undef, <16 x i32> <i32 0, i32 11, i32 6, i32 1, i32 12, i32 7, i32 2, i32 13, i32 8, i32 3, i32 14, i32 9, i32 4, i32 15, i32 10, i32 5>
+; CHECK-NEXT:    [[TMP14:%.*]] = shufflevector <16 x i8> [[TMP11]], <16 x i8> poison, <16 x i32> <i32 0, i32 11, i32 6, i32 1, i32 12, i32 7, i32 2, i32 13, i32 8, i32 3, i32 14, i32 9, i32 4, i32 15, i32 10, i32 5>
+; CHECK-NEXT:    [[TMP15:%.*]] = shufflevector <16 x i8> [[TMP12]], <16 x i8> poison, <16 x i32> <i32 0, i32 11, i32 6, i32 1, i32 12, i32 7, i32 2, i32 13, i32 8, i32 3, i32 14, i32 9, i32 4, i32 15, i32 10, i32 5>
+; CHECK-NEXT:    [[TMP16:%.*]] = shufflevector <16 x i8> [[TMP13]], <16 x i8> poison, <16 x i32> <i32 0, i32 11, i32 6, i32 1, i32 12, i32 7, i32 2, i32 13, i32 8, i32 3, i32 14, i32 9, i32 4, i32 15, i32 10, i32 5>
 ; CHECK-NEXT:    [[TMP17:%.*]] = shufflevector <16 x i8> [[TMP14]], <16 x i8> [[TMP15]], <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
-; CHECK-NEXT:    [[TMP18:%.*]] = shufflevector <16 x i8> [[TMP16]], <16 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP18:%.*]] = shufflevector <16 x i8> [[TMP16]], <16 x i8> poison, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[TMP19:%.*]] = shufflevector <32 x i8> [[TMP17]], <32 x i8> [[TMP18]], <48 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47>
 ; CHECK-NEXT:    store <48 x i8> [[TMP19]], <48 x i8>* [[P:%.*]], align 1
 ; CHECK-NEXT:    ret void
@@ -152,7 +152,7 @@ define void @interleaved_store_vf32_i8_stride3(<32 x i8> %a, <32 x i8> %b, <32 x
 ; CHECK-NEXT:    [[TMP15:%.*]] = shufflevector <32 x i8> [[TMP13]], <32 x i8> [[TMP11]], <32 x i32> <i32 0, i32 11, i32 6, i32 1, i32 12, i32 7, i32 2, i32 13, i32 8, i32 3, i32 14, i32 9, i32 4, i32 15, i32 10, i32 5, i32 48, i32 59, i32 54, i32 49, i32 60, i32 55, i32 50, i32 61, i32 56, i32 51, i32 62, i32 57, i32 52, i32 63, i32 58, i32 53>
 ; CHECK-NEXT:    [[TMP16:%.*]] = shufflevector <32 x i8> [[TMP12]], <32 x i8> [[TMP13]], <32 x i32> <i32 16, i32 27, i32 22, i32 17, i32 28, i32 23, i32 18, i32 29, i32 24, i32 19, i32 30, i32 25, i32 20, i32 31, i32 26, i32 21, i32 48, i32 59, i32 54, i32 49, i32 60, i32 55, i32 50, i32 61, i32 56, i32 51, i32 62, i32 57, i32 52, i32 63, i32 58, i32 53>
 ; CHECK-NEXT:    [[TMP17:%.*]] = shufflevector <32 x i8> [[TMP14]], <32 x i8> [[TMP15]], <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
-; CHECK-NEXT:    [[TMP18:%.*]] = shufflevector <32 x i8> [[TMP16]], <32 x i8> undef, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP18:%.*]] = shufflevector <32 x i8> [[TMP16]], <32 x i8> poison, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[TMP19:%.*]] = shufflevector <64 x i8> [[TMP17]], <64 x i8> [[TMP18]], <96 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63, i32 64, i32 65, i32 66, i32 67, i32 68, i32 69, i32 70, i32 71, i32 72, i32 73, i32 74, i32 75, i32 76, i32 77, i32 78, i32 79, i32 80, i32 81, i32 82, i32 83, i32 84, i32 85, i32 86, i32 87, i32 88, i32 89, i32 90, i32 91, i32 92, i32 93, i32 94, i32 95>
 ; CHECK-NEXT:    store <96 x i8> [[TMP19]], <96 x i8>* [[P:%.*]], align 1
 ; CHECK-NEXT:    ret void
@@ -189,7 +189,7 @@ define void @interleaved_store_vf64_i8_stride3(<64 x i8> %a, <64 x i8> %b, <64 x
 ; CHECK-NEXT:    [[TMP21:%.*]] = shufflevector <32 x i8> [[TMP16]], <32 x i8> [[TMP17]], <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
 ; CHECK-NEXT:    [[TMP22:%.*]] = shufflevector <32 x i8> [[TMP18]], <32 x i8> [[TMP19]], <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
 ; CHECK-NEXT:    [[TMP23:%.*]] = shufflevector <64 x i8> [[TMP20]], <64 x i8> [[TMP21]], <128 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63, i32 64, i32 65, i32 66, i32 67, i32 68, i32 69, i32 70, i32 71, i32 72, i32 73, i32 74, i32 75, i32 76, i32 77, i32 78, i32 79, i32 80, i32 81, i32 82, i32 83, i32 84, i32 85, i32 86, i32 87, i32 88, i32 89, i32 90, i32 91, i32 92, i32 93, i32 94, i32 95, i32 96, i32 97, i32 98, i32 99, i32 100, i32 101, i32 102, i32 103, i32 104, i32 105, i32 106, i32 107, i32 108, i32 109, i32 110, i32 111, i32 112, i32 113, i32 114, i32 115, i32 116, i32 117, i32 118, i32 119, i32 120, i32 121, i32 122, i32 123, i32 124, i32 125, i32 126, i32 127>
-; CHECK-NEXT:    [[TMP24:%.*]] = shufflevector <64 x i8> [[TMP22]], <64 x i8> undef, <128 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP24:%.*]] = shufflevector <64 x i8> [[TMP22]], <64 x i8> poison, <128 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[TMP25:%.*]] = shufflevector <128 x i8> [[TMP23]], <128 x i8> [[TMP24]], <192 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63, i32 64, i32 65, i32 66, i32 67, i32 68, i32 69, i32 70, i32 71, i32 72, i32 73, i32 74, i32 75, i32 76, i32 77, i32 78, i32 79, i32 80, i32 81, i32 82, i32 83, i32 84, i32 85, i32 86, i32 87, i32 88, i32 89, i32 90, i32 91, i32 92, i32 93, i32 94, i32 95, i32 96, i32 97, i32 98, i32 99, i32 100, i32 101, i32 102, i32 103, i32 104, i32 105, i32 106, i32 107, i32 108, i32 109, i32 110, i32 111, i32 112, i32 113, i32 114, i32 115, i32 116, i32 117, i32 118, i32 119, i32 120, i32 121, i32 122, i32 123, i32 124, i32 125, i32 126, i32 127, i32 128, i32 129, i32 130, i32 131, i32 132, i32 133, i32 134, i32 135, i32 136, i32 137, i32 138, i32 139, i32 140, i32 141, i32 142, i32 143, i32 144, i32 145, i32 146, i32 147, i32 148, i32 149, i32 150, i32 151, i32 152, i32 153, i32 154, i32 155, i32 156, i32 157, i32 158, i32 159, i32 160, i32 161, i32 162, i32 163, i32 164, i32 165, i32 166, i32 167, i32 168, i32 169, i32 170, i32 171, i32 172, i32 173, i32 174, i32 175, i32 176, i32 177, i32 178, i32 179, i32 180, i32 181, i32 182, i32 183, i32 184, i32 185, i32 186, i32 187, i32 188, i32 189, i32 190, i32 191>
 ; CHECK-NEXT:    store <192 x i8> [[TMP25]], <192 x i8>* [[P:%.*]], align 1
 ; CHECK-NEXT:    ret void

diff  --git a/llvm/test/Transforms/LoopVectorize/AMDGPU/packed-math.ll b/llvm/test/Transforms/LoopVectorize/AMDGPU/packed-math.ll
index d6f41f94f59e..f53345334bb5 100644
--- a/llvm/test/Transforms/LoopVectorize/AMDGPU/packed-math.ll
+++ b/llvm/test/Transforms/LoopVectorize/AMDGPU/packed-math.ll
@@ -62,7 +62,7 @@ define half @vectorize_v2f16_loop(half addrspace(1)* noalias %s) {
 ; GFX9-NEXT:    [[BIN_RDX18:%.*]] = fadd fast <2 x half> [[TMP21]], [[BIN_RDX17]]
 ; GFX9-NEXT:    [[BIN_RDX19:%.*]] = fadd fast <2 x half> [[TMP22]], [[BIN_RDX18]]
 ; GFX9-NEXT:    [[BIN_RDX20:%.*]] = fadd fast <2 x half> [[TMP23]], [[BIN_RDX19]]
-; GFX9-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <2 x half> [[BIN_RDX20]], <2 x half> undef, <2 x i32> <i32 1, i32 undef>
+; GFX9-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <2 x half> [[BIN_RDX20]], <2 x half> poison, <2 x i32> <i32 1, i32 undef>
 ; GFX9-NEXT:    [[BIN_RDX21:%.*]] = fadd fast <2 x half> [[BIN_RDX20]], [[RDX_SHUF]]
 ; GFX9-NEXT:    [[TMP25:%.*]] = extractelement <2 x half> [[BIN_RDX21]], i32 0
 ; GFX9-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
@@ -132,7 +132,7 @@ define half @vectorize_v2f16_loop(half addrspace(1)* noalias %s) {
 ; VI-NEXT:    [[BIN_RDX18:%.*]] = fadd fast <2 x half> [[TMP21]], [[BIN_RDX17]]
 ; VI-NEXT:    [[BIN_RDX19:%.*]] = fadd fast <2 x half> [[TMP22]], [[BIN_RDX18]]
 ; VI-NEXT:    [[BIN_RDX20:%.*]] = fadd fast <2 x half> [[TMP23]], [[BIN_RDX19]]
-; VI-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <2 x half> [[BIN_RDX20]], <2 x half> undef, <2 x i32> <i32 1, i32 undef>
+; VI-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <2 x half> [[BIN_RDX20]], <2 x half> poison, <2 x i32> <i32 1, i32 undef>
 ; VI-NEXT:    [[BIN_RDX21:%.*]] = fadd fast <2 x half> [[BIN_RDX20]], [[RDX_SHUF]]
 ; VI-NEXT:    [[TMP25:%.*]] = extractelement <2 x half> [[BIN_RDX21]], i32 0
 ; VI-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]

diff  --git a/llvm/test/Transforms/LoopVectorize/ARM/sphinx.ll b/llvm/test/Transforms/LoopVectorize/ARM/sphinx.ll
index 38bcb8e6c9fb..61ebfa4dcb52 100644
--- a/llvm/test/Transforms/LoopVectorize/ARM/sphinx.ll
+++ b/llvm/test/Transforms/LoopVectorize/ARM/sphinx.ll
@@ -65,9 +65,9 @@ define i32 @test(float* nocapture readonly %x) {
 ; CHECK-NEXT:    [[TMP16]] = fsub fast <2 x double> [[VEC_PHI]], [[TMP15]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 2
 ; CHECK-NEXT:    [[TMP17:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT:    br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0
+; CHECK-NEXT:    br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP0:!llvm.loop !.*]]
 ; CHECK:       middle.block:
-; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <2 x double> [[TMP16]], <2 x double> undef, <2 x i32> <i32 1, i32 undef>
+; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <2 x double> [[TMP16]], <2 x double> poison, <2 x i32> <i32 1, i32 undef>
 ; CHECK-NEXT:    [[BIN_RDX:%.*]] = fadd fast <2 x double> [[TMP16]], [[RDX_SHUF]]
 ; CHECK-NEXT:    [[TMP18:%.*]] = extractelement <2 x double> [[BIN_RDX]], i32 0
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i32 [[T]], [[N_VEC]]
@@ -93,7 +93,7 @@ define i32 @test(float* nocapture readonly %x) {
 ; CHECK-NEXT:    [[SUB127]] = fsub fast double [[DVAL1_4131]], [[MUL126]]
 ; CHECK-NEXT:    [[INC129]] = add nuw nsw i32 [[I_2132]], 1
 ; CHECK-NEXT:    [[EXITCOND143:%.*]] = icmp eq i32 [[INC129]], [[T]]
-; CHECK-NEXT:    br i1 [[EXITCOND143]], label [[OUTEREND]], label [[INNERLOOP]], !llvm.loop !2
+; CHECK-NEXT:    br i1 [[EXITCOND143]], label [[OUTEREND]], label [[INNERLOOP]], [[LOOP2:!llvm.loop !.*]]
 ; CHECK:       outerend:
 ; CHECK-NEXT:    [[SUB127_LCSSA:%.*]] = phi double [ [[SUB127]], [[INNERLOOP]] ], [ [[TMP18]], [[MIDDLE_BLOCK]] ]
 ; CHECK-NEXT:    [[CONV138:%.*]] = fptosi double [[SUB127_LCSSA]] to i32

diff  --git a/llvm/test/Transforms/LoopVectorize/PowerPC/widened-massv-call.ll b/llvm/test/Transforms/LoopVectorize/PowerPC/widened-massv-call.ll
index f5626598f29c..7054c1b74bc5 100644
--- a/llvm/test/Transforms/LoopVectorize/PowerPC/widened-massv-call.ll
+++ b/llvm/test/Transforms/LoopVectorize/PowerPC/widened-massv-call.ll
@@ -19,10 +19,10 @@ define dso_local double @test(float* %Arr) {
 ; CHECK-NEXT:    [[TMP5]] = fadd fast <2 x double> [[TMP4]], [[VEC_PHI]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 2
 ; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 128
-; CHECK-NEXT:    br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0
+; CHECK-NEXT:    br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP0:!llvm.loop !.*]]
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    [[DOTLCSSA:%.*]] = phi <2 x double> [ [[TMP5]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <2 x double> [[DOTLCSSA]], <2 x double> undef, <2 x i32> <i32 1, i32 undef>
+; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <2 x double> [[DOTLCSSA]], <2 x double> poison, <2 x i32> <i32 1, i32 undef>
 ; CHECK-NEXT:    [[BIN_RDX:%.*]] = fadd fast <2 x double> [[DOTLCSSA]], [[RDX_SHUF]]
 ; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <2 x double> [[BIN_RDX]], i32 0
 ; CHECK-NEXT:    ret double [[TMP7]]

diff  --git a/llvm/test/Transforms/LoopVectorize/PowerPC/widened-massv-vfabi-attr.ll b/llvm/test/Transforms/LoopVectorize/PowerPC/widened-massv-vfabi-attr.ll
index a6ea7329ee80..727d4477c0fd 100644
--- a/llvm/test/Transforms/LoopVectorize/PowerPC/widened-massv-vfabi-attr.ll
+++ b/llvm/test/Transforms/LoopVectorize/PowerPC/widened-massv-vfabi-attr.ll
@@ -18,9 +18,9 @@ define dso_local double @test(float* %Arr) {
 ; CHECK-NEXT:    [[TMP5]] = fadd fast <2 x double> [[VEC_PHI]], [[TMP4]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 2
 ; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 128
-; CHECK-NEXT:    br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0
+; CHECK-NEXT:    br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP0:!llvm.loop !.*]]
 ; CHECK:       middle.block:
-; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> undef, <2 x i32> <i32 1, i32 undef>
+; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> poison, <2 x i32> <i32 1, i32 undef>
 ; CHECK-NEXT:    [[BIN_RDX:%.*]] = fadd fast <2 x double> [[TMP5]], [[RDX_SHUF]]
 ; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <2 x double> [[BIN_RDX]], i32 0
 ; CHECK-NEXT:    ret double [[TMP7]]

diff  --git a/llvm/test/Transforms/LoopVectorize/float-minmax-instruction-flag.ll b/llvm/test/Transforms/LoopVectorize/float-minmax-instruction-flag.ll
index 4f3648404c18..d6fc86956023 100644
--- a/llvm/test/Transforms/LoopVectorize/float-minmax-instruction-flag.ll
+++ b/llvm/test/Transforms/LoopVectorize/float-minmax-instruction-flag.ll
@@ -68,10 +68,10 @@ define float @minloopattr(float* nocapture readonly %arg) #0 {
 ; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 65536
 ; CHECK-NEXT:    br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP0:!llvm.loop !.*]]
 ; CHECK:       middle.block:
-; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> poison, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[RDX_MINMAX_CMP:%.*]] = fcmp fast olt <4 x float> [[TMP5]], [[RDX_SHUF]]
 ; CHECK-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select fast <4 x i1> [[RDX_MINMAX_CMP]], <4 x float> [[TMP5]], <4 x float> [[RDX_SHUF]]
-; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[RDX_MINMAX_SELECT]], <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[RDX_MINMAX_SELECT]], <4 x float> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[RDX_MINMAX_CMP2:%.*]] = fcmp fast olt <4 x float> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]]
 ; CHECK-NEXT:    [[RDX_MINMAX_SELECT3:%.*]] = select fast <4 x i1> [[RDX_MINMAX_CMP2]], <4 x float> [[RDX_MINMAX_SELECT]], <4 x float> [[RDX_SHUF1]]
 ; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <4 x float> [[RDX_MINMAX_SELECT3]], i32 0

diff  --git a/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll b/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll
index 3e0308bb00f4..dbc90bcf4519 100644
--- a/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll
+++ b/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll
@@ -76,7 +76,7 @@ for.end:                                          ; preds = %for.body
 ; CHECK: add nsw <4 x i32> {{.*}}, <i32 2, i32 2, i32 2, i32 2>
 ; CHECK: add nsw <4 x i32> {{.*}}, <i32 3, i32 3, i32 3, i32 3>
 ; CHECK: shufflevector <4 x i32> {{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK: shufflevector <4 x i32> {{.*}}, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK: shufflevector <4 x i32> {{.*}}, <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
 ; CHECK: %interleaved.vec = shufflevector <8 x i32> {{.*}}, <12 x i32> <i32 0, i32 4, i32 8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
 ; CHECK: store <12 x i32> %interleaved.vec, <12 x i32>* {{.*}}, align 4
 
@@ -159,9 +159,9 @@ define i32 @test_struct_load4(%struct.ST4* nocapture readonly %S) {
 ; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
 ; CHECK-NEXT:    br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !6
 ; CHECK:       middle.block:
-; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[BIN_RDX:%.*]] = add <4 x i32> [[TMP5]], [[RDX_SHUF]]
-; CHECK-NEXT:    [[RDX_SHUF4:%.*]] = shufflevector <4 x i32> [[BIN_RDX]], <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT:    [[RDX_SHUF4:%.*]] = shufflevector <4 x i32> [[BIN_RDX]], <4 x i32> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[BIN_RDX5:%.*]] = add <4 x i32> [[BIN_RDX]], [[RDX_SHUF4]]
 ; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <4 x i32> [[BIN_RDX5]], i32 0
 ; CHECK-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]

diff  --git a/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization.ll b/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization.ll
index 1ac402000f2d..69b171b10617 100644
--- a/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization.ll
+++ b/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization.ll
@@ -398,9 +398,9 @@ for.end:                                          ; preds = %for.body
 ; CHECK-NEXT:    br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]]
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    [[DOTLCSSA:%.*]] = phi <4 x i32> [ [[TMP5]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[DOTLCSSA]], <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[DOTLCSSA]], <4 x i32> poison, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[BIN_RDX:%.*]] = add <4 x i32> [[DOTLCSSA]], [[RDX_SHUF]]
-; CHECK-NEXT:    [[RDX_SHUF5:%.*]] = shufflevector <4 x i32> [[BIN_RDX]], <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT:    [[RDX_SHUF5:%.*]] = shufflevector <4 x i32> [[BIN_RDX]], <4 x i32> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[BIN_RDX6:%.*]] = add <4 x i32> [[BIN_RDX]], [[RDX_SHUF5]]
 ; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <4 x i32> [[BIN_RDX6]], i32 0
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]]

diff  --git a/llvm/test/Transforms/LoopVectorize/reduction.ll b/llvm/test/Transforms/LoopVectorize/reduction.ll
index d800351d7eb9..df7fcf3b2bbe 100644
--- a/llvm/test/Transforms/LoopVectorize/reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/reduction.ll
@@ -6,9 +6,9 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 ;CHECK: phi <4 x i32>
 ;CHECK: load <4 x i32>
 ;CHECK: add <4 x i32>
-;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> poison, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
 ;CHECK: add <4 x i32>
-;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
 ;CHECK: add <4 x i32>
 ;CHECK: extractelement <4 x i32> %{{.*}}, i32 0
 ;CHECK: ret i32
@@ -41,9 +41,9 @@ define i32 @reduction_sum(i32 %n, i32* noalias nocapture %A, i32* noalias nocapt
 ;CHECK: phi <4 x i32>
 ;CHECK: load <4 x i32>
 ;CHECK: mul <4 x i32>
-;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> poison, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
 ;CHECK: mul <4 x i32>
-;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
 ;CHECK: mul <4 x i32>
 ;CHECK: extractelement <4 x i32> %{{.*}}, i32 0
 ;CHECK: ret i32
@@ -76,9 +76,9 @@ define i32 @reduction_prod(i32 %n, i32* noalias nocapture %A, i32* noalias nocap
 ;CHECK: phi <4 x i32>
 ;CHECK: load <4 x i32>
 ;CHECK: mul nsw <4 x i32>
-;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> poison, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
 ;CHECK: add <4 x i32>
-;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
 ;CHECK: add <4 x i32>
 ;CHECK: extractelement <4 x i32> %{{.*}}, i32 0
 ;CHECK: ret i32
@@ -109,9 +109,9 @@ define i32 @reduction_mix(i32 %n, i32* noalias nocapture %A, i32* noalias nocapt
 
 ;CHECK-LABEL: @reduction_mul(
 ;CHECK: mul <4 x i32>
-;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> poison, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
 ;CHECK: mul <4 x i32>
-;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
 ;CHECK: mul <4 x i32>
 ;CHECK: extractelement <4 x i32> %{{.*}}, i32 0
 ;CHECK: ret i32
@@ -143,9 +143,9 @@ define i32 @reduction_mul(i32 %n, i32* noalias nocapture %A, i32* noalias nocapt
 ;CHECK-LABEL: @start_at_non_zero(
 ;CHECK: phi <4 x i32>
 ;CHECK: <i32 120, i32 0, i32 0, i32 0>
-;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> poison, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
 ;CHECK: add <4 x i32>
-;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
 ;CHECK: add <4 x i32>
 ;CHECK: extractelement <4 x i32> %{{.*}}, i32 0
 ;CHECK: ret i32
@@ -176,9 +176,9 @@ for.end:                                          ; preds = %for.body, %entry
 ;CHECK-LABEL: @reduction_and(
 ;CHECK: <i32 -1, i32 -1, i32 -1, i32 -1>
 ;CHECK: and <4 x i32>
-;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> poison, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
 ;CHECK: and <4 x i32>
-;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
 ;CHECK: and <4 x i32>
 ;CHECK: extractelement <4 x i32> %{{.*}}, i32 0
 ;CHECK: ret i32
@@ -208,9 +208,9 @@ for.end:                                          ; preds = %for.body, %entry
 
 ;CHECK-LABEL: @reduction_or(
 ;CHECK: or <4 x i32>
-;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> poison, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
 ;CHECK: or <4 x i32>
-;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
 ;CHECK: or <4 x i32>
 ;CHECK: extractelement <4 x i32> %{{.*}}, i32 0
 ;CHECK: ret i32
@@ -240,9 +240,9 @@ for.end:                                          ; preds = %for.body, %entry
 
 ;CHECK-LABEL: @reduction_xor(
 ;CHECK: xor <4 x i32>
-;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> poison, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
 ;CHECK: xor <4 x i32>
-;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
 ;CHECK: xor <4 x i32>
 ;CHECK: extractelement <4 x i32> %{{.*}}, i32 0
 ;CHECK: ret i32
@@ -498,9 +498,9 @@ exit:
 ;CHECK: phi <4 x i32>
 ;CHECK: load <4 x i32>
 ;CHECK: add <4 x i32>
-;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> poison, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
 ;CHECK: add <4 x i32>
-;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
 ;CHECK: add <4 x i32>
 ;CHECK: extractelement <4 x i32> %{{.*}}, i32 0
 ;CHECK: %sum.copy = phi i32 [ %[[SCALAR:.*]], %.lr.ph ], [ %[[VECTOR:.*]], %middle.block ]

diff  --git a/llvm/test/Transforms/LoopVectorize/select-reduction.ll b/llvm/test/Transforms/LoopVectorize/select-reduction.ll
index bc73494d6a57..d5caf1183dff 100644
--- a/llvm/test/Transforms/LoopVectorize/select-reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/select-reduction.ll
@@ -41,10 +41,10 @@ define i32 @test(i64 %N, i32 %x) {
 ; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP0:!llvm.loop !.*]]
 ; CHECK:       middle.block:
-; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> poison, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[RDX_MINMAX_CMP:%.*]] = icmp sgt <4 x i32> [[TMP4]], [[RDX_SHUF]]
 ; CHECK-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x i32> [[TMP4]], <4 x i32> [[RDX_SHUF]]
-; CHECK-NEXT:    [[RDX_SHUF5:%.*]] = shufflevector <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT:    [[RDX_SHUF5:%.*]] = shufflevector <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[RDX_MINMAX_CMP6:%.*]] = icmp sgt <4 x i32> [[RDX_MINMAX_SELECT]], [[RDX_SHUF5]]
 ; CHECK-NEXT:    [[RDX_MINMAX_SELECT7:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP6]], <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> [[RDX_SHUF5]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x i32> [[RDX_MINMAX_SELECT7]], i32 0

diff  --git a/llvm/test/Transforms/LowerMatrixIntrinsics/bigger-expressions-double.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/bigger-expressions-double.ll
index 9ba0f30ba142..4fe8aac79f7b 100644
--- a/llvm/test/Transforms/LowerMatrixIntrinsics/bigger-expressions-double.ll
+++ b/llvm/test/Transforms/LowerMatrixIntrinsics/bigger-expressions-double.ll
@@ -41,176 +41,176 @@ define void @transpose_multiply(<9 x double>* %A.Ptr, <9 x double>* %B.Ptr, <9 x
 ; CHECK-NEXT:    [[TMP17:%.*]] = insertelement <3 x double> [[TMP15]], double [[TMP16]], i64 1
 ; CHECK-NEXT:    [[TMP18:%.*]] = extractelement <3 x double> [[COL_LOAD5]], i64 2
 ; CHECK-NEXT:    [[TMP19:%.*]] = insertelement <3 x double> [[TMP17]], double [[TMP18]], i64 2
-; CHECK-NEXT:    [[BLOCK:%.*]] = shufflevector <3 x double> [[TMP7]], <3 x double> undef, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[BLOCK:%.*]] = shufflevector <3 x double> [[TMP7]], <3 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP20:%.*]] = extractelement <3 x double> [[COL_LOAD7]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x double> poison, double [[TMP20]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP21:%.*]] = fmul <1 x double> [[BLOCK]], [[SPLAT_SPLAT]]
-; CHECK-NEXT:    [[BLOCK14:%.*]] = shufflevector <3 x double> [[TMP13]], <3 x double> undef, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[BLOCK14:%.*]] = shufflevector <3 x double> [[TMP13]], <3 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP22:%.*]] = extractelement <3 x double> [[COL_LOAD7]], i64 1
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT15:%.*]] = insertelement <1 x double> poison, double [[TMP22]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT16:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT15]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP23:%.*]] = fmul <1 x double> [[BLOCK14]], [[SPLAT_SPLAT16]]
 ; CHECK-NEXT:    [[TMP24:%.*]] = fadd <1 x double> [[TMP21]], [[TMP23]]
-; CHECK-NEXT:    [[BLOCK17:%.*]] = shufflevector <3 x double> [[TMP19]], <3 x double> undef, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[BLOCK17:%.*]] = shufflevector <3 x double> [[TMP19]], <3 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP25:%.*]] = extractelement <3 x double> [[COL_LOAD7]], i64 2
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT18:%.*]] = insertelement <1 x double> poison, double [[TMP25]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT19:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT18]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP26:%.*]] = fmul <1 x double> [[BLOCK17]], [[SPLAT_SPLAT19]]
 ; CHECK-NEXT:    [[TMP27:%.*]] = fadd <1 x double> [[TMP24]], [[TMP26]]
-; CHECK-NEXT:    [[TMP28:%.*]] = shufflevector <1 x double> [[TMP27]], <1 x double> undef, <3 x i32> <i32 0, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP28:%.*]] = shufflevector <1 x double> [[TMP27]], <1 x double> poison, <3 x i32> <i32 0, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[TMP29:%.*]] = shufflevector <3 x double> undef, <3 x double> [[TMP28]], <3 x i32> <i32 3, i32 1, i32 2>
-; CHECK-NEXT:    [[BLOCK20:%.*]] = shufflevector <3 x double> [[TMP7]], <3 x double> undef, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[BLOCK20:%.*]] = shufflevector <3 x double> [[TMP7]], <3 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP30:%.*]] = extractelement <3 x double> [[COL_LOAD7]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT21:%.*]] = insertelement <1 x double> poison, double [[TMP30]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT22:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT21]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP31:%.*]] = fmul <1 x double> [[BLOCK20]], [[SPLAT_SPLAT22]]
-; CHECK-NEXT:    [[BLOCK23:%.*]] = shufflevector <3 x double> [[TMP13]], <3 x double> undef, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[BLOCK23:%.*]] = shufflevector <3 x double> [[TMP13]], <3 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP32:%.*]] = extractelement <3 x double> [[COL_LOAD7]], i64 1
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT24:%.*]] = insertelement <1 x double> poison, double [[TMP32]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT25:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT24]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP33:%.*]] = fmul <1 x double> [[BLOCK23]], [[SPLAT_SPLAT25]]
 ; CHECK-NEXT:    [[TMP34:%.*]] = fadd <1 x double> [[TMP31]], [[TMP33]]
-; CHECK-NEXT:    [[BLOCK26:%.*]] = shufflevector <3 x double> [[TMP19]], <3 x double> undef, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[BLOCK26:%.*]] = shufflevector <3 x double> [[TMP19]], <3 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP35:%.*]] = extractelement <3 x double> [[COL_LOAD7]], i64 2
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT27:%.*]] = insertelement <1 x double> poison, double [[TMP35]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT28:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT27]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP36:%.*]] = fmul <1 x double> [[BLOCK26]], [[SPLAT_SPLAT28]]
 ; CHECK-NEXT:    [[TMP37:%.*]] = fadd <1 x double> [[TMP34]], [[TMP36]]
-; CHECK-NEXT:    [[TMP38:%.*]] = shufflevector <1 x double> [[TMP37]], <1 x double> undef, <3 x i32> <i32 0, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP38:%.*]] = shufflevector <1 x double> [[TMP37]], <1 x double> poison, <3 x i32> <i32 0, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[TMP39:%.*]] = shufflevector <3 x double> [[TMP29]], <3 x double> [[TMP38]], <3 x i32> <i32 0, i32 3, i32 2>
-; CHECK-NEXT:    [[BLOCK29:%.*]] = shufflevector <3 x double> [[TMP7]], <3 x double> undef, <1 x i32> <i32 2>
+; CHECK-NEXT:    [[BLOCK29:%.*]] = shufflevector <3 x double> [[TMP7]], <3 x double> poison, <1 x i32> <i32 2>
 ; CHECK-NEXT:    [[TMP40:%.*]] = extractelement <3 x double> [[COL_LOAD7]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT30:%.*]] = insertelement <1 x double> poison, double [[TMP40]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT31:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT30]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP41:%.*]] = fmul <1 x double> [[BLOCK29]], [[SPLAT_SPLAT31]]
-; CHECK-NEXT:    [[BLOCK32:%.*]] = shufflevector <3 x double> [[TMP13]], <3 x double> undef, <1 x i32> <i32 2>
+; CHECK-NEXT:    [[BLOCK32:%.*]] = shufflevector <3 x double> [[TMP13]], <3 x double> poison, <1 x i32> <i32 2>
 ; CHECK-NEXT:    [[TMP42:%.*]] = extractelement <3 x double> [[COL_LOAD7]], i64 1
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT33:%.*]] = insertelement <1 x double> poison, double [[TMP42]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT34:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT33]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP43:%.*]] = fmul <1 x double> [[BLOCK32]], [[SPLAT_SPLAT34]]
 ; CHECK-NEXT:    [[TMP44:%.*]] = fadd <1 x double> [[TMP41]], [[TMP43]]
-; CHECK-NEXT:    [[BLOCK35:%.*]] = shufflevector <3 x double> [[TMP19]], <3 x double> undef, <1 x i32> <i32 2>
+; CHECK-NEXT:    [[BLOCK35:%.*]] = shufflevector <3 x double> [[TMP19]], <3 x double> poison, <1 x i32> <i32 2>
 ; CHECK-NEXT:    [[TMP45:%.*]] = extractelement <3 x double> [[COL_LOAD7]], i64 2
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT36:%.*]] = insertelement <1 x double> poison, double [[TMP45]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT37:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT36]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP46:%.*]] = fmul <1 x double> [[BLOCK35]], [[SPLAT_SPLAT37]]
 ; CHECK-NEXT:    [[TMP47:%.*]] = fadd <1 x double> [[TMP44]], [[TMP46]]
-; CHECK-NEXT:    [[TMP48:%.*]] = shufflevector <1 x double> [[TMP47]], <1 x double> undef, <3 x i32> <i32 0, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP48:%.*]] = shufflevector <1 x double> [[TMP47]], <1 x double> poison, <3 x i32> <i32 0, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[TMP49:%.*]] = shufflevector <3 x double> [[TMP39]], <3 x double> [[TMP48]], <3 x i32> <i32 0, i32 1, i32 3>
-; CHECK-NEXT:    [[BLOCK38:%.*]] = shufflevector <3 x double> [[TMP7]], <3 x double> undef, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[BLOCK38:%.*]] = shufflevector <3 x double> [[TMP7]], <3 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP50:%.*]] = extractelement <3 x double> [[COL_LOAD10]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT39:%.*]] = insertelement <1 x double> poison, double [[TMP50]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT40:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT39]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP51:%.*]] = fmul <1 x double> [[BLOCK38]], [[SPLAT_SPLAT40]]
-; CHECK-NEXT:    [[BLOCK41:%.*]] = shufflevector <3 x double> [[TMP13]], <3 x double> undef, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[BLOCK41:%.*]] = shufflevector <3 x double> [[TMP13]], <3 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP52:%.*]] = extractelement <3 x double> [[COL_LOAD10]], i64 1
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT42:%.*]] = insertelement <1 x double> poison, double [[TMP52]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT43:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT42]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP53:%.*]] = fmul <1 x double> [[BLOCK41]], [[SPLAT_SPLAT43]]
 ; CHECK-NEXT:    [[TMP54:%.*]] = fadd <1 x double> [[TMP51]], [[TMP53]]
-; CHECK-NEXT:    [[BLOCK44:%.*]] = shufflevector <3 x double> [[TMP19]], <3 x double> undef, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[BLOCK44:%.*]] = shufflevector <3 x double> [[TMP19]], <3 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP55:%.*]] = extractelement <3 x double> [[COL_LOAD10]], i64 2
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT45:%.*]] = insertelement <1 x double> poison, double [[TMP55]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT46:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT45]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP56:%.*]] = fmul <1 x double> [[BLOCK44]], [[SPLAT_SPLAT46]]
 ; CHECK-NEXT:    [[TMP57:%.*]] = fadd <1 x double> [[TMP54]], [[TMP56]]
-; CHECK-NEXT:    [[TMP58:%.*]] = shufflevector <1 x double> [[TMP57]], <1 x double> undef, <3 x i32> <i32 0, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP58:%.*]] = shufflevector <1 x double> [[TMP57]], <1 x double> poison, <3 x i32> <i32 0, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[TMP59:%.*]] = shufflevector <3 x double> undef, <3 x double> [[TMP58]], <3 x i32> <i32 3, i32 1, i32 2>
-; CHECK-NEXT:    [[BLOCK47:%.*]] = shufflevector <3 x double> [[TMP7]], <3 x double> undef, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[BLOCK47:%.*]] = shufflevector <3 x double> [[TMP7]], <3 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP60:%.*]] = extractelement <3 x double> [[COL_LOAD10]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT48:%.*]] = insertelement <1 x double> poison, double [[TMP60]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT49:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT48]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP61:%.*]] = fmul <1 x double> [[BLOCK47]], [[SPLAT_SPLAT49]]
-; CHECK-NEXT:    [[BLOCK50:%.*]] = shufflevector <3 x double> [[TMP13]], <3 x double> undef, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[BLOCK50:%.*]] = shufflevector <3 x double> [[TMP13]], <3 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP62:%.*]] = extractelement <3 x double> [[COL_LOAD10]], i64 1
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT51:%.*]] = insertelement <1 x double> poison, double [[TMP62]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT52:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT51]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP63:%.*]] = fmul <1 x double> [[BLOCK50]], [[SPLAT_SPLAT52]]
 ; CHECK-NEXT:    [[TMP64:%.*]] = fadd <1 x double> [[TMP61]], [[TMP63]]
-; CHECK-NEXT:    [[BLOCK53:%.*]] = shufflevector <3 x double> [[TMP19]], <3 x double> undef, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[BLOCK53:%.*]] = shufflevector <3 x double> [[TMP19]], <3 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP65:%.*]] = extractelement <3 x double> [[COL_LOAD10]], i64 2
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT54:%.*]] = insertelement <1 x double> poison, double [[TMP65]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT55:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT54]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP66:%.*]] = fmul <1 x double> [[BLOCK53]], [[SPLAT_SPLAT55]]
 ; CHECK-NEXT:    [[TMP67:%.*]] = fadd <1 x double> [[TMP64]], [[TMP66]]
-; CHECK-NEXT:    [[TMP68:%.*]] = shufflevector <1 x double> [[TMP67]], <1 x double> undef, <3 x i32> <i32 0, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP68:%.*]] = shufflevector <1 x double> [[TMP67]], <1 x double> poison, <3 x i32> <i32 0, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[TMP69:%.*]] = shufflevector <3 x double> [[TMP59]], <3 x double> [[TMP68]], <3 x i32> <i32 0, i32 3, i32 2>
-; CHECK-NEXT:    [[BLOCK56:%.*]] = shufflevector <3 x double> [[TMP7]], <3 x double> undef, <1 x i32> <i32 2>
+; CHECK-NEXT:    [[BLOCK56:%.*]] = shufflevector <3 x double> [[TMP7]], <3 x double> poison, <1 x i32> <i32 2>
 ; CHECK-NEXT:    [[TMP70:%.*]] = extractelement <3 x double> [[COL_LOAD10]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT57:%.*]] = insertelement <1 x double> poison, double [[TMP70]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT58:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT57]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP71:%.*]] = fmul <1 x double> [[BLOCK56]], [[SPLAT_SPLAT58]]
-; CHECK-NEXT:    [[BLOCK59:%.*]] = shufflevector <3 x double> [[TMP13]], <3 x double> undef, <1 x i32> <i32 2>
+; CHECK-NEXT:    [[BLOCK59:%.*]] = shufflevector <3 x double> [[TMP13]], <3 x double> poison, <1 x i32> <i32 2>
 ; CHECK-NEXT:    [[TMP72:%.*]] = extractelement <3 x double> [[COL_LOAD10]], i64 1
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT60:%.*]] = insertelement <1 x double> poison, double [[TMP72]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT61:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT60]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP73:%.*]] = fmul <1 x double> [[BLOCK59]], [[SPLAT_SPLAT61]]
 ; CHECK-NEXT:    [[TMP74:%.*]] = fadd <1 x double> [[TMP71]], [[TMP73]]
-; CHECK-NEXT:    [[BLOCK62:%.*]] = shufflevector <3 x double> [[TMP19]], <3 x double> undef, <1 x i32> <i32 2>
+; CHECK-NEXT:    [[BLOCK62:%.*]] = shufflevector <3 x double> [[TMP19]], <3 x double> poison, <1 x i32> <i32 2>
 ; CHECK-NEXT:    [[TMP75:%.*]] = extractelement <3 x double> [[COL_LOAD10]], i64 2
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT63:%.*]] = insertelement <1 x double> poison, double [[TMP75]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT64:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT63]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP76:%.*]] = fmul <1 x double> [[BLOCK62]], [[SPLAT_SPLAT64]]
 ; CHECK-NEXT:    [[TMP77:%.*]] = fadd <1 x double> [[TMP74]], [[TMP76]]
-; CHECK-NEXT:    [[TMP78:%.*]] = shufflevector <1 x double> [[TMP77]], <1 x double> undef, <3 x i32> <i32 0, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP78:%.*]] = shufflevector <1 x double> [[TMP77]], <1 x double> poison, <3 x i32> <i32 0, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[TMP79:%.*]] = shufflevector <3 x double> [[TMP69]], <3 x double> [[TMP78]], <3 x i32> <i32 0, i32 1, i32 3>
-; CHECK-NEXT:    [[BLOCK65:%.*]] = shufflevector <3 x double> [[TMP7]], <3 x double> undef, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[BLOCK65:%.*]] = shufflevector <3 x double> [[TMP7]], <3 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP80:%.*]] = extractelement <3 x double> [[COL_LOAD13]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT66:%.*]] = insertelement <1 x double> poison, double [[TMP80]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT67:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT66]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP81:%.*]] = fmul <1 x double> [[BLOCK65]], [[SPLAT_SPLAT67]]
-; CHECK-NEXT:    [[BLOCK68:%.*]] = shufflevector <3 x double> [[TMP13]], <3 x double> undef, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[BLOCK68:%.*]] = shufflevector <3 x double> [[TMP13]], <3 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP82:%.*]] = extractelement <3 x double> [[COL_LOAD13]], i64 1
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT69:%.*]] = insertelement <1 x double> poison, double [[TMP82]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT70:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT69]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP83:%.*]] = fmul <1 x double> [[BLOCK68]], [[SPLAT_SPLAT70]]
 ; CHECK-NEXT:    [[TMP84:%.*]] = fadd <1 x double> [[TMP81]], [[TMP83]]
-; CHECK-NEXT:    [[BLOCK71:%.*]] = shufflevector <3 x double> [[TMP19]], <3 x double> undef, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[BLOCK71:%.*]] = shufflevector <3 x double> [[TMP19]], <3 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP85:%.*]] = extractelement <3 x double> [[COL_LOAD13]], i64 2
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT72:%.*]] = insertelement <1 x double> poison, double [[TMP85]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT73:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT72]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP86:%.*]] = fmul <1 x double> [[BLOCK71]], [[SPLAT_SPLAT73]]
 ; CHECK-NEXT:    [[TMP87:%.*]] = fadd <1 x double> [[TMP84]], [[TMP86]]
-; CHECK-NEXT:    [[TMP88:%.*]] = shufflevector <1 x double> [[TMP87]], <1 x double> undef, <3 x i32> <i32 0, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP88:%.*]] = shufflevector <1 x double> [[TMP87]], <1 x double> poison, <3 x i32> <i32 0, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[TMP89:%.*]] = shufflevector <3 x double> undef, <3 x double> [[TMP88]], <3 x i32> <i32 3, i32 1, i32 2>
-; CHECK-NEXT:    [[BLOCK74:%.*]] = shufflevector <3 x double> [[TMP7]], <3 x double> undef, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[BLOCK74:%.*]] = shufflevector <3 x double> [[TMP7]], <3 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP90:%.*]] = extractelement <3 x double> [[COL_LOAD13]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT75:%.*]] = insertelement <1 x double> poison, double [[TMP90]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT76:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT75]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP91:%.*]] = fmul <1 x double> [[BLOCK74]], [[SPLAT_SPLAT76]]
-; CHECK-NEXT:    [[BLOCK77:%.*]] = shufflevector <3 x double> [[TMP13]], <3 x double> undef, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[BLOCK77:%.*]] = shufflevector <3 x double> [[TMP13]], <3 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP92:%.*]] = extractelement <3 x double> [[COL_LOAD13]], i64 1
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT78:%.*]] = insertelement <1 x double> poison, double [[TMP92]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT79:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT78]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP93:%.*]] = fmul <1 x double> [[BLOCK77]], [[SPLAT_SPLAT79]]
 ; CHECK-NEXT:    [[TMP94:%.*]] = fadd <1 x double> [[TMP91]], [[TMP93]]
-; CHECK-NEXT:    [[BLOCK80:%.*]] = shufflevector <3 x double> [[TMP19]], <3 x double> undef, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[BLOCK80:%.*]] = shufflevector <3 x double> [[TMP19]], <3 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP95:%.*]] = extractelement <3 x double> [[COL_LOAD13]], i64 2
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT81:%.*]] = insertelement <1 x double> poison, double [[TMP95]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT82:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT81]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP96:%.*]] = fmul <1 x double> [[BLOCK80]], [[SPLAT_SPLAT82]]
 ; CHECK-NEXT:    [[TMP97:%.*]] = fadd <1 x double> [[TMP94]], [[TMP96]]
-; CHECK-NEXT:    [[TMP98:%.*]] = shufflevector <1 x double> [[TMP97]], <1 x double> undef, <3 x i32> <i32 0, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP98:%.*]] = shufflevector <1 x double> [[TMP97]], <1 x double> poison, <3 x i32> <i32 0, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[TMP99:%.*]] = shufflevector <3 x double> [[TMP89]], <3 x double> [[TMP98]], <3 x i32> <i32 0, i32 3, i32 2>
-; CHECK-NEXT:    [[BLOCK83:%.*]] = shufflevector <3 x double> [[TMP7]], <3 x double> undef, <1 x i32> <i32 2>
+; CHECK-NEXT:    [[BLOCK83:%.*]] = shufflevector <3 x double> [[TMP7]], <3 x double> poison, <1 x i32> <i32 2>
 ; CHECK-NEXT:    [[TMP100:%.*]] = extractelement <3 x double> [[COL_LOAD13]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT84:%.*]] = insertelement <1 x double> poison, double [[TMP100]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT85:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT84]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP101:%.*]] = fmul <1 x double> [[BLOCK83]], [[SPLAT_SPLAT85]]
-; CHECK-NEXT:    [[BLOCK86:%.*]] = shufflevector <3 x double> [[TMP13]], <3 x double> undef, <1 x i32> <i32 2>
+; CHECK-NEXT:    [[BLOCK86:%.*]] = shufflevector <3 x double> [[TMP13]], <3 x double> poison, <1 x i32> <i32 2>
 ; CHECK-NEXT:    [[TMP102:%.*]] = extractelement <3 x double> [[COL_LOAD13]], i64 1
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT87:%.*]] = insertelement <1 x double> poison, double [[TMP102]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT88:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT87]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP103:%.*]] = fmul <1 x double> [[BLOCK86]], [[SPLAT_SPLAT88]]
 ; CHECK-NEXT:    [[TMP104:%.*]] = fadd <1 x double> [[TMP101]], [[TMP103]]
-; CHECK-NEXT:    [[BLOCK89:%.*]] = shufflevector <3 x double> [[TMP19]], <3 x double> undef, <1 x i32> <i32 2>
+; CHECK-NEXT:    [[BLOCK89:%.*]] = shufflevector <3 x double> [[TMP19]], <3 x double> poison, <1 x i32> <i32 2>
 ; CHECK-NEXT:    [[TMP105:%.*]] = extractelement <3 x double> [[COL_LOAD13]], i64 2
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT90:%.*]] = insertelement <1 x double> poison, double [[TMP105]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT91:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT90]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP106:%.*]] = fmul <1 x double> [[BLOCK89]], [[SPLAT_SPLAT91]]
 ; CHECK-NEXT:    [[TMP107:%.*]] = fadd <1 x double> [[TMP104]], [[TMP106]]
-; CHECK-NEXT:    [[TMP108:%.*]] = shufflevector <1 x double> [[TMP107]], <1 x double> undef, <3 x i32> <i32 0, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP108:%.*]] = shufflevector <1 x double> [[TMP107]], <1 x double> poison, <3 x i32> <i32 0, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[TMP109:%.*]] = shufflevector <3 x double> [[TMP99]], <3 x double> [[TMP108]], <3 x i32> <i32 0, i32 1, i32 3>
 ; CHECK-NEXT:    [[TMP110:%.*]] = bitcast <9 x double>* [[C_PTR:%.*]] to double*
 ; CHECK-NEXT:    [[VEC_CAST92:%.*]] = bitcast double* [[TMP110]] to <3 x double>*
@@ -287,176 +287,176 @@ define void @transpose_multiply_add(<9 x double>* %A.Ptr, <9 x double>* %B.Ptr,
 ; CHECK-NEXT:    [[TMP17:%.*]] = insertelement <3 x double> [[TMP15]], double [[TMP16]], i64 1
 ; CHECK-NEXT:    [[TMP18:%.*]] = extractelement <3 x double> [[COL_LOAD5]], i64 2
 ; CHECK-NEXT:    [[TMP19:%.*]] = insertelement <3 x double> [[TMP17]], double [[TMP18]], i64 2
-; CHECK-NEXT:    [[BLOCK:%.*]] = shufflevector <3 x double> [[TMP7]], <3 x double> undef, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[BLOCK:%.*]] = shufflevector <3 x double> [[TMP7]], <3 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP20:%.*]] = extractelement <3 x double> [[COL_LOAD7]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x double> poison, double [[TMP20]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP21:%.*]] = fmul <1 x double> [[BLOCK]], [[SPLAT_SPLAT]]
-; CHECK-NEXT:    [[BLOCK14:%.*]] = shufflevector <3 x double> [[TMP13]], <3 x double> undef, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[BLOCK14:%.*]] = shufflevector <3 x double> [[TMP13]], <3 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP22:%.*]] = extractelement <3 x double> [[COL_LOAD7]], i64 1
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT15:%.*]] = insertelement <1 x double> poison, double [[TMP22]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT16:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT15]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP23:%.*]] = fmul <1 x double> [[BLOCK14]], [[SPLAT_SPLAT16]]
 ; CHECK-NEXT:    [[TMP24:%.*]] = fadd <1 x double> [[TMP21]], [[TMP23]]
-; CHECK-NEXT:    [[BLOCK17:%.*]] = shufflevector <3 x double> [[TMP19]], <3 x double> undef, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[BLOCK17:%.*]] = shufflevector <3 x double> [[TMP19]], <3 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP25:%.*]] = extractelement <3 x double> [[COL_LOAD7]], i64 2
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT18:%.*]] = insertelement <1 x double> poison, double [[TMP25]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT19:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT18]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP26:%.*]] = fmul <1 x double> [[BLOCK17]], [[SPLAT_SPLAT19]]
 ; CHECK-NEXT:    [[TMP27:%.*]] = fadd <1 x double> [[TMP24]], [[TMP26]]
-; CHECK-NEXT:    [[TMP28:%.*]] = shufflevector <1 x double> [[TMP27]], <1 x double> undef, <3 x i32> <i32 0, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP28:%.*]] = shufflevector <1 x double> [[TMP27]], <1 x double> poison, <3 x i32> <i32 0, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[TMP29:%.*]] = shufflevector <3 x double> undef, <3 x double> [[TMP28]], <3 x i32> <i32 3, i32 1, i32 2>
-; CHECK-NEXT:    [[BLOCK20:%.*]] = shufflevector <3 x double> [[TMP7]], <3 x double> undef, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[BLOCK20:%.*]] = shufflevector <3 x double> [[TMP7]], <3 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP30:%.*]] = extractelement <3 x double> [[COL_LOAD7]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT21:%.*]] = insertelement <1 x double> poison, double [[TMP30]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT22:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT21]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP31:%.*]] = fmul <1 x double> [[BLOCK20]], [[SPLAT_SPLAT22]]
-; CHECK-NEXT:    [[BLOCK23:%.*]] = shufflevector <3 x double> [[TMP13]], <3 x double> undef, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[BLOCK23:%.*]] = shufflevector <3 x double> [[TMP13]], <3 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP32:%.*]] = extractelement <3 x double> [[COL_LOAD7]], i64 1
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT24:%.*]] = insertelement <1 x double> poison, double [[TMP32]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT25:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT24]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP33:%.*]] = fmul <1 x double> [[BLOCK23]], [[SPLAT_SPLAT25]]
 ; CHECK-NEXT:    [[TMP34:%.*]] = fadd <1 x double> [[TMP31]], [[TMP33]]
-; CHECK-NEXT:    [[BLOCK26:%.*]] = shufflevector <3 x double> [[TMP19]], <3 x double> undef, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[BLOCK26:%.*]] = shufflevector <3 x double> [[TMP19]], <3 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP35:%.*]] = extractelement <3 x double> [[COL_LOAD7]], i64 2
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT27:%.*]] = insertelement <1 x double> poison, double [[TMP35]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT28:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT27]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP36:%.*]] = fmul <1 x double> [[BLOCK26]], [[SPLAT_SPLAT28]]
 ; CHECK-NEXT:    [[TMP37:%.*]] = fadd <1 x double> [[TMP34]], [[TMP36]]
-; CHECK-NEXT:    [[TMP38:%.*]] = shufflevector <1 x double> [[TMP37]], <1 x double> undef, <3 x i32> <i32 0, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP38:%.*]] = shufflevector <1 x double> [[TMP37]], <1 x double> poison, <3 x i32> <i32 0, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[TMP39:%.*]] = shufflevector <3 x double> [[TMP29]], <3 x double> [[TMP38]], <3 x i32> <i32 0, i32 3, i32 2>
-; CHECK-NEXT:    [[BLOCK29:%.*]] = shufflevector <3 x double> [[TMP7]], <3 x double> undef, <1 x i32> <i32 2>
+; CHECK-NEXT:    [[BLOCK29:%.*]] = shufflevector <3 x double> [[TMP7]], <3 x double> poison, <1 x i32> <i32 2>
 ; CHECK-NEXT:    [[TMP40:%.*]] = extractelement <3 x double> [[COL_LOAD7]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT30:%.*]] = insertelement <1 x double> poison, double [[TMP40]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT31:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT30]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP41:%.*]] = fmul <1 x double> [[BLOCK29]], [[SPLAT_SPLAT31]]
-; CHECK-NEXT:    [[BLOCK32:%.*]] = shufflevector <3 x double> [[TMP13]], <3 x double> undef, <1 x i32> <i32 2>
+; CHECK-NEXT:    [[BLOCK32:%.*]] = shufflevector <3 x double> [[TMP13]], <3 x double> poison, <1 x i32> <i32 2>
 ; CHECK-NEXT:    [[TMP42:%.*]] = extractelement <3 x double> [[COL_LOAD7]], i64 1
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT33:%.*]] = insertelement <1 x double> poison, double [[TMP42]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT34:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT33]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP43:%.*]] = fmul <1 x double> [[BLOCK32]], [[SPLAT_SPLAT34]]
 ; CHECK-NEXT:    [[TMP44:%.*]] = fadd <1 x double> [[TMP41]], [[TMP43]]
-; CHECK-NEXT:    [[BLOCK35:%.*]] = shufflevector <3 x double> [[TMP19]], <3 x double> undef, <1 x i32> <i32 2>
+; CHECK-NEXT:    [[BLOCK35:%.*]] = shufflevector <3 x double> [[TMP19]], <3 x double> poison, <1 x i32> <i32 2>
 ; CHECK-NEXT:    [[TMP45:%.*]] = extractelement <3 x double> [[COL_LOAD7]], i64 2
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT36:%.*]] = insertelement <1 x double> poison, double [[TMP45]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT37:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT36]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP46:%.*]] = fmul <1 x double> [[BLOCK35]], [[SPLAT_SPLAT37]]
 ; CHECK-NEXT:    [[TMP47:%.*]] = fadd <1 x double> [[TMP44]], [[TMP46]]
-; CHECK-NEXT:    [[TMP48:%.*]] = shufflevector <1 x double> [[TMP47]], <1 x double> undef, <3 x i32> <i32 0, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP48:%.*]] = shufflevector <1 x double> [[TMP47]], <1 x double> poison, <3 x i32> <i32 0, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[TMP49:%.*]] = shufflevector <3 x double> [[TMP39]], <3 x double> [[TMP48]], <3 x i32> <i32 0, i32 1, i32 3>
-; CHECK-NEXT:    [[BLOCK38:%.*]] = shufflevector <3 x double> [[TMP7]], <3 x double> undef, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[BLOCK38:%.*]] = shufflevector <3 x double> [[TMP7]], <3 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP50:%.*]] = extractelement <3 x double> [[COL_LOAD10]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT39:%.*]] = insertelement <1 x double> poison, double [[TMP50]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT40:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT39]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP51:%.*]] = fmul <1 x double> [[BLOCK38]], [[SPLAT_SPLAT40]]
-; CHECK-NEXT:    [[BLOCK41:%.*]] = shufflevector <3 x double> [[TMP13]], <3 x double> undef, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[BLOCK41:%.*]] = shufflevector <3 x double> [[TMP13]], <3 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP52:%.*]] = extractelement <3 x double> [[COL_LOAD10]], i64 1
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT42:%.*]] = insertelement <1 x double> poison, double [[TMP52]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT43:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT42]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP53:%.*]] = fmul <1 x double> [[BLOCK41]], [[SPLAT_SPLAT43]]
 ; CHECK-NEXT:    [[TMP54:%.*]] = fadd <1 x double> [[TMP51]], [[TMP53]]
-; CHECK-NEXT:    [[BLOCK44:%.*]] = shufflevector <3 x double> [[TMP19]], <3 x double> undef, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[BLOCK44:%.*]] = shufflevector <3 x double> [[TMP19]], <3 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP55:%.*]] = extractelement <3 x double> [[COL_LOAD10]], i64 2
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT45:%.*]] = insertelement <1 x double> poison, double [[TMP55]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT46:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT45]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP56:%.*]] = fmul <1 x double> [[BLOCK44]], [[SPLAT_SPLAT46]]
 ; CHECK-NEXT:    [[TMP57:%.*]] = fadd <1 x double> [[TMP54]], [[TMP56]]
-; CHECK-NEXT:    [[TMP58:%.*]] = shufflevector <1 x double> [[TMP57]], <1 x double> undef, <3 x i32> <i32 0, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP58:%.*]] = shufflevector <1 x double> [[TMP57]], <1 x double> poison, <3 x i32> <i32 0, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[TMP59:%.*]] = shufflevector <3 x double> undef, <3 x double> [[TMP58]], <3 x i32> <i32 3, i32 1, i32 2>
-; CHECK-NEXT:    [[BLOCK47:%.*]] = shufflevector <3 x double> [[TMP7]], <3 x double> undef, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[BLOCK47:%.*]] = shufflevector <3 x double> [[TMP7]], <3 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP60:%.*]] = extractelement <3 x double> [[COL_LOAD10]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT48:%.*]] = insertelement <1 x double> poison, double [[TMP60]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT49:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT48]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP61:%.*]] = fmul <1 x double> [[BLOCK47]], [[SPLAT_SPLAT49]]
-; CHECK-NEXT:    [[BLOCK50:%.*]] = shufflevector <3 x double> [[TMP13]], <3 x double> undef, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[BLOCK50:%.*]] = shufflevector <3 x double> [[TMP13]], <3 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP62:%.*]] = extractelement <3 x double> [[COL_LOAD10]], i64 1
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT51:%.*]] = insertelement <1 x double> poison, double [[TMP62]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT52:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT51]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP63:%.*]] = fmul <1 x double> [[BLOCK50]], [[SPLAT_SPLAT52]]
 ; CHECK-NEXT:    [[TMP64:%.*]] = fadd <1 x double> [[TMP61]], [[TMP63]]
-; CHECK-NEXT:    [[BLOCK53:%.*]] = shufflevector <3 x double> [[TMP19]], <3 x double> undef, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[BLOCK53:%.*]] = shufflevector <3 x double> [[TMP19]], <3 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP65:%.*]] = extractelement <3 x double> [[COL_LOAD10]], i64 2
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT54:%.*]] = insertelement <1 x double> poison, double [[TMP65]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT55:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT54]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP66:%.*]] = fmul <1 x double> [[BLOCK53]], [[SPLAT_SPLAT55]]
 ; CHECK-NEXT:    [[TMP67:%.*]] = fadd <1 x double> [[TMP64]], [[TMP66]]
-; CHECK-NEXT:    [[TMP68:%.*]] = shufflevector <1 x double> [[TMP67]], <1 x double> undef, <3 x i32> <i32 0, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP68:%.*]] = shufflevector <1 x double> [[TMP67]], <1 x double> poison, <3 x i32> <i32 0, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[TMP69:%.*]] = shufflevector <3 x double> [[TMP59]], <3 x double> [[TMP68]], <3 x i32> <i32 0, i32 3, i32 2>
-; CHECK-NEXT:    [[BLOCK56:%.*]] = shufflevector <3 x double> [[TMP7]], <3 x double> undef, <1 x i32> <i32 2>
+; CHECK-NEXT:    [[BLOCK56:%.*]] = shufflevector <3 x double> [[TMP7]], <3 x double> poison, <1 x i32> <i32 2>
 ; CHECK-NEXT:    [[TMP70:%.*]] = extractelement <3 x double> [[COL_LOAD10]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT57:%.*]] = insertelement <1 x double> poison, double [[TMP70]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT58:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT57]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP71:%.*]] = fmul <1 x double> [[BLOCK56]], [[SPLAT_SPLAT58]]
-; CHECK-NEXT:    [[BLOCK59:%.*]] = shufflevector <3 x double> [[TMP13]], <3 x double> undef, <1 x i32> <i32 2>
+; CHECK-NEXT:    [[BLOCK59:%.*]] = shufflevector <3 x double> [[TMP13]], <3 x double> poison, <1 x i32> <i32 2>
 ; CHECK-NEXT:    [[TMP72:%.*]] = extractelement <3 x double> [[COL_LOAD10]], i64 1
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT60:%.*]] = insertelement <1 x double> poison, double [[TMP72]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT61:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT60]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP73:%.*]] = fmul <1 x double> [[BLOCK59]], [[SPLAT_SPLAT61]]
 ; CHECK-NEXT:    [[TMP74:%.*]] = fadd <1 x double> [[TMP71]], [[TMP73]]
-; CHECK-NEXT:    [[BLOCK62:%.*]] = shufflevector <3 x double> [[TMP19]], <3 x double> undef, <1 x i32> <i32 2>
+; CHECK-NEXT:    [[BLOCK62:%.*]] = shufflevector <3 x double> [[TMP19]], <3 x double> poison, <1 x i32> <i32 2>
 ; CHECK-NEXT:    [[TMP75:%.*]] = extractelement <3 x double> [[COL_LOAD10]], i64 2
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT63:%.*]] = insertelement <1 x double> poison, double [[TMP75]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT64:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT63]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP76:%.*]] = fmul <1 x double> [[BLOCK62]], [[SPLAT_SPLAT64]]
 ; CHECK-NEXT:    [[TMP77:%.*]] = fadd <1 x double> [[TMP74]], [[TMP76]]
-; CHECK-NEXT:    [[TMP78:%.*]] = shufflevector <1 x double> [[TMP77]], <1 x double> undef, <3 x i32> <i32 0, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP78:%.*]] = shufflevector <1 x double> [[TMP77]], <1 x double> poison, <3 x i32> <i32 0, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[TMP79:%.*]] = shufflevector <3 x double> [[TMP69]], <3 x double> [[TMP78]], <3 x i32> <i32 0, i32 1, i32 3>
-; CHECK-NEXT:    [[BLOCK65:%.*]] = shufflevector <3 x double> [[TMP7]], <3 x double> undef, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[BLOCK65:%.*]] = shufflevector <3 x double> [[TMP7]], <3 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP80:%.*]] = extractelement <3 x double> [[COL_LOAD13]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT66:%.*]] = insertelement <1 x double> poison, double [[TMP80]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT67:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT66]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP81:%.*]] = fmul <1 x double> [[BLOCK65]], [[SPLAT_SPLAT67]]
-; CHECK-NEXT:    [[BLOCK68:%.*]] = shufflevector <3 x double> [[TMP13]], <3 x double> undef, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[BLOCK68:%.*]] = shufflevector <3 x double> [[TMP13]], <3 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP82:%.*]] = extractelement <3 x double> [[COL_LOAD13]], i64 1
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT69:%.*]] = insertelement <1 x double> poison, double [[TMP82]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT70:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT69]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP83:%.*]] = fmul <1 x double> [[BLOCK68]], [[SPLAT_SPLAT70]]
 ; CHECK-NEXT:    [[TMP84:%.*]] = fadd <1 x double> [[TMP81]], [[TMP83]]
-; CHECK-NEXT:    [[BLOCK71:%.*]] = shufflevector <3 x double> [[TMP19]], <3 x double> undef, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[BLOCK71:%.*]] = shufflevector <3 x double> [[TMP19]], <3 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP85:%.*]] = extractelement <3 x double> [[COL_LOAD13]], i64 2
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT72:%.*]] = insertelement <1 x double> poison, double [[TMP85]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT73:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT72]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP86:%.*]] = fmul <1 x double> [[BLOCK71]], [[SPLAT_SPLAT73]]
 ; CHECK-NEXT:    [[TMP87:%.*]] = fadd <1 x double> [[TMP84]], [[TMP86]]
-; CHECK-NEXT:    [[TMP88:%.*]] = shufflevector <1 x double> [[TMP87]], <1 x double> undef, <3 x i32> <i32 0, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP88:%.*]] = shufflevector <1 x double> [[TMP87]], <1 x double> poison, <3 x i32> <i32 0, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[TMP89:%.*]] = shufflevector <3 x double> undef, <3 x double> [[TMP88]], <3 x i32> <i32 3, i32 1, i32 2>
-; CHECK-NEXT:    [[BLOCK74:%.*]] = shufflevector <3 x double> [[TMP7]], <3 x double> undef, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[BLOCK74:%.*]] = shufflevector <3 x double> [[TMP7]], <3 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP90:%.*]] = extractelement <3 x double> [[COL_LOAD13]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT75:%.*]] = insertelement <1 x double> poison, double [[TMP90]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT76:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT75]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP91:%.*]] = fmul <1 x double> [[BLOCK74]], [[SPLAT_SPLAT76]]
-; CHECK-NEXT:    [[BLOCK77:%.*]] = shufflevector <3 x double> [[TMP13]], <3 x double> undef, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[BLOCK77:%.*]] = shufflevector <3 x double> [[TMP13]], <3 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP92:%.*]] = extractelement <3 x double> [[COL_LOAD13]], i64 1
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT78:%.*]] = insertelement <1 x double> poison, double [[TMP92]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT79:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT78]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP93:%.*]] = fmul <1 x double> [[BLOCK77]], [[SPLAT_SPLAT79]]
 ; CHECK-NEXT:    [[TMP94:%.*]] = fadd <1 x double> [[TMP91]], [[TMP93]]
-; CHECK-NEXT:    [[BLOCK80:%.*]] = shufflevector <3 x double> [[TMP19]], <3 x double> undef, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[BLOCK80:%.*]] = shufflevector <3 x double> [[TMP19]], <3 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP95:%.*]] = extractelement <3 x double> [[COL_LOAD13]], i64 2
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT81:%.*]] = insertelement <1 x double> poison, double [[TMP95]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT82:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT81]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP96:%.*]] = fmul <1 x double> [[BLOCK80]], [[SPLAT_SPLAT82]]
 ; CHECK-NEXT:    [[TMP97:%.*]] = fadd <1 x double> [[TMP94]], [[TMP96]]
-; CHECK-NEXT:    [[TMP98:%.*]] = shufflevector <1 x double> [[TMP97]], <1 x double> undef, <3 x i32> <i32 0, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP98:%.*]] = shufflevector <1 x double> [[TMP97]], <1 x double> poison, <3 x i32> <i32 0, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[TMP99:%.*]] = shufflevector <3 x double> [[TMP89]], <3 x double> [[TMP98]], <3 x i32> <i32 0, i32 3, i32 2>
-; CHECK-NEXT:    [[BLOCK83:%.*]] = shufflevector <3 x double> [[TMP7]], <3 x double> undef, <1 x i32> <i32 2>
+; CHECK-NEXT:    [[BLOCK83:%.*]] = shufflevector <3 x double> [[TMP7]], <3 x double> poison, <1 x i32> <i32 2>
 ; CHECK-NEXT:    [[TMP100:%.*]] = extractelement <3 x double> [[COL_LOAD13]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT84:%.*]] = insertelement <1 x double> poison, double [[TMP100]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT85:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT84]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP101:%.*]] = fmul <1 x double> [[BLOCK83]], [[SPLAT_SPLAT85]]
-; CHECK-NEXT:    [[BLOCK86:%.*]] = shufflevector <3 x double> [[TMP13]], <3 x double> undef, <1 x i32> <i32 2>
+; CHECK-NEXT:    [[BLOCK86:%.*]] = shufflevector <3 x double> [[TMP13]], <3 x double> poison, <1 x i32> <i32 2>
 ; CHECK-NEXT:    [[TMP102:%.*]] = extractelement <3 x double> [[COL_LOAD13]], i64 1
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT87:%.*]] = insertelement <1 x double> poison, double [[TMP102]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT88:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT87]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP103:%.*]] = fmul <1 x double> [[BLOCK86]], [[SPLAT_SPLAT88]]
 ; CHECK-NEXT:    [[TMP104:%.*]] = fadd <1 x double> [[TMP101]], [[TMP103]]
-; CHECK-NEXT:    [[BLOCK89:%.*]] = shufflevector <3 x double> [[TMP19]], <3 x double> undef, <1 x i32> <i32 2>
+; CHECK-NEXT:    [[BLOCK89:%.*]] = shufflevector <3 x double> [[TMP19]], <3 x double> poison, <1 x i32> <i32 2>
 ; CHECK-NEXT:    [[TMP105:%.*]] = extractelement <3 x double> [[COL_LOAD13]], i64 2
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT90:%.*]] = insertelement <1 x double> poison, double [[TMP105]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT91:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT90]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP106:%.*]] = fmul <1 x double> [[BLOCK89]], [[SPLAT_SPLAT91]]
 ; CHECK-NEXT:    [[TMP107:%.*]] = fadd <1 x double> [[TMP104]], [[TMP106]]
-; CHECK-NEXT:    [[TMP108:%.*]] = shufflevector <1 x double> [[TMP107]], <1 x double> undef, <3 x i32> <i32 0, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP108:%.*]] = shufflevector <1 x double> [[TMP107]], <1 x double> poison, <3 x i32> <i32 0, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[TMP109:%.*]] = shufflevector <3 x double> [[TMP99]], <3 x double> [[TMP108]], <3 x i32> <i32 0, i32 1, i32 3>
 ; CHECK-NEXT:    [[TMP110:%.*]] = bitcast <9 x double>* [[C_PTR:%.*]] to double*
 ; CHECK-NEXT:    [[VEC_CAST92:%.*]] = bitcast double* [[TMP110]] to <3 x double>*

diff  --git a/llvm/test/Transforms/LowerMatrixIntrinsics/const-gep.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/const-gep.ll
index 62432e9611af..d836d8d73729 100644
--- a/llvm/test/Transforms/LowerMatrixIntrinsics/const-gep.ll
+++ b/llvm/test/Transforms/LowerMatrixIntrinsics/const-gep.ll
@@ -15,57 +15,57 @@ define void @test(i32 %r, i32 %c) {
 ; CHECK-NEXT:    store i32 [[C:%.*]], i32* [[C_ADDR]], align 4
 ; CHECK-NEXT:    [[COL_LOAD:%.*]] = load <2 x double>, <2 x double>* bitcast ([5 x <4 x double>]* @foo to <2 x double>*), align 8
 ; CHECK-NEXT:    [[COL_LOAD1:%.*]] = load <2 x double>, <2 x double>* bitcast (double* getelementptr ([5 x <4 x double>], [5 x <4 x double>]* @foo, i32 0, i32 0, i64 2) to <2 x double>*), align 8
-; CHECK-NEXT:    [[BLOCK:%.*]] = shufflevector <2 x double> [[COL_LOAD]], <2 x double> undef, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[BLOCK:%.*]] = shufflevector <2 x double> [[COL_LOAD]], <2 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x double> [[COL_LOAD]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x double> poison, double [[TMP0]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP1:%.*]] = fmul <1 x double> [[BLOCK]], [[SPLAT_SPLAT]]
-; CHECK-NEXT:    [[BLOCK2:%.*]] = shufflevector <2 x double> [[COL_LOAD1]], <2 x double> undef, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[BLOCK2:%.*]] = shufflevector <2 x double> [[COL_LOAD1]], <2 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x double> [[COL_LOAD]], i64 1
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT3:%.*]] = insertelement <1 x double> poison, double [[TMP2]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT4:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT3]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP3:%.*]] = fmul <1 x double> [[BLOCK2]], [[SPLAT_SPLAT4]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = fadd <1 x double> [[TMP1]], [[TMP3]]
-; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <1 x double> [[TMP4]], <1 x double> undef, <2 x i32> <i32 0, i32 undef>
+; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <1 x double> [[TMP4]], <1 x double> poison, <2 x i32> <i32 0, i32 undef>
 ; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <2 x double> undef, <2 x double> [[TMP5]], <2 x i32> <i32 2, i32 1>
-; CHECK-NEXT:    [[BLOCK5:%.*]] = shufflevector <2 x double> [[COL_LOAD]], <2 x double> undef, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[BLOCK5:%.*]] = shufflevector <2 x double> [[COL_LOAD]], <2 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <2 x double> [[COL_LOAD]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT6:%.*]] = insertelement <1 x double> poison, double [[TMP7]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT7:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT6]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP8:%.*]] = fmul <1 x double> [[BLOCK5]], [[SPLAT_SPLAT7]]
-; CHECK-NEXT:    [[BLOCK8:%.*]] = shufflevector <2 x double> [[COL_LOAD1]], <2 x double> undef, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[BLOCK8:%.*]] = shufflevector <2 x double> [[COL_LOAD1]], <2 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <2 x double> [[COL_LOAD]], i64 1
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT9:%.*]] = insertelement <1 x double> poison, double [[TMP9]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT10:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT9]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP10:%.*]] = fmul <1 x double> [[BLOCK8]], [[SPLAT_SPLAT10]]
 ; CHECK-NEXT:    [[TMP11:%.*]] = fadd <1 x double> [[TMP8]], [[TMP10]]
-; CHECK-NEXT:    [[TMP12:%.*]] = shufflevector <1 x double> [[TMP11]], <1 x double> undef, <2 x i32> <i32 0, i32 undef>
+; CHECK-NEXT:    [[TMP12:%.*]] = shufflevector <1 x double> [[TMP11]], <1 x double> poison, <2 x i32> <i32 0, i32 undef>
 ; CHECK-NEXT:    [[TMP13:%.*]] = shufflevector <2 x double> [[TMP6]], <2 x double> [[TMP12]], <2 x i32> <i32 0, i32 2>
-; CHECK-NEXT:    [[BLOCK11:%.*]] = shufflevector <2 x double> [[COL_LOAD]], <2 x double> undef, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[BLOCK11:%.*]] = shufflevector <2 x double> [[COL_LOAD]], <2 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <2 x double> [[COL_LOAD1]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT12:%.*]] = insertelement <1 x double> poison, double [[TMP14]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT13:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT12]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP15:%.*]] = fmul <1 x double> [[BLOCK11]], [[SPLAT_SPLAT13]]
-; CHECK-NEXT:    [[BLOCK14:%.*]] = shufflevector <2 x double> [[COL_LOAD1]], <2 x double> undef, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[BLOCK14:%.*]] = shufflevector <2 x double> [[COL_LOAD1]], <2 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <2 x double> [[COL_LOAD1]], i64 1
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT15:%.*]] = insertelement <1 x double> poison, double [[TMP16]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT16:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT15]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP17:%.*]] = fmul <1 x double> [[BLOCK14]], [[SPLAT_SPLAT16]]
 ; CHECK-NEXT:    [[TMP18:%.*]] = fadd <1 x double> [[TMP15]], [[TMP17]]
-; CHECK-NEXT:    [[TMP19:%.*]] = shufflevector <1 x double> [[TMP18]], <1 x double> undef, <2 x i32> <i32 0, i32 undef>
+; CHECK-NEXT:    [[TMP19:%.*]] = shufflevector <1 x double> [[TMP18]], <1 x double> poison, <2 x i32> <i32 0, i32 undef>
 ; CHECK-NEXT:    [[TMP20:%.*]] = shufflevector <2 x double> undef, <2 x double> [[TMP19]], <2 x i32> <i32 2, i32 1>
-; CHECK-NEXT:    [[BLOCK17:%.*]] = shufflevector <2 x double> [[COL_LOAD]], <2 x double> undef, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[BLOCK17:%.*]] = shufflevector <2 x double> [[COL_LOAD]], <2 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP21:%.*]] = extractelement <2 x double> [[COL_LOAD1]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT18:%.*]] = insertelement <1 x double> poison, double [[TMP21]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT19:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT18]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP22:%.*]] = fmul <1 x double> [[BLOCK17]], [[SPLAT_SPLAT19]]
-; CHECK-NEXT:    [[BLOCK20:%.*]] = shufflevector <2 x double> [[COL_LOAD1]], <2 x double> undef, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[BLOCK20:%.*]] = shufflevector <2 x double> [[COL_LOAD1]], <2 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP23:%.*]] = extractelement <2 x double> [[COL_LOAD1]], i64 1
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT21:%.*]] = insertelement <1 x double> poison, double [[TMP23]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT22:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT21]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP24:%.*]] = fmul <1 x double> [[BLOCK20]], [[SPLAT_SPLAT22]]
 ; CHECK-NEXT:    [[TMP25:%.*]] = fadd <1 x double> [[TMP22]], [[TMP24]]
-; CHECK-NEXT:    [[TMP26:%.*]] = shufflevector <1 x double> [[TMP25]], <1 x double> undef, <2 x i32> <i32 0, i32 undef>
+; CHECK-NEXT:    [[TMP26:%.*]] = shufflevector <1 x double> [[TMP25]], <1 x double> poison, <2 x i32> <i32 0, i32 undef>
 ; CHECK-NEXT:    [[TMP27:%.*]] = shufflevector <2 x double> [[TMP20]], <2 x double> [[TMP26]], <2 x i32> <i32 0, i32 2>
 ; CHECK-NEXT:    store <2 x double> [[COL_LOAD]], <2 x double>* bitcast (double* getelementptr inbounds ([5 x <4 x double>], [5 x <4 x double>]* @foo, i64 0, i64 2, i32 0) to <2 x double>*), align 8
 ; CHECK-NEXT:    store <2 x double> [[COL_LOAD1]], <2 x double>* bitcast (double* getelementptr ([5 x <4 x double>], [5 x <4 x double>]* @foo, i64 0, i64 2, i64 2) to <2 x double>*), align 8

diff  --git a/llvm/test/Transforms/LowerMatrixIntrinsics/load-align-volatile.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/load-align-volatile.ll
index 9fe38b4d336d..f792177bacf5 100644
--- a/llvm/test/Transforms/LowerMatrixIntrinsics/load-align-volatile.ll
+++ b/llvm/test/Transforms/LowerMatrixIntrinsics/load-align-volatile.ll
@@ -115,7 +115,7 @@ define <6 x float> @strided_load_2x3_align16_stride2(float* %in) {
 ; CHECK-NEXT:    [[VEC_CAST4:%.*]] = bitcast float* [[VEC_GEP3]] to <2 x float>*
 ; CHECK-NEXT:    [[COL_LOAD5:%.*]] = load <2 x float>, <2 x float>* [[VEC_CAST4]], align 16
 ; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x float> [[COL_LOAD]], <2 x float> [[COL_LOAD2]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <2 x float> [[COL_LOAD5]], <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <2 x float> [[COL_LOAD5]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP2]], <6 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5>
 ; CHECK-NEXT:    ret <6 x float> [[TMP3]]
 ;

diff  --git a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-add-sub-double-row-major.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-add-sub-double-row-major.ll
index 8ccf1dde632d..85027c7af325 100644
--- a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-add-sub-double-row-major.ll
+++ b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-add-sub-double-row-major.ll
@@ -43,81 +43,81 @@ define void @multiply_sub_add_2x3_3x2(<6 x double>* %a.ptr, <6 x double>* %b.ptr
 ; RM-NEXT:    [[VEC_GEP17:%.*]] = getelementptr double, double* [[TMP8]], i64 4
 ; RM-NEXT:    [[VEC_CAST18:%.*]] = bitcast double* [[VEC_GEP17]] to <2 x double>*
 ; RM-NEXT:    store <2 x double> [[TMP7]], <2 x double>* [[VEC_CAST18]], align 8
-; RM-NEXT:    [[BLOCK:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> undef, <1 x i32> zeroinitializer
+; RM-NEXT:    [[BLOCK:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP9:%.*]] = extractelement <3 x double> [[TMP2]], i64 0
 ; RM-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x double> poison, double [[TMP9]], i32 0
 ; RM-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT]], <1 x double> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP10:%.*]] = fmul <1 x double> [[SPLAT_SPLAT]], [[BLOCK]]
-; RM-NEXT:    [[BLOCK19:%.*]] = shufflevector <2 x double> [[TMP6]], <2 x double> undef, <1 x i32> zeroinitializer
+; RM-NEXT:    [[BLOCK19:%.*]] = shufflevector <2 x double> [[TMP6]], <2 x double> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP11:%.*]] = extractelement <3 x double> [[TMP2]], i64 1
 ; RM-NEXT:    [[SPLAT_SPLATINSERT20:%.*]] = insertelement <1 x double> poison, double [[TMP11]], i32 0
 ; RM-NEXT:    [[SPLAT_SPLAT21:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT20]], <1 x double> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP12:%.*]] = fmul <1 x double> [[SPLAT_SPLAT21]], [[BLOCK19]]
 ; RM-NEXT:    [[TMP13:%.*]] = fadd <1 x double> [[TMP10]], [[TMP12]]
-; RM-NEXT:    [[BLOCK22:%.*]] = shufflevector <2 x double> [[TMP7]], <2 x double> undef, <1 x i32> zeroinitializer
+; RM-NEXT:    [[BLOCK22:%.*]] = shufflevector <2 x double> [[TMP7]], <2 x double> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP14:%.*]] = extractelement <3 x double> [[TMP2]], i64 2
 ; RM-NEXT:    [[SPLAT_SPLATINSERT23:%.*]] = insertelement <1 x double> poison, double [[TMP14]], i32 0
 ; RM-NEXT:    [[SPLAT_SPLAT24:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT23]], <1 x double> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP15:%.*]] = fmul <1 x double> [[SPLAT_SPLAT24]], [[BLOCK22]]
 ; RM-NEXT:    [[TMP16:%.*]] = fadd <1 x double> [[TMP13]], [[TMP15]]
-; RM-NEXT:    [[TMP17:%.*]] = shufflevector <1 x double> [[TMP16]], <1 x double> undef, <2 x i32> <i32 0, i32 undef>
+; RM-NEXT:    [[TMP17:%.*]] = shufflevector <1 x double> [[TMP16]], <1 x double> poison, <2 x i32> <i32 0, i32 undef>
 ; RM-NEXT:    [[TMP18:%.*]] = shufflevector <2 x double> undef, <2 x double> [[TMP17]], <2 x i32> <i32 2, i32 1>
-; RM-NEXT:    [[BLOCK25:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> undef, <1 x i32> <i32 1>
+; RM-NEXT:    [[BLOCK25:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> poison, <1 x i32> <i32 1>
 ; RM-NEXT:    [[TMP19:%.*]] = extractelement <3 x double> [[TMP2]], i64 0
 ; RM-NEXT:    [[SPLAT_SPLATINSERT26:%.*]] = insertelement <1 x double> poison, double [[TMP19]], i32 0
 ; RM-NEXT:    [[SPLAT_SPLAT27:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT26]], <1 x double> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP20:%.*]] = fmul <1 x double> [[SPLAT_SPLAT27]], [[BLOCK25]]
-; RM-NEXT:    [[BLOCK28:%.*]] = shufflevector <2 x double> [[TMP6]], <2 x double> undef, <1 x i32> <i32 1>
+; RM-NEXT:    [[BLOCK28:%.*]] = shufflevector <2 x double> [[TMP6]], <2 x double> poison, <1 x i32> <i32 1>
 ; RM-NEXT:    [[TMP21:%.*]] = extractelement <3 x double> [[TMP2]], i64 1
 ; RM-NEXT:    [[SPLAT_SPLATINSERT29:%.*]] = insertelement <1 x double> poison, double [[TMP21]], i32 0
 ; RM-NEXT:    [[SPLAT_SPLAT30:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT29]], <1 x double> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP22:%.*]] = fmul <1 x double> [[SPLAT_SPLAT30]], [[BLOCK28]]
 ; RM-NEXT:    [[TMP23:%.*]] = fadd <1 x double> [[TMP20]], [[TMP22]]
-; RM-NEXT:    [[BLOCK31:%.*]] = shufflevector <2 x double> [[TMP7]], <2 x double> undef, <1 x i32> <i32 1>
+; RM-NEXT:    [[BLOCK31:%.*]] = shufflevector <2 x double> [[TMP7]], <2 x double> poison, <1 x i32> <i32 1>
 ; RM-NEXT:    [[TMP24:%.*]] = extractelement <3 x double> [[TMP2]], i64 2
 ; RM-NEXT:    [[SPLAT_SPLATINSERT32:%.*]] = insertelement <1 x double> poison, double [[TMP24]], i32 0
 ; RM-NEXT:    [[SPLAT_SPLAT33:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT32]], <1 x double> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP25:%.*]] = fmul <1 x double> [[SPLAT_SPLAT33]], [[BLOCK31]]
 ; RM-NEXT:    [[TMP26:%.*]] = fadd <1 x double> [[TMP23]], [[TMP25]]
-; RM-NEXT:    [[TMP27:%.*]] = shufflevector <1 x double> [[TMP26]], <1 x double> undef, <2 x i32> <i32 0, i32 undef>
+; RM-NEXT:    [[TMP27:%.*]] = shufflevector <1 x double> [[TMP26]], <1 x double> poison, <2 x i32> <i32 0, i32 undef>
 ; RM-NEXT:    [[TMP28:%.*]] = shufflevector <2 x double> [[TMP18]], <2 x double> [[TMP27]], <2 x i32> <i32 0, i32 2>
-; RM-NEXT:    [[BLOCK34:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> undef, <1 x i32> zeroinitializer
+; RM-NEXT:    [[BLOCK34:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP29:%.*]] = extractelement <3 x double> [[TMP3]], i64 0
 ; RM-NEXT:    [[SPLAT_SPLATINSERT35:%.*]] = insertelement <1 x double> poison, double [[TMP29]], i32 0
 ; RM-NEXT:    [[SPLAT_SPLAT36:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT35]], <1 x double> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP30:%.*]] = fmul <1 x double> [[SPLAT_SPLAT36]], [[BLOCK34]]
-; RM-NEXT:    [[BLOCK37:%.*]] = shufflevector <2 x double> [[TMP6]], <2 x double> undef, <1 x i32> zeroinitializer
+; RM-NEXT:    [[BLOCK37:%.*]] = shufflevector <2 x double> [[TMP6]], <2 x double> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP31:%.*]] = extractelement <3 x double> [[TMP3]], i64 1
 ; RM-NEXT:    [[SPLAT_SPLATINSERT38:%.*]] = insertelement <1 x double> poison, double [[TMP31]], i32 0
 ; RM-NEXT:    [[SPLAT_SPLAT39:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT38]], <1 x double> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP32:%.*]] = fmul <1 x double> [[SPLAT_SPLAT39]], [[BLOCK37]]
 ; RM-NEXT:    [[TMP33:%.*]] = fadd <1 x double> [[TMP30]], [[TMP32]]
-; RM-NEXT:    [[BLOCK40:%.*]] = shufflevector <2 x double> [[TMP7]], <2 x double> undef, <1 x i32> zeroinitializer
+; RM-NEXT:    [[BLOCK40:%.*]] = shufflevector <2 x double> [[TMP7]], <2 x double> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP34:%.*]] = extractelement <3 x double> [[TMP3]], i64 2
 ; RM-NEXT:    [[SPLAT_SPLATINSERT41:%.*]] = insertelement <1 x double> poison, double [[TMP34]], i32 0
 ; RM-NEXT:    [[SPLAT_SPLAT42:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT41]], <1 x double> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP35:%.*]] = fmul <1 x double> [[SPLAT_SPLAT42]], [[BLOCK40]]
 ; RM-NEXT:    [[TMP36:%.*]] = fadd <1 x double> [[TMP33]], [[TMP35]]
-; RM-NEXT:    [[TMP37:%.*]] = shufflevector <1 x double> [[TMP36]], <1 x double> undef, <2 x i32> <i32 0, i32 undef>
+; RM-NEXT:    [[TMP37:%.*]] = shufflevector <1 x double> [[TMP36]], <1 x double> poison, <2 x i32> <i32 0, i32 undef>
 ; RM-NEXT:    [[TMP38:%.*]] = shufflevector <2 x double> undef, <2 x double> [[TMP37]], <2 x i32> <i32 2, i32 1>
-; RM-NEXT:    [[BLOCK43:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> undef, <1 x i32> <i32 1>
+; RM-NEXT:    [[BLOCK43:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> poison, <1 x i32> <i32 1>
 ; RM-NEXT:    [[TMP39:%.*]] = extractelement <3 x double> [[TMP3]], i64 0
 ; RM-NEXT:    [[SPLAT_SPLATINSERT44:%.*]] = insertelement <1 x double> poison, double [[TMP39]], i32 0
 ; RM-NEXT:    [[SPLAT_SPLAT45:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT44]], <1 x double> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP40:%.*]] = fmul <1 x double> [[SPLAT_SPLAT45]], [[BLOCK43]]
-; RM-NEXT:    [[BLOCK46:%.*]] = shufflevector <2 x double> [[TMP6]], <2 x double> undef, <1 x i32> <i32 1>
+; RM-NEXT:    [[BLOCK46:%.*]] = shufflevector <2 x double> [[TMP6]], <2 x double> poison, <1 x i32> <i32 1>
 ; RM-NEXT:    [[TMP41:%.*]] = extractelement <3 x double> [[TMP3]], i64 1
 ; RM-NEXT:    [[SPLAT_SPLATINSERT47:%.*]] = insertelement <1 x double> poison, double [[TMP41]], i32 0
 ; RM-NEXT:    [[SPLAT_SPLAT48:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT47]], <1 x double> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP42:%.*]] = fmul <1 x double> [[SPLAT_SPLAT48]], [[BLOCK46]]
 ; RM-NEXT:    [[TMP43:%.*]] = fadd <1 x double> [[TMP40]], [[TMP42]]
-; RM-NEXT:    [[BLOCK49:%.*]] = shufflevector <2 x double> [[TMP7]], <2 x double> undef, <1 x i32> <i32 1>
+; RM-NEXT:    [[BLOCK49:%.*]] = shufflevector <2 x double> [[TMP7]], <2 x double> poison, <1 x i32> <i32 1>
 ; RM-NEXT:    [[TMP44:%.*]] = extractelement <3 x double> [[TMP3]], i64 2
 ; RM-NEXT:    [[SPLAT_SPLATINSERT50:%.*]] = insertelement <1 x double> poison, double [[TMP44]], i32 0
 ; RM-NEXT:    [[SPLAT_SPLAT51:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT50]], <1 x double> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP45:%.*]] = fmul <1 x double> [[SPLAT_SPLAT51]], [[BLOCK49]]
 ; RM-NEXT:    [[TMP46:%.*]] = fadd <1 x double> [[TMP43]], [[TMP45]]
-; RM-NEXT:    [[TMP47:%.*]] = shufflevector <1 x double> [[TMP46]], <1 x double> undef, <2 x i32> <i32 0, i32 undef>
+; RM-NEXT:    [[TMP47:%.*]] = shufflevector <1 x double> [[TMP46]], <1 x double> poison, <2 x i32> <i32 0, i32 undef>
 ; RM-NEXT:    [[TMP48:%.*]] = shufflevector <2 x double> [[TMP38]], <2 x double> [[TMP47]], <2 x i32> <i32 0, i32 2>
 ; RM-NEXT:    [[TMP49:%.*]] = bitcast <4 x double>* [[C_PTR:%.*]] to double*
 ; RM-NEXT:    [[VEC_CAST52:%.*]] = bitcast double* [[TMP49]] to <2 x double>*

diff  --git a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-double-contraction-fmf.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-double-contraction-fmf.ll
index ece51aa49682..3c13902f07b6 100644
--- a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-double-contraction-fmf.ll
+++ b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-double-contraction-fmf.ll
@@ -6,57 +6,57 @@
 define <4 x double> @multiply_2x2(<4 x double> %a, <4 x double> %b) {
 ; CHECK-LABEL: @multiply_2x2(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[SPLIT:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> undef, <2 x i32> <i32 0, i32 1>
-; CHECK-NEXT:    [[SPLIT1:%.*]] = shufflevector <4 x double> [[A]], <4 x double> undef, <2 x i32> <i32 2, i32 3>
-; CHECK-NEXT:    [[SPLIT2:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> undef, <2 x i32> <i32 0, i32 1>
-; CHECK-NEXT:    [[SPLIT3:%.*]] = shufflevector <4 x double> [[B]], <4 x double> undef, <2 x i32> <i32 2, i32 3>
-; CHECK-NEXT:    [[BLOCK:%.*]] = shufflevector <2 x double> [[SPLIT]], <2 x double> undef, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[SPLIT:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> poison, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT:    [[SPLIT1:%.*]] = shufflevector <4 x double> [[A]], <4 x double> poison, <2 x i32> <i32 2, i32 3>
+; CHECK-NEXT:    [[SPLIT2:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT:    [[SPLIT3:%.*]] = shufflevector <4 x double> [[B]], <4 x double> poison, <2 x i32> <i32 2, i32 3>
+; CHECK-NEXT:    [[BLOCK:%.*]] = shufflevector <2 x double> [[SPLIT]], <2 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x double> poison, double [[TMP0]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP1:%.*]] = fmul <1 x double> [[BLOCK]], [[SPLAT_SPLAT]]
-; CHECK-NEXT:    [[BLOCK4:%.*]] = shufflevector <2 x double> [[SPLIT1]], <2 x double> undef, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[BLOCK4:%.*]] = shufflevector <2 x double> [[SPLIT1]], <2 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 1
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT5:%.*]] = insertelement <1 x double> poison, double [[TMP2]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT6:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT5]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP3:%.*]] = call <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK4]], <1 x double> [[SPLAT_SPLAT6]], <1 x double> [[TMP1]])
-; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <1 x double> [[TMP3]], <1 x double> undef, <2 x i32> <i32 0, i32 undef>
+; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <1 x double> [[TMP3]], <1 x double> poison, <2 x i32> <i32 0, i32 undef>
 ; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <2 x double> undef, <2 x double> [[TMP4]], <2 x i32> <i32 2, i32 1>
-; CHECK-NEXT:    [[BLOCK7:%.*]] = shufflevector <2 x double> [[SPLIT]], <2 x double> undef, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[BLOCK7:%.*]] = shufflevector <2 x double> [[SPLIT]], <2 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT8:%.*]] = insertelement <1 x double> poison, double [[TMP6]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT9:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT8]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP7:%.*]] = fmul <1 x double> [[BLOCK7]], [[SPLAT_SPLAT9]]
-; CHECK-NEXT:    [[BLOCK10:%.*]] = shufflevector <2 x double> [[SPLIT1]], <2 x double> undef, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[BLOCK10:%.*]] = shufflevector <2 x double> [[SPLIT1]], <2 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 1
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT11:%.*]] = insertelement <1 x double> poison, double [[TMP8]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT12:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT11]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP9:%.*]] = call <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK10]], <1 x double> [[SPLAT_SPLAT12]], <1 x double> [[TMP7]])
-; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <1 x double> [[TMP9]], <1 x double> undef, <2 x i32> <i32 0, i32 undef>
+; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <1 x double> [[TMP9]], <1 x double> poison, <2 x i32> <i32 0, i32 undef>
 ; CHECK-NEXT:    [[TMP11:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> [[TMP10]], <2 x i32> <i32 0, i32 2>
-; CHECK-NEXT:    [[BLOCK13:%.*]] = shufflevector <2 x double> [[SPLIT]], <2 x double> undef, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[BLOCK13:%.*]] = shufflevector <2 x double> [[SPLIT]], <2 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <2 x double> [[SPLIT3]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT14:%.*]] = insertelement <1 x double> poison, double [[TMP12]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT15:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT14]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP13:%.*]] = fmul <1 x double> [[BLOCK13]], [[SPLAT_SPLAT15]]
-; CHECK-NEXT:    [[BLOCK16:%.*]] = shufflevector <2 x double> [[SPLIT1]], <2 x double> undef, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[BLOCK16:%.*]] = shufflevector <2 x double> [[SPLIT1]], <2 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <2 x double> [[SPLIT3]], i64 1
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT17:%.*]] = insertelement <1 x double> poison, double [[TMP14]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT18:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT17]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP15:%.*]] = call <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK16]], <1 x double> [[SPLAT_SPLAT18]], <1 x double> [[TMP13]])
-; CHECK-NEXT:    [[TMP16:%.*]] = shufflevector <1 x double> [[TMP15]], <1 x double> undef, <2 x i32> <i32 0, i32 undef>
+; CHECK-NEXT:    [[TMP16:%.*]] = shufflevector <1 x double> [[TMP15]], <1 x double> poison, <2 x i32> <i32 0, i32 undef>
 ; CHECK-NEXT:    [[TMP17:%.*]] = shufflevector <2 x double> undef, <2 x double> [[TMP16]], <2 x i32> <i32 2, i32 1>
-; CHECK-NEXT:    [[BLOCK19:%.*]] = shufflevector <2 x double> [[SPLIT]], <2 x double> undef, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[BLOCK19:%.*]] = shufflevector <2 x double> [[SPLIT]], <2 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP18:%.*]] = extractelement <2 x double> [[SPLIT3]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT20:%.*]] = insertelement <1 x double> poison, double [[TMP18]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT21:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT20]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP19:%.*]] = fmul <1 x double> [[BLOCK19]], [[SPLAT_SPLAT21]]
-; CHECK-NEXT:    [[BLOCK22:%.*]] = shufflevector <2 x double> [[SPLIT1]], <2 x double> undef, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[BLOCK22:%.*]] = shufflevector <2 x double> [[SPLIT1]], <2 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP20:%.*]] = extractelement <2 x double> [[SPLIT3]], i64 1
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT23:%.*]] = insertelement <1 x double> poison, double [[TMP20]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT24:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT23]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP21:%.*]] = call <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK22]], <1 x double> [[SPLAT_SPLAT24]], <1 x double> [[TMP19]])
-; CHECK-NEXT:    [[TMP22:%.*]] = shufflevector <1 x double> [[TMP21]], <1 x double> undef, <2 x i32> <i32 0, i32 undef>
+; CHECK-NEXT:    [[TMP22:%.*]] = shufflevector <1 x double> [[TMP21]], <1 x double> poison, <2 x i32> <i32 0, i32 undef>
 ; CHECK-NEXT:    [[TMP23:%.*]] = shufflevector <2 x double> [[TMP17]], <2 x double> [[TMP22]], <2 x i32> <i32 0, i32 2>
 ; CHECK-NEXT:    [[TMP24:%.*]] = shufflevector <2 x double> [[TMP11]], <2 x double> [[TMP23]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 ; CHECK-NEXT:    ret <4 x double> [[TMP24]]

diff  --git a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-double-contraction.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-double-contraction.ll
index 90be5a8b92cf..df85e37d72e4 100644
--- a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-double-contraction.ll
+++ b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-double-contraction.ll
@@ -6,57 +6,57 @@
 define <4 x double> @multiply_2x2(<4 x double> %a, <4 x double> %b) {
 ; CHECK-LABEL: @multiply_2x2(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[SPLIT:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> undef, <2 x i32> <i32 0, i32 1>
-; CHECK-NEXT:    [[SPLIT1:%.*]] = shufflevector <4 x double> [[A]], <4 x double> undef, <2 x i32> <i32 2, i32 3>
-; CHECK-NEXT:    [[SPLIT2:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> undef, <2 x i32> <i32 0, i32 1>
-; CHECK-NEXT:    [[SPLIT3:%.*]] = shufflevector <4 x double> [[B]], <4 x double> undef, <2 x i32> <i32 2, i32 3>
-; CHECK-NEXT:    [[BLOCK:%.*]] = shufflevector <2 x double> [[SPLIT]], <2 x double> undef, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[SPLIT:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> poison, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT:    [[SPLIT1:%.*]] = shufflevector <4 x double> [[A]], <4 x double> poison, <2 x i32> <i32 2, i32 3>
+; CHECK-NEXT:    [[SPLIT2:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT:    [[SPLIT3:%.*]] = shufflevector <4 x double> [[B]], <4 x double> poison, <2 x i32> <i32 2, i32 3>
+; CHECK-NEXT:    [[BLOCK:%.*]] = shufflevector <2 x double> [[SPLIT]], <2 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x double> poison, double [[TMP0]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP1:%.*]] = fmul <1 x double> [[BLOCK]], [[SPLAT_SPLAT]]
-; CHECK-NEXT:    [[BLOCK4:%.*]] = shufflevector <2 x double> [[SPLIT1]], <2 x double> undef, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[BLOCK4:%.*]] = shufflevector <2 x double> [[SPLIT1]], <2 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 1
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT5:%.*]] = insertelement <1 x double> poison, double [[TMP2]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT6:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT5]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP3:%.*]] = call <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK4]], <1 x double> [[SPLAT_SPLAT6]], <1 x double> [[TMP1]])
-; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <1 x double> [[TMP3]], <1 x double> undef, <2 x i32> <i32 0, i32 undef>
+; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <1 x double> [[TMP3]], <1 x double> poison, <2 x i32> <i32 0, i32 undef>
 ; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <2 x double> undef, <2 x double> [[TMP4]], <2 x i32> <i32 2, i32 1>
-; CHECK-NEXT:    [[BLOCK7:%.*]] = shufflevector <2 x double> [[SPLIT]], <2 x double> undef, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[BLOCK7:%.*]] = shufflevector <2 x double> [[SPLIT]], <2 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT8:%.*]] = insertelement <1 x double> poison, double [[TMP6]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT9:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT8]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP7:%.*]] = fmul <1 x double> [[BLOCK7]], [[SPLAT_SPLAT9]]
-; CHECK-NEXT:    [[BLOCK10:%.*]] = shufflevector <2 x double> [[SPLIT1]], <2 x double> undef, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[BLOCK10:%.*]] = shufflevector <2 x double> [[SPLIT1]], <2 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 1
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT11:%.*]] = insertelement <1 x double> poison, double [[TMP8]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT12:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT11]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP9:%.*]] = call <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK10]], <1 x double> [[SPLAT_SPLAT12]], <1 x double> [[TMP7]])
-; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <1 x double> [[TMP9]], <1 x double> undef, <2 x i32> <i32 0, i32 undef>
+; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <1 x double> [[TMP9]], <1 x double> poison, <2 x i32> <i32 0, i32 undef>
 ; CHECK-NEXT:    [[TMP11:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> [[TMP10]], <2 x i32> <i32 0, i32 2>
-; CHECK-NEXT:    [[BLOCK13:%.*]] = shufflevector <2 x double> [[SPLIT]], <2 x double> undef, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[BLOCK13:%.*]] = shufflevector <2 x double> [[SPLIT]], <2 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <2 x double> [[SPLIT3]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT14:%.*]] = insertelement <1 x double> poison, double [[TMP12]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT15:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT14]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP13:%.*]] = fmul <1 x double> [[BLOCK13]], [[SPLAT_SPLAT15]]
-; CHECK-NEXT:    [[BLOCK16:%.*]] = shufflevector <2 x double> [[SPLIT1]], <2 x double> undef, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[BLOCK16:%.*]] = shufflevector <2 x double> [[SPLIT1]], <2 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <2 x double> [[SPLIT3]], i64 1
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT17:%.*]] = insertelement <1 x double> poison, double [[TMP14]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT18:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT17]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP15:%.*]] = call <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK16]], <1 x double> [[SPLAT_SPLAT18]], <1 x double> [[TMP13]])
-; CHECK-NEXT:    [[TMP16:%.*]] = shufflevector <1 x double> [[TMP15]], <1 x double> undef, <2 x i32> <i32 0, i32 undef>
+; CHECK-NEXT:    [[TMP16:%.*]] = shufflevector <1 x double> [[TMP15]], <1 x double> poison, <2 x i32> <i32 0, i32 undef>
 ; CHECK-NEXT:    [[TMP17:%.*]] = shufflevector <2 x double> undef, <2 x double> [[TMP16]], <2 x i32> <i32 2, i32 1>
-; CHECK-NEXT:    [[BLOCK19:%.*]] = shufflevector <2 x double> [[SPLIT]], <2 x double> undef, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[BLOCK19:%.*]] = shufflevector <2 x double> [[SPLIT]], <2 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP18:%.*]] = extractelement <2 x double> [[SPLIT3]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT20:%.*]] = insertelement <1 x double> poison, double [[TMP18]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT21:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT20]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP19:%.*]] = fmul <1 x double> [[BLOCK19]], [[SPLAT_SPLAT21]]
-; CHECK-NEXT:    [[BLOCK22:%.*]] = shufflevector <2 x double> [[SPLIT1]], <2 x double> undef, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[BLOCK22:%.*]] = shufflevector <2 x double> [[SPLIT1]], <2 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP20:%.*]] = extractelement <2 x double> [[SPLIT3]], i64 1
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT23:%.*]] = insertelement <1 x double> poison, double [[TMP20]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT24:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT23]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP21:%.*]] = call <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK22]], <1 x double> [[SPLAT_SPLAT24]], <1 x double> [[TMP19]])
-; CHECK-NEXT:    [[TMP22:%.*]] = shufflevector <1 x double> [[TMP21]], <1 x double> undef, <2 x i32> <i32 0, i32 undef>
+; CHECK-NEXT:    [[TMP22:%.*]] = shufflevector <1 x double> [[TMP21]], <1 x double> poison, <2 x i32> <i32 0, i32 undef>
 ; CHECK-NEXT:    [[TMP23:%.*]] = shufflevector <2 x double> [[TMP17]], <2 x double> [[TMP22]], <2 x i32> <i32 0, i32 2>
 ; CHECK-NEXT:    [[TMP24:%.*]] = shufflevector <2 x double> [[TMP11]], <2 x double> [[TMP23]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 ; CHECK-NEXT:    ret <4 x double> [[TMP24]]

diff  --git a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-double-row-major.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-double-row-major.ll
index d70b28394992..918db9f94f1c 100644
--- a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-double-row-major.ll
+++ b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-double-row-major.ll
@@ -7,61 +7,61 @@
 define <4 x double> @multiply_2x2(<4 x double> %a, <4 x double> %b) {
 ; RM-LABEL: @multiply_2x2(
 ; RM-NEXT:  entry:
-; RM-NEXT:    [[SPLIT:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> undef, <2 x i32> <i32 0, i32 1>
-; RM-NEXT:    [[SPLIT1:%.*]] = shufflevector <4 x double> [[A]], <4 x double> undef, <2 x i32> <i32 2, i32 3>
-; RM-NEXT:    [[SPLIT2:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> undef, <2 x i32> <i32 0, i32 1>
-; RM-NEXT:    [[SPLIT3:%.*]] = shufflevector <4 x double> [[B]], <4 x double> undef, <2 x i32> <i32 2, i32 3>
-; RM-NEXT:    [[BLOCK:%.*]] = shufflevector <2 x double> [[SPLIT2]], <2 x double> undef, <1 x i32> zeroinitializer
+; RM-NEXT:    [[SPLIT:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> poison, <2 x i32> <i32 0, i32 1>
+; RM-NEXT:    [[SPLIT1:%.*]] = shufflevector <4 x double> [[A]], <4 x double> poison, <2 x i32> <i32 2, i32 3>
+; RM-NEXT:    [[SPLIT2:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> <i32 0, i32 1>
+; RM-NEXT:    [[SPLIT3:%.*]] = shufflevector <4 x double> [[B]], <4 x double> poison, <2 x i32> <i32 2, i32 3>
+; RM-NEXT:    [[BLOCK:%.*]] = shufflevector <2 x double> [[SPLIT2]], <2 x double> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP0:%.*]] = extractelement <2 x double> [[SPLIT]], i64 0
 ; RM-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x double> poison, double [[TMP0]], i32 0
 ; RM-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT]], <1 x double> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP1:%.*]] = fmul <1 x double> [[SPLAT_SPLAT]], [[BLOCK]]
-; RM-NEXT:    [[BLOCK4:%.*]] = shufflevector <2 x double> [[SPLIT3]], <2 x double> undef, <1 x i32> zeroinitializer
+; RM-NEXT:    [[BLOCK4:%.*]] = shufflevector <2 x double> [[SPLIT3]], <2 x double> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP2:%.*]] = extractelement <2 x double> [[SPLIT]], i64 1
 ; RM-NEXT:    [[SPLAT_SPLATINSERT5:%.*]] = insertelement <1 x double> poison, double [[TMP2]], i32 0
 ; RM-NEXT:    [[SPLAT_SPLAT6:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT5]], <1 x double> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP3:%.*]] = fmul <1 x double> [[SPLAT_SPLAT6]], [[BLOCK4]]
 ; RM-NEXT:    [[TMP4:%.*]] = fadd <1 x double> [[TMP1]], [[TMP3]]
-; RM-NEXT:    [[TMP5:%.*]] = shufflevector <1 x double> [[TMP4]], <1 x double> undef, <2 x i32> <i32 0, i32 undef>
+; RM-NEXT:    [[TMP5:%.*]] = shufflevector <1 x double> [[TMP4]], <1 x double> poison, <2 x i32> <i32 0, i32 undef>
 ; RM-NEXT:    [[TMP6:%.*]] = shufflevector <2 x double> undef, <2 x double> [[TMP5]], <2 x i32> <i32 2, i32 1>
-; RM-NEXT:    [[BLOCK7:%.*]] = shufflevector <2 x double> [[SPLIT2]], <2 x double> undef, <1 x i32> <i32 1>
+; RM-NEXT:    [[BLOCK7:%.*]] = shufflevector <2 x double> [[SPLIT2]], <2 x double> poison, <1 x i32> <i32 1>
 ; RM-NEXT:    [[TMP7:%.*]] = extractelement <2 x double> [[SPLIT]], i64 0
 ; RM-NEXT:    [[SPLAT_SPLATINSERT8:%.*]] = insertelement <1 x double> poison, double [[TMP7]], i32 0
 ; RM-NEXT:    [[SPLAT_SPLAT9:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT8]], <1 x double> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP8:%.*]] = fmul <1 x double> [[SPLAT_SPLAT9]], [[BLOCK7]]
-; RM-NEXT:    [[BLOCK10:%.*]] = shufflevector <2 x double> [[SPLIT3]], <2 x double> undef, <1 x i32> <i32 1>
+; RM-NEXT:    [[BLOCK10:%.*]] = shufflevector <2 x double> [[SPLIT3]], <2 x double> poison, <1 x i32> <i32 1>
 ; RM-NEXT:    [[TMP9:%.*]] = extractelement <2 x double> [[SPLIT]], i64 1
 ; RM-NEXT:    [[SPLAT_SPLATINSERT11:%.*]] = insertelement <1 x double> poison, double [[TMP9]], i32 0
 ; RM-NEXT:    [[SPLAT_SPLAT12:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT11]], <1 x double> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP10:%.*]] = fmul <1 x double> [[SPLAT_SPLAT12]], [[BLOCK10]]
 ; RM-NEXT:    [[TMP11:%.*]] = fadd <1 x double> [[TMP8]], [[TMP10]]
-; RM-NEXT:    [[TMP12:%.*]] = shufflevector <1 x double> [[TMP11]], <1 x double> undef, <2 x i32> <i32 0, i32 undef>
+; RM-NEXT:    [[TMP12:%.*]] = shufflevector <1 x double> [[TMP11]], <1 x double> poison, <2 x i32> <i32 0, i32 undef>
 ; RM-NEXT:    [[TMP13:%.*]] = shufflevector <2 x double> [[TMP6]], <2 x double> [[TMP12]], <2 x i32> <i32 0, i32 2>
-; RM-NEXT:    [[BLOCK13:%.*]] = shufflevector <2 x double> [[SPLIT2]], <2 x double> undef, <1 x i32> zeroinitializer
+; RM-NEXT:    [[BLOCK13:%.*]] = shufflevector <2 x double> [[SPLIT2]], <2 x double> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP14:%.*]] = extractelement <2 x double> [[SPLIT1]], i64 0
 ; RM-NEXT:    [[SPLAT_SPLATINSERT14:%.*]] = insertelement <1 x double> poison, double [[TMP14]], i32 0
 ; RM-NEXT:    [[SPLAT_SPLAT15:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT14]], <1 x double> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP15:%.*]] = fmul <1 x double> [[SPLAT_SPLAT15]], [[BLOCK13]]
-; RM-NEXT:    [[BLOCK16:%.*]] = shufflevector <2 x double> [[SPLIT3]], <2 x double> undef, <1 x i32> zeroinitializer
+; RM-NEXT:    [[BLOCK16:%.*]] = shufflevector <2 x double> [[SPLIT3]], <2 x double> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP16:%.*]] = extractelement <2 x double> [[SPLIT1]], i64 1
 ; RM-NEXT:    [[SPLAT_SPLATINSERT17:%.*]] = insertelement <1 x double> poison, double [[TMP16]], i32 0
 ; RM-NEXT:    [[SPLAT_SPLAT18:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT17]], <1 x double> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP17:%.*]] = fmul <1 x double> [[SPLAT_SPLAT18]], [[BLOCK16]]
 ; RM-NEXT:    [[TMP18:%.*]] = fadd <1 x double> [[TMP15]], [[TMP17]]
-; RM-NEXT:    [[TMP19:%.*]] = shufflevector <1 x double> [[TMP18]], <1 x double> undef, <2 x i32> <i32 0, i32 undef>
+; RM-NEXT:    [[TMP19:%.*]] = shufflevector <1 x double> [[TMP18]], <1 x double> poison, <2 x i32> <i32 0, i32 undef>
 ; RM-NEXT:    [[TMP20:%.*]] = shufflevector <2 x double> undef, <2 x double> [[TMP19]], <2 x i32> <i32 2, i32 1>
-; RM-NEXT:    [[BLOCK19:%.*]] = shufflevector <2 x double> [[SPLIT2]], <2 x double> undef, <1 x i32> <i32 1>
+; RM-NEXT:    [[BLOCK19:%.*]] = shufflevector <2 x double> [[SPLIT2]], <2 x double> poison, <1 x i32> <i32 1>
 ; RM-NEXT:    [[TMP21:%.*]] = extractelement <2 x double> [[SPLIT1]], i64 0
 ; RM-NEXT:    [[SPLAT_SPLATINSERT20:%.*]] = insertelement <1 x double> poison, double [[TMP21]], i32 0
 ; RM-NEXT:    [[SPLAT_SPLAT21:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT20]], <1 x double> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP22:%.*]] = fmul <1 x double> [[SPLAT_SPLAT21]], [[BLOCK19]]
-; RM-NEXT:    [[BLOCK22:%.*]] = shufflevector <2 x double> [[SPLIT3]], <2 x double> undef, <1 x i32> <i32 1>
+; RM-NEXT:    [[BLOCK22:%.*]] = shufflevector <2 x double> [[SPLIT3]], <2 x double> poison, <1 x i32> <i32 1>
 ; RM-NEXT:    [[TMP23:%.*]] = extractelement <2 x double> [[SPLIT1]], i64 1
 ; RM-NEXT:    [[SPLAT_SPLATINSERT23:%.*]] = insertelement <1 x double> poison, double [[TMP23]], i32 0
 ; RM-NEXT:    [[SPLAT_SPLAT24:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT23]], <1 x double> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP24:%.*]] = fmul <1 x double> [[SPLAT_SPLAT24]], [[BLOCK22]]
 ; RM-NEXT:    [[TMP25:%.*]] = fadd <1 x double> [[TMP22]], [[TMP24]]
-; RM-NEXT:    [[TMP26:%.*]] = shufflevector <1 x double> [[TMP25]], <1 x double> undef, <2 x i32> <i32 0, i32 undef>
+; RM-NEXT:    [[TMP26:%.*]] = shufflevector <1 x double> [[TMP25]], <1 x double> poison, <2 x i32> <i32 0, i32 undef>
 ; RM-NEXT:    [[TMP27:%.*]] = shufflevector <2 x double> [[TMP20]], <2 x double> [[TMP26]], <2 x i32> <i32 0, i32 2>
 ; RM-NEXT:    [[TMP28:%.*]] = shufflevector <2 x double> [[TMP13]], <2 x double> [[TMP27]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 ; RM-NEXT:    ret <4 x double> [[TMP28]]
@@ -76,36 +76,36 @@ declare <4 x double> @llvm.matrix.multiply.v4f64.v4f64.v4f64(<4 x double>, <4 x
 define <4 x double> @multiply_1x2(<2 x double> %a, <2 x double> %b) {
 ; RM-LABEL: @multiply_1x2(
 ; RM-NEXT:  entry:
-; RM-NEXT:    [[SPLIT:%.*]] = shufflevector <2 x double> [[A:%.*]], <2 x double> undef, <1 x i32> zeroinitializer
-; RM-NEXT:    [[SPLIT1:%.*]] = shufflevector <2 x double> [[A]], <2 x double> undef, <1 x i32> <i32 1>
-; RM-NEXT:    [[SPLIT2:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> undef, <2 x i32> <i32 0, i32 1>
-; RM-NEXT:    [[BLOCK:%.*]] = shufflevector <2 x double> [[SPLIT2]], <2 x double> undef, <1 x i32> zeroinitializer
+; RM-NEXT:    [[SPLIT:%.*]] = shufflevector <2 x double> [[A:%.*]], <2 x double> poison, <1 x i32> zeroinitializer
+; RM-NEXT:    [[SPLIT1:%.*]] = shufflevector <2 x double> [[A]], <2 x double> poison, <1 x i32> <i32 1>
+; RM-NEXT:    [[SPLIT2:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <2 x i32> <i32 0, i32 1>
+; RM-NEXT:    [[BLOCK:%.*]] = shufflevector <2 x double> [[SPLIT2]], <2 x double> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP0:%.*]] = extractelement <1 x double> [[SPLIT]], i64 0
 ; RM-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x double> poison, double [[TMP0]], i32 0
 ; RM-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT]], <1 x double> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP1:%.*]] = fmul <1 x double> [[SPLAT_SPLAT]], [[BLOCK]]
-; RM-NEXT:    [[TMP2:%.*]] = shufflevector <1 x double> [[TMP1]], <1 x double> undef, <2 x i32> <i32 0, i32 undef>
+; RM-NEXT:    [[TMP2:%.*]] = shufflevector <1 x double> [[TMP1]], <1 x double> poison, <2 x i32> <i32 0, i32 undef>
 ; RM-NEXT:    [[TMP3:%.*]] = shufflevector <2 x double> undef, <2 x double> [[TMP2]], <2 x i32> <i32 2, i32 1>
-; RM-NEXT:    [[BLOCK3:%.*]] = shufflevector <2 x double> [[SPLIT2]], <2 x double> undef, <1 x i32> <i32 1>
+; RM-NEXT:    [[BLOCK3:%.*]] = shufflevector <2 x double> [[SPLIT2]], <2 x double> poison, <1 x i32> <i32 1>
 ; RM-NEXT:    [[TMP4:%.*]] = extractelement <1 x double> [[SPLIT]], i64 0
 ; RM-NEXT:    [[SPLAT_SPLATINSERT4:%.*]] = insertelement <1 x double> poison, double [[TMP4]], i32 0
 ; RM-NEXT:    [[SPLAT_SPLAT5:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT4]], <1 x double> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP5:%.*]] = fmul <1 x double> [[SPLAT_SPLAT5]], [[BLOCK3]]
-; RM-NEXT:    [[TMP6:%.*]] = shufflevector <1 x double> [[TMP5]], <1 x double> undef, <2 x i32> <i32 0, i32 undef>
+; RM-NEXT:    [[TMP6:%.*]] = shufflevector <1 x double> [[TMP5]], <1 x double> poison, <2 x i32> <i32 0, i32 undef>
 ; RM-NEXT:    [[TMP7:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> [[TMP6]], <2 x i32> <i32 0, i32 2>
-; RM-NEXT:    [[BLOCK6:%.*]] = shufflevector <2 x double> [[SPLIT2]], <2 x double> undef, <1 x i32> zeroinitializer
+; RM-NEXT:    [[BLOCK6:%.*]] = shufflevector <2 x double> [[SPLIT2]], <2 x double> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP8:%.*]] = extractelement <1 x double> [[SPLIT1]], i64 0
 ; RM-NEXT:    [[SPLAT_SPLATINSERT7:%.*]] = insertelement <1 x double> poison, double [[TMP8]], i32 0
 ; RM-NEXT:    [[SPLAT_SPLAT8:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT7]], <1 x double> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP9:%.*]] = fmul <1 x double> [[SPLAT_SPLAT8]], [[BLOCK6]]
-; RM-NEXT:    [[TMP10:%.*]] = shufflevector <1 x double> [[TMP9]], <1 x double> undef, <2 x i32> <i32 0, i32 undef>
+; RM-NEXT:    [[TMP10:%.*]] = shufflevector <1 x double> [[TMP9]], <1 x double> poison, <2 x i32> <i32 0, i32 undef>
 ; RM-NEXT:    [[TMP11:%.*]] = shufflevector <2 x double> undef, <2 x double> [[TMP10]], <2 x i32> <i32 2, i32 1>
-; RM-NEXT:    [[BLOCK9:%.*]] = shufflevector <2 x double> [[SPLIT2]], <2 x double> undef, <1 x i32> <i32 1>
+; RM-NEXT:    [[BLOCK9:%.*]] = shufflevector <2 x double> [[SPLIT2]], <2 x double> poison, <1 x i32> <i32 1>
 ; RM-NEXT:    [[TMP12:%.*]] = extractelement <1 x double> [[SPLIT1]], i64 0
 ; RM-NEXT:    [[SPLAT_SPLATINSERT10:%.*]] = insertelement <1 x double> poison, double [[TMP12]], i32 0
 ; RM-NEXT:    [[SPLAT_SPLAT11:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT10]], <1 x double> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP13:%.*]] = fmul <1 x double> [[SPLAT_SPLAT11]], [[BLOCK9]]
-; RM-NEXT:    [[TMP14:%.*]] = shufflevector <1 x double> [[TMP13]], <1 x double> undef, <2 x i32> <i32 0, i32 undef>
+; RM-NEXT:    [[TMP14:%.*]] = shufflevector <1 x double> [[TMP13]], <1 x double> poison, <2 x i32> <i32 0, i32 undef>
 ; RM-NEXT:    [[TMP15:%.*]] = shufflevector <2 x double> [[TMP11]], <2 x double> [[TMP14]], <2 x i32> <i32 0, i32 2>
 ; RM-NEXT:    [[TMP16:%.*]] = shufflevector <2 x double> [[TMP7]], <2 x double> [[TMP15]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 ; RM-NEXT:    ret <4 x double> [[TMP16]]
@@ -120,130 +120,130 @@ declare <4 x double> @llvm.matrix.multiply.v4f64.v2f64.v2f64(<2 x double>, <2 x
 define <9 x double> @multiply_2x3(<6 x double> %a, <6 x double> %b) {
 ; RM-LABEL: @multiply_2x3(
 ; RM-NEXT:  entry:
-; RM-NEXT:    [[SPLIT:%.*]] = shufflevector <6 x double> [[A:%.*]], <6 x double> undef, <2 x i32> <i32 0, i32 1>
-; RM-NEXT:    [[SPLIT1:%.*]] = shufflevector <6 x double> [[A]], <6 x double> undef, <2 x i32> <i32 2, i32 3>
-; RM-NEXT:    [[SPLIT2:%.*]] = shufflevector <6 x double> [[A]], <6 x double> undef, <2 x i32> <i32 4, i32 5>
-; RM-NEXT:    [[SPLIT3:%.*]] = shufflevector <6 x double> [[B:%.*]], <6 x double> undef, <3 x i32> <i32 0, i32 1, i32 2>
-; RM-NEXT:    [[SPLIT4:%.*]] = shufflevector <6 x double> [[B]], <6 x double> undef, <3 x i32> <i32 3, i32 4, i32 5>
-; RM-NEXT:    [[BLOCK:%.*]] = shufflevector <3 x double> [[SPLIT3]], <3 x double> undef, <1 x i32> zeroinitializer
+; RM-NEXT:    [[SPLIT:%.*]] = shufflevector <6 x double> [[A:%.*]], <6 x double> poison, <2 x i32> <i32 0, i32 1>
+; RM-NEXT:    [[SPLIT1:%.*]] = shufflevector <6 x double> [[A]], <6 x double> poison, <2 x i32> <i32 2, i32 3>
+; RM-NEXT:    [[SPLIT2:%.*]] = shufflevector <6 x double> [[A]], <6 x double> poison, <2 x i32> <i32 4, i32 5>
+; RM-NEXT:    [[SPLIT3:%.*]] = shufflevector <6 x double> [[B:%.*]], <6 x double> poison, <3 x i32> <i32 0, i32 1, i32 2>
+; RM-NEXT:    [[SPLIT4:%.*]] = shufflevector <6 x double> [[B]], <6 x double> poison, <3 x i32> <i32 3, i32 4, i32 5>
+; RM-NEXT:    [[BLOCK:%.*]] = shufflevector <3 x double> [[SPLIT3]], <3 x double> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP0:%.*]] = extractelement <2 x double> [[SPLIT]], i64 0
 ; RM-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x double> poison, double [[TMP0]], i32 0
 ; RM-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT]], <1 x double> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP1:%.*]] = fmul <1 x double> [[SPLAT_SPLAT]], [[BLOCK]]
-; RM-NEXT:    [[BLOCK5:%.*]] = shufflevector <3 x double> [[SPLIT4]], <3 x double> undef, <1 x i32> zeroinitializer
+; RM-NEXT:    [[BLOCK5:%.*]] = shufflevector <3 x double> [[SPLIT4]], <3 x double> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP2:%.*]] = extractelement <2 x double> [[SPLIT]], i64 1
 ; RM-NEXT:    [[SPLAT_SPLATINSERT6:%.*]] = insertelement <1 x double> poison, double [[TMP2]], i32 0
 ; RM-NEXT:    [[SPLAT_SPLAT7:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT6]], <1 x double> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP3:%.*]] = fmul <1 x double> [[SPLAT_SPLAT7]], [[BLOCK5]]
 ; RM-NEXT:    [[TMP4:%.*]] = fadd <1 x double> [[TMP1]], [[TMP3]]
-; RM-NEXT:    [[TMP5:%.*]] = shufflevector <1 x double> [[TMP4]], <1 x double> undef, <3 x i32> <i32 0, i32 undef, i32 undef>
+; RM-NEXT:    [[TMP5:%.*]] = shufflevector <1 x double> [[TMP4]], <1 x double> poison, <3 x i32> <i32 0, i32 undef, i32 undef>
 ; RM-NEXT:    [[TMP6:%.*]] = shufflevector <3 x double> undef, <3 x double> [[TMP5]], <3 x i32> <i32 3, i32 1, i32 2>
-; RM-NEXT:    [[BLOCK8:%.*]] = shufflevector <3 x double> [[SPLIT3]], <3 x double> undef, <1 x i32> <i32 1>
+; RM-NEXT:    [[BLOCK8:%.*]] = shufflevector <3 x double> [[SPLIT3]], <3 x double> poison, <1 x i32> <i32 1>
 ; RM-NEXT:    [[TMP7:%.*]] = extractelement <2 x double> [[SPLIT]], i64 0
 ; RM-NEXT:    [[SPLAT_SPLATINSERT9:%.*]] = insertelement <1 x double> poison, double [[TMP7]], i32 0
 ; RM-NEXT:    [[SPLAT_SPLAT10:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT9]], <1 x double> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP8:%.*]] = fmul <1 x double> [[SPLAT_SPLAT10]], [[BLOCK8]]
-; RM-NEXT:    [[BLOCK11:%.*]] = shufflevector <3 x double> [[SPLIT4]], <3 x double> undef, <1 x i32> <i32 1>
+; RM-NEXT:    [[BLOCK11:%.*]] = shufflevector <3 x double> [[SPLIT4]], <3 x double> poison, <1 x i32> <i32 1>
 ; RM-NEXT:    [[TMP9:%.*]] = extractelement <2 x double> [[SPLIT]], i64 1
 ; RM-NEXT:    [[SPLAT_SPLATINSERT12:%.*]] = insertelement <1 x double> poison, double [[TMP9]], i32 0
 ; RM-NEXT:    [[SPLAT_SPLAT13:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT12]], <1 x double> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP10:%.*]] = fmul <1 x double> [[SPLAT_SPLAT13]], [[BLOCK11]]
 ; RM-NEXT:    [[TMP11:%.*]] = fadd <1 x double> [[TMP8]], [[TMP10]]
-; RM-NEXT:    [[TMP12:%.*]] = shufflevector <1 x double> [[TMP11]], <1 x double> undef, <3 x i32> <i32 0, i32 undef, i32 undef>
+; RM-NEXT:    [[TMP12:%.*]] = shufflevector <1 x double> [[TMP11]], <1 x double> poison, <3 x i32> <i32 0, i32 undef, i32 undef>
 ; RM-NEXT:    [[TMP13:%.*]] = shufflevector <3 x double> [[TMP6]], <3 x double> [[TMP12]], <3 x i32> <i32 0, i32 3, i32 2>
-; RM-NEXT:    [[BLOCK14:%.*]] = shufflevector <3 x double> [[SPLIT3]], <3 x double> undef, <1 x i32> <i32 2>
+; RM-NEXT:    [[BLOCK14:%.*]] = shufflevector <3 x double> [[SPLIT3]], <3 x double> poison, <1 x i32> <i32 2>
 ; RM-NEXT:    [[TMP14:%.*]] = extractelement <2 x double> [[SPLIT]], i64 0
 ; RM-NEXT:    [[SPLAT_SPLATINSERT15:%.*]] = insertelement <1 x double> poison, double [[TMP14]], i32 0
 ; RM-NEXT:    [[SPLAT_SPLAT16:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT15]], <1 x double> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP15:%.*]] = fmul <1 x double> [[SPLAT_SPLAT16]], [[BLOCK14]]
-; RM-NEXT:    [[BLOCK17:%.*]] = shufflevector <3 x double> [[SPLIT4]], <3 x double> undef, <1 x i32> <i32 2>
+; RM-NEXT:    [[BLOCK17:%.*]] = shufflevector <3 x double> [[SPLIT4]], <3 x double> poison, <1 x i32> <i32 2>
 ; RM-NEXT:    [[TMP16:%.*]] = extractelement <2 x double> [[SPLIT]], i64 1
 ; RM-NEXT:    [[SPLAT_SPLATINSERT18:%.*]] = insertelement <1 x double> poison, double [[TMP16]], i32 0
 ; RM-NEXT:    [[SPLAT_SPLAT19:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT18]], <1 x double> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP17:%.*]] = fmul <1 x double> [[SPLAT_SPLAT19]], [[BLOCK17]]
 ; RM-NEXT:    [[TMP18:%.*]] = fadd <1 x double> [[TMP15]], [[TMP17]]
-; RM-NEXT:    [[TMP19:%.*]] = shufflevector <1 x double> [[TMP18]], <1 x double> undef, <3 x i32> <i32 0, i32 undef, i32 undef>
+; RM-NEXT:    [[TMP19:%.*]] = shufflevector <1 x double> [[TMP18]], <1 x double> poison, <3 x i32> <i32 0, i32 undef, i32 undef>
 ; RM-NEXT:    [[TMP20:%.*]] = shufflevector <3 x double> [[TMP13]], <3 x double> [[TMP19]], <3 x i32> <i32 0, i32 1, i32 3>
-; RM-NEXT:    [[BLOCK20:%.*]] = shufflevector <3 x double> [[SPLIT3]], <3 x double> undef, <1 x i32> zeroinitializer
+; RM-NEXT:    [[BLOCK20:%.*]] = shufflevector <3 x double> [[SPLIT3]], <3 x double> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP21:%.*]] = extractelement <2 x double> [[SPLIT1]], i64 0
 ; RM-NEXT:    [[SPLAT_SPLATINSERT21:%.*]] = insertelement <1 x double> poison, double [[TMP21]], i32 0
 ; RM-NEXT:    [[SPLAT_SPLAT22:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT21]], <1 x double> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP22:%.*]] = fmul <1 x double> [[SPLAT_SPLAT22]], [[BLOCK20]]
-; RM-NEXT:    [[BLOCK23:%.*]] = shufflevector <3 x double> [[SPLIT4]], <3 x double> undef, <1 x i32> zeroinitializer
+; RM-NEXT:    [[BLOCK23:%.*]] = shufflevector <3 x double> [[SPLIT4]], <3 x double> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP23:%.*]] = extractelement <2 x double> [[SPLIT1]], i64 1
 ; RM-NEXT:    [[SPLAT_SPLATINSERT24:%.*]] = insertelement <1 x double> poison, double [[TMP23]], i32 0
 ; RM-NEXT:    [[SPLAT_SPLAT25:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT24]], <1 x double> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP24:%.*]] = fmul <1 x double> [[SPLAT_SPLAT25]], [[BLOCK23]]
 ; RM-NEXT:    [[TMP25:%.*]] = fadd <1 x double> [[TMP22]], [[TMP24]]
-; RM-NEXT:    [[TMP26:%.*]] = shufflevector <1 x double> [[TMP25]], <1 x double> undef, <3 x i32> <i32 0, i32 undef, i32 undef>
+; RM-NEXT:    [[TMP26:%.*]] = shufflevector <1 x double> [[TMP25]], <1 x double> poison, <3 x i32> <i32 0, i32 undef, i32 undef>
 ; RM-NEXT:    [[TMP27:%.*]] = shufflevector <3 x double> undef, <3 x double> [[TMP26]], <3 x i32> <i32 3, i32 1, i32 2>
-; RM-NEXT:    [[BLOCK26:%.*]] = shufflevector <3 x double> [[SPLIT3]], <3 x double> undef, <1 x i32> <i32 1>
+; RM-NEXT:    [[BLOCK26:%.*]] = shufflevector <3 x double> [[SPLIT3]], <3 x double> poison, <1 x i32> <i32 1>
 ; RM-NEXT:    [[TMP28:%.*]] = extractelement <2 x double> [[SPLIT1]], i64 0
 ; RM-NEXT:    [[SPLAT_SPLATINSERT27:%.*]] = insertelement <1 x double> poison, double [[TMP28]], i32 0
 ; RM-NEXT:    [[SPLAT_SPLAT28:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT27]], <1 x double> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP29:%.*]] = fmul <1 x double> [[SPLAT_SPLAT28]], [[BLOCK26]]
-; RM-NEXT:    [[BLOCK29:%.*]] = shufflevector <3 x double> [[SPLIT4]], <3 x double> undef, <1 x i32> <i32 1>
+; RM-NEXT:    [[BLOCK29:%.*]] = shufflevector <3 x double> [[SPLIT4]], <3 x double> poison, <1 x i32> <i32 1>
 ; RM-NEXT:    [[TMP30:%.*]] = extractelement <2 x double> [[SPLIT1]], i64 1
 ; RM-NEXT:    [[SPLAT_SPLATINSERT30:%.*]] = insertelement <1 x double> poison, double [[TMP30]], i32 0
 ; RM-NEXT:    [[SPLAT_SPLAT31:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT30]], <1 x double> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP31:%.*]] = fmul <1 x double> [[SPLAT_SPLAT31]], [[BLOCK29]]
 ; RM-NEXT:    [[TMP32:%.*]] = fadd <1 x double> [[TMP29]], [[TMP31]]
-; RM-NEXT:    [[TMP33:%.*]] = shufflevector <1 x double> [[TMP32]], <1 x double> undef, <3 x i32> <i32 0, i32 undef, i32 undef>
+; RM-NEXT:    [[TMP33:%.*]] = shufflevector <1 x double> [[TMP32]], <1 x double> poison, <3 x i32> <i32 0, i32 undef, i32 undef>
 ; RM-NEXT:    [[TMP34:%.*]] = shufflevector <3 x double> [[TMP27]], <3 x double> [[TMP33]], <3 x i32> <i32 0, i32 3, i32 2>
-; RM-NEXT:    [[BLOCK32:%.*]] = shufflevector <3 x double> [[SPLIT3]], <3 x double> undef, <1 x i32> <i32 2>
+; RM-NEXT:    [[BLOCK32:%.*]] = shufflevector <3 x double> [[SPLIT3]], <3 x double> poison, <1 x i32> <i32 2>
 ; RM-NEXT:    [[TMP35:%.*]] = extractelement <2 x double> [[SPLIT1]], i64 0
 ; RM-NEXT:    [[SPLAT_SPLATINSERT33:%.*]] = insertelement <1 x double> poison, double [[TMP35]], i32 0
 ; RM-NEXT:    [[SPLAT_SPLAT34:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT33]], <1 x double> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP36:%.*]] = fmul <1 x double> [[SPLAT_SPLAT34]], [[BLOCK32]]
-; RM-NEXT:    [[BLOCK35:%.*]] = shufflevector <3 x double> [[SPLIT4]], <3 x double> undef, <1 x i32> <i32 2>
+; RM-NEXT:    [[BLOCK35:%.*]] = shufflevector <3 x double> [[SPLIT4]], <3 x double> poison, <1 x i32> <i32 2>
 ; RM-NEXT:    [[TMP37:%.*]] = extractelement <2 x double> [[SPLIT1]], i64 1
 ; RM-NEXT:    [[SPLAT_SPLATINSERT36:%.*]] = insertelement <1 x double> poison, double [[TMP37]], i32 0
 ; RM-NEXT:    [[SPLAT_SPLAT37:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT36]], <1 x double> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP38:%.*]] = fmul <1 x double> [[SPLAT_SPLAT37]], [[BLOCK35]]
 ; RM-NEXT:    [[TMP39:%.*]] = fadd <1 x double> [[TMP36]], [[TMP38]]
-; RM-NEXT:    [[TMP40:%.*]] = shufflevector <1 x double> [[TMP39]], <1 x double> undef, <3 x i32> <i32 0, i32 undef, i32 undef>
+; RM-NEXT:    [[TMP40:%.*]] = shufflevector <1 x double> [[TMP39]], <1 x double> poison, <3 x i32> <i32 0, i32 undef, i32 undef>
 ; RM-NEXT:    [[TMP41:%.*]] = shufflevector <3 x double> [[TMP34]], <3 x double> [[TMP40]], <3 x i32> <i32 0, i32 1, i32 3>
-; RM-NEXT:    [[BLOCK38:%.*]] = shufflevector <3 x double> [[SPLIT3]], <3 x double> undef, <1 x i32> zeroinitializer
+; RM-NEXT:    [[BLOCK38:%.*]] = shufflevector <3 x double> [[SPLIT3]], <3 x double> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP42:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 0
 ; RM-NEXT:    [[SPLAT_SPLATINSERT39:%.*]] = insertelement <1 x double> poison, double [[TMP42]], i32 0
 ; RM-NEXT:    [[SPLAT_SPLAT40:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT39]], <1 x double> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP43:%.*]] = fmul <1 x double> [[SPLAT_SPLAT40]], [[BLOCK38]]
-; RM-NEXT:    [[BLOCK41:%.*]] = shufflevector <3 x double> [[SPLIT4]], <3 x double> undef, <1 x i32> zeroinitializer
+; RM-NEXT:    [[BLOCK41:%.*]] = shufflevector <3 x double> [[SPLIT4]], <3 x double> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP44:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 1
 ; RM-NEXT:    [[SPLAT_SPLATINSERT42:%.*]] = insertelement <1 x double> poison, double [[TMP44]], i32 0
 ; RM-NEXT:    [[SPLAT_SPLAT43:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT42]], <1 x double> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP45:%.*]] = fmul <1 x double> [[SPLAT_SPLAT43]], [[BLOCK41]]
 ; RM-NEXT:    [[TMP46:%.*]] = fadd <1 x double> [[TMP43]], [[TMP45]]
-; RM-NEXT:    [[TMP47:%.*]] = shufflevector <1 x double> [[TMP46]], <1 x double> undef, <3 x i32> <i32 0, i32 undef, i32 undef>
+; RM-NEXT:    [[TMP47:%.*]] = shufflevector <1 x double> [[TMP46]], <1 x double> poison, <3 x i32> <i32 0, i32 undef, i32 undef>
 ; RM-NEXT:    [[TMP48:%.*]] = shufflevector <3 x double> undef, <3 x double> [[TMP47]], <3 x i32> <i32 3, i32 1, i32 2>
-; RM-NEXT:    [[BLOCK44:%.*]] = shufflevector <3 x double> [[SPLIT3]], <3 x double> undef, <1 x i32> <i32 1>
+; RM-NEXT:    [[BLOCK44:%.*]] = shufflevector <3 x double> [[SPLIT3]], <3 x double> poison, <1 x i32> <i32 1>
 ; RM-NEXT:    [[TMP49:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 0
 ; RM-NEXT:    [[SPLAT_SPLATINSERT45:%.*]] = insertelement <1 x double> poison, double [[TMP49]], i32 0
 ; RM-NEXT:    [[SPLAT_SPLAT46:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT45]], <1 x double> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP50:%.*]] = fmul <1 x double> [[SPLAT_SPLAT46]], [[BLOCK44]]
-; RM-NEXT:    [[BLOCK47:%.*]] = shufflevector <3 x double> [[SPLIT4]], <3 x double> undef, <1 x i32> <i32 1>
+; RM-NEXT:    [[BLOCK47:%.*]] = shufflevector <3 x double> [[SPLIT4]], <3 x double> poison, <1 x i32> <i32 1>
 ; RM-NEXT:    [[TMP51:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 1
 ; RM-NEXT:    [[SPLAT_SPLATINSERT48:%.*]] = insertelement <1 x double> poison, double [[TMP51]], i32 0
 ; RM-NEXT:    [[SPLAT_SPLAT49:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT48]], <1 x double> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP52:%.*]] = fmul <1 x double> [[SPLAT_SPLAT49]], [[BLOCK47]]
 ; RM-NEXT:    [[TMP53:%.*]] = fadd <1 x double> [[TMP50]], [[TMP52]]
-; RM-NEXT:    [[TMP54:%.*]] = shufflevector <1 x double> [[TMP53]], <1 x double> undef, <3 x i32> <i32 0, i32 undef, i32 undef>
+; RM-NEXT:    [[TMP54:%.*]] = shufflevector <1 x double> [[TMP53]], <1 x double> poison, <3 x i32> <i32 0, i32 undef, i32 undef>
 ; RM-NEXT:    [[TMP55:%.*]] = shufflevector <3 x double> [[TMP48]], <3 x double> [[TMP54]], <3 x i32> <i32 0, i32 3, i32 2>
-; RM-NEXT:    [[BLOCK50:%.*]] = shufflevector <3 x double> [[SPLIT3]], <3 x double> undef, <1 x i32> <i32 2>
+; RM-NEXT:    [[BLOCK50:%.*]] = shufflevector <3 x double> [[SPLIT3]], <3 x double> poison, <1 x i32> <i32 2>
 ; RM-NEXT:    [[TMP56:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 0
 ; RM-NEXT:    [[SPLAT_SPLATINSERT51:%.*]] = insertelement <1 x double> poison, double [[TMP56]], i32 0
 ; RM-NEXT:    [[SPLAT_SPLAT52:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT51]], <1 x double> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP57:%.*]] = fmul <1 x double> [[SPLAT_SPLAT52]], [[BLOCK50]]
-; RM-NEXT:    [[BLOCK53:%.*]] = shufflevector <3 x double> [[SPLIT4]], <3 x double> undef, <1 x i32> <i32 2>
+; RM-NEXT:    [[BLOCK53:%.*]] = shufflevector <3 x double> [[SPLIT4]], <3 x double> poison, <1 x i32> <i32 2>
 ; RM-NEXT:    [[TMP58:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 1
 ; RM-NEXT:    [[SPLAT_SPLATINSERT54:%.*]] = insertelement <1 x double> poison, double [[TMP58]], i32 0
 ; RM-NEXT:    [[SPLAT_SPLAT55:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT54]], <1 x double> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP59:%.*]] = fmul <1 x double> [[SPLAT_SPLAT55]], [[BLOCK53]]
 ; RM-NEXT:    [[TMP60:%.*]] = fadd <1 x double> [[TMP57]], [[TMP59]]
-; RM-NEXT:    [[TMP61:%.*]] = shufflevector <1 x double> [[TMP60]], <1 x double> undef, <3 x i32> <i32 0, i32 undef, i32 undef>
+; RM-NEXT:    [[TMP61:%.*]] = shufflevector <1 x double> [[TMP60]], <1 x double> poison, <3 x i32> <i32 0, i32 undef, i32 undef>
 ; RM-NEXT:    [[TMP62:%.*]] = shufflevector <3 x double> [[TMP55]], <3 x double> [[TMP61]], <3 x i32> <i32 0, i32 1, i32 3>
 ; RM-NEXT:    [[TMP63:%.*]] = shufflevector <3 x double> [[TMP20]], <3 x double> [[TMP41]], <6 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5>
-; RM-NEXT:    [[TMP64:%.*]] = shufflevector <3 x double> [[TMP62]], <3 x double> undef, <6 x i32> <i32 0, i32 1, i32 2, i32 undef, i32 undef, i32 undef>
+; RM-NEXT:    [[TMP64:%.*]] = shufflevector <3 x double> [[TMP62]], <3 x double> poison, <6 x i32> <i32 0, i32 1, i32 2, i32 undef, i32 undef, i32 undef>
 ; RM-NEXT:    [[TMP65:%.*]] = shufflevector <6 x double> [[TMP63]], <6 x double> [[TMP64]], <9 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
 ; RM-NEXT:    ret <9 x double> [[TMP65]]
 ;

diff  --git a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-double.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-double.ll
index 496234772b62..ec4dfaf6e7e3 100644
--- a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-double.ll
+++ b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-double.ll
@@ -6,61 +6,61 @@
 define <4 x double> @multiply_2x2(<4 x double> %a, <4 x double> %b) {
 ; CHECK-LABEL: @multiply_2x2(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[SPLIT:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> undef, <2 x i32> <i32 0, i32 1>
-; CHECK-NEXT:    [[SPLIT1:%.*]] = shufflevector <4 x double> [[A]], <4 x double> undef, <2 x i32> <i32 2, i32 3>
-; CHECK-NEXT:    [[SPLIT2:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> undef, <2 x i32> <i32 0, i32 1>
-; CHECK-NEXT:    [[SPLIT3:%.*]] = shufflevector <4 x double> [[B]], <4 x double> undef, <2 x i32> <i32 2, i32 3>
-; CHECK-NEXT:    [[BLOCK:%.*]] = shufflevector <2 x double> [[SPLIT]], <2 x double> undef, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[SPLIT:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> poison, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT:    [[SPLIT1:%.*]] = shufflevector <4 x double> [[A]], <4 x double> poison, <2 x i32> <i32 2, i32 3>
+; CHECK-NEXT:    [[SPLIT2:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT:    [[SPLIT3:%.*]] = shufflevector <4 x double> [[B]], <4 x double> poison, <2 x i32> <i32 2, i32 3>
+; CHECK-NEXT:    [[BLOCK:%.*]] = shufflevector <2 x double> [[SPLIT]], <2 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x double> poison, double [[TMP0]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP1:%.*]] = fmul <1 x double> [[BLOCK]], [[SPLAT_SPLAT]]
-; CHECK-NEXT:    [[BLOCK4:%.*]] = shufflevector <2 x double> [[SPLIT1]], <2 x double> undef, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[BLOCK4:%.*]] = shufflevector <2 x double> [[SPLIT1]], <2 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 1
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT5:%.*]] = insertelement <1 x double> poison, double [[TMP2]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT6:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT5]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP3:%.*]] = fmul <1 x double> [[BLOCK4]], [[SPLAT_SPLAT6]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = fadd <1 x double> [[TMP1]], [[TMP3]]
-; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <1 x double> [[TMP4]], <1 x double> undef, <2 x i32> <i32 0, i32 undef>
+; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <1 x double> [[TMP4]], <1 x double> poison, <2 x i32> <i32 0, i32 undef>
 ; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <2 x double> undef, <2 x double> [[TMP5]], <2 x i32> <i32 2, i32 1>
-; CHECK-NEXT:    [[BLOCK7:%.*]] = shufflevector <2 x double> [[SPLIT]], <2 x double> undef, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[BLOCK7:%.*]] = shufflevector <2 x double> [[SPLIT]], <2 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT8:%.*]] = insertelement <1 x double> poison, double [[TMP7]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT9:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT8]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP8:%.*]] = fmul <1 x double> [[BLOCK7]], [[SPLAT_SPLAT9]]
-; CHECK-NEXT:    [[BLOCK10:%.*]] = shufflevector <2 x double> [[SPLIT1]], <2 x double> undef, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[BLOCK10:%.*]] = shufflevector <2 x double> [[SPLIT1]], <2 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 1
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT11:%.*]] = insertelement <1 x double> poison, double [[TMP9]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT12:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT11]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP10:%.*]] = fmul <1 x double> [[BLOCK10]], [[SPLAT_SPLAT12]]
 ; CHECK-NEXT:    [[TMP11:%.*]] = fadd <1 x double> [[TMP8]], [[TMP10]]
-; CHECK-NEXT:    [[TMP12:%.*]] = shufflevector <1 x double> [[TMP11]], <1 x double> undef, <2 x i32> <i32 0, i32 undef>
+; CHECK-NEXT:    [[TMP12:%.*]] = shufflevector <1 x double> [[TMP11]], <1 x double> poison, <2 x i32> <i32 0, i32 undef>
 ; CHECK-NEXT:    [[TMP13:%.*]] = shufflevector <2 x double> [[TMP6]], <2 x double> [[TMP12]], <2 x i32> <i32 0, i32 2>
-; CHECK-NEXT:    [[BLOCK13:%.*]] = shufflevector <2 x double> [[SPLIT]], <2 x double> undef, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[BLOCK13:%.*]] = shufflevector <2 x double> [[SPLIT]], <2 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <2 x double> [[SPLIT3]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT14:%.*]] = insertelement <1 x double> poison, double [[TMP14]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT15:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT14]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP15:%.*]] = fmul <1 x double> [[BLOCK13]], [[SPLAT_SPLAT15]]
-; CHECK-NEXT:    [[BLOCK16:%.*]] = shufflevector <2 x double> [[SPLIT1]], <2 x double> undef, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[BLOCK16:%.*]] = shufflevector <2 x double> [[SPLIT1]], <2 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <2 x double> [[SPLIT3]], i64 1
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT17:%.*]] = insertelement <1 x double> poison, double [[TMP16]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT18:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT17]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP17:%.*]] = fmul <1 x double> [[BLOCK16]], [[SPLAT_SPLAT18]]
 ; CHECK-NEXT:    [[TMP18:%.*]] = fadd <1 x double> [[TMP15]], [[TMP17]]
-; CHECK-NEXT:    [[TMP19:%.*]] = shufflevector <1 x double> [[TMP18]], <1 x double> undef, <2 x i32> <i32 0, i32 undef>
+; CHECK-NEXT:    [[TMP19:%.*]] = shufflevector <1 x double> [[TMP18]], <1 x double> poison, <2 x i32> <i32 0, i32 undef>
 ; CHECK-NEXT:    [[TMP20:%.*]] = shufflevector <2 x double> undef, <2 x double> [[TMP19]], <2 x i32> <i32 2, i32 1>
-; CHECK-NEXT:    [[BLOCK19:%.*]] = shufflevector <2 x double> [[SPLIT]], <2 x double> undef, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[BLOCK19:%.*]] = shufflevector <2 x double> [[SPLIT]], <2 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP21:%.*]] = extractelement <2 x double> [[SPLIT3]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT20:%.*]] = insertelement <1 x double> poison, double [[TMP21]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT21:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT20]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP22:%.*]] = fmul <1 x double> [[BLOCK19]], [[SPLAT_SPLAT21]]
-; CHECK-NEXT:    [[BLOCK22:%.*]] = shufflevector <2 x double> [[SPLIT1]], <2 x double> undef, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[BLOCK22:%.*]] = shufflevector <2 x double> [[SPLIT1]], <2 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP23:%.*]] = extractelement <2 x double> [[SPLIT3]], i64 1
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT23:%.*]] = insertelement <1 x double> poison, double [[TMP23]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT24:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT23]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP24:%.*]] = fmul <1 x double> [[BLOCK22]], [[SPLAT_SPLAT24]]
 ; CHECK-NEXT:    [[TMP25:%.*]] = fadd <1 x double> [[TMP22]], [[TMP24]]
-; CHECK-NEXT:    [[TMP26:%.*]] = shufflevector <1 x double> [[TMP25]], <1 x double> undef, <2 x i32> <i32 0, i32 undef>
+; CHECK-NEXT:    [[TMP26:%.*]] = shufflevector <1 x double> [[TMP25]], <1 x double> poison, <2 x i32> <i32 0, i32 undef>
 ; CHECK-NEXT:    [[TMP27:%.*]] = shufflevector <2 x double> [[TMP20]], <2 x double> [[TMP26]], <2 x i32> <i32 0, i32 2>
 ; CHECK-NEXT:    [[TMP28:%.*]] = shufflevector <2 x double> [[TMP13]], <2 x double> [[TMP27]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 ; CHECK-NEXT:    ret <4 x double> [[TMP28]]
@@ -75,36 +75,36 @@ declare <4 x double> @llvm.matrix.multiply.v4f64.v4f64.v4f64(<4 x double>, <4 x
 define <4 x double> @multiply_1x2(<2 x double> %a, <2 x double> %b) {
 ; CHECK-LABEL: @multiply_1x2(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[SPLIT:%.*]] = shufflevector <2 x double> [[A:%.*]], <2 x double> undef, <2 x i32> <i32 0, i32 1>
-; CHECK-NEXT:    [[SPLIT1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> undef, <1 x i32> zeroinitializer
-; CHECK-NEXT:    [[SPLIT2:%.*]] = shufflevector <2 x double> [[B]], <2 x double> undef, <1 x i32> <i32 1>
-; CHECK-NEXT:    [[BLOCK:%.*]] = shufflevector <2 x double> [[SPLIT]], <2 x double> undef, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[SPLIT:%.*]] = shufflevector <2 x double> [[A:%.*]], <2 x double> poison, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT:    [[SPLIT1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[SPLIT2:%.*]] = shufflevector <2 x double> [[B]], <2 x double> poison, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[BLOCK:%.*]] = shufflevector <2 x double> [[SPLIT]], <2 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <1 x double> [[SPLIT1]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x double> poison, double [[TMP0]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP1:%.*]] = fmul <1 x double> [[BLOCK]], [[SPLAT_SPLAT]]
-; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <1 x double> [[TMP1]], <1 x double> undef, <2 x i32> <i32 0, i32 undef>
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <1 x double> [[TMP1]], <1 x double> poison, <2 x i32> <i32 0, i32 undef>
 ; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <2 x double> undef, <2 x double> [[TMP2]], <2 x i32> <i32 2, i32 1>
-; CHECK-NEXT:    [[BLOCK3:%.*]] = shufflevector <2 x double> [[SPLIT]], <2 x double> undef, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[BLOCK3:%.*]] = shufflevector <2 x double> [[SPLIT]], <2 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <1 x double> [[SPLIT1]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT4:%.*]] = insertelement <1 x double> poison, double [[TMP4]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT5:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT4]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP5:%.*]] = fmul <1 x double> [[BLOCK3]], [[SPLAT_SPLAT5]]
-; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <1 x double> [[TMP5]], <1 x double> undef, <2 x i32> <i32 0, i32 undef>
+; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <1 x double> [[TMP5]], <1 x double> poison, <2 x i32> <i32 0, i32 undef>
 ; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> [[TMP6]], <2 x i32> <i32 0, i32 2>
-; CHECK-NEXT:    [[BLOCK6:%.*]] = shufflevector <2 x double> [[SPLIT]], <2 x double> undef, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[BLOCK6:%.*]] = shufflevector <2 x double> [[SPLIT]], <2 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <1 x double> [[SPLIT2]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT7:%.*]] = insertelement <1 x double> poison, double [[TMP8]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT8:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT7]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP9:%.*]] = fmul <1 x double> [[BLOCK6]], [[SPLAT_SPLAT8]]
-; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <1 x double> [[TMP9]], <1 x double> undef, <2 x i32> <i32 0, i32 undef>
+; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <1 x double> [[TMP9]], <1 x double> poison, <2 x i32> <i32 0, i32 undef>
 ; CHECK-NEXT:    [[TMP11:%.*]] = shufflevector <2 x double> undef, <2 x double> [[TMP10]], <2 x i32> <i32 2, i32 1>
-; CHECK-NEXT:    [[BLOCK9:%.*]] = shufflevector <2 x double> [[SPLIT]], <2 x double> undef, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[BLOCK9:%.*]] = shufflevector <2 x double> [[SPLIT]], <2 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <1 x double> [[SPLIT2]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT10:%.*]] = insertelement <1 x double> poison, double [[TMP12]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT11:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT10]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP13:%.*]] = fmul <1 x double> [[BLOCK9]], [[SPLAT_SPLAT11]]
-; CHECK-NEXT:    [[TMP14:%.*]] = shufflevector <1 x double> [[TMP13]], <1 x double> undef, <2 x i32> <i32 0, i32 undef>
+; CHECK-NEXT:    [[TMP14:%.*]] = shufflevector <1 x double> [[TMP13]], <1 x double> poison, <2 x i32> <i32 0, i32 undef>
 ; CHECK-NEXT:    [[TMP15:%.*]] = shufflevector <2 x double> [[TMP11]], <2 x double> [[TMP14]], <2 x i32> <i32 0, i32 2>
 ; CHECK-NEXT:    [[TMP16:%.*]] = shufflevector <2 x double> [[TMP7]], <2 x double> [[TMP15]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 ; CHECK-NEXT:    ret <4 x double> [[TMP16]]
@@ -119,130 +119,130 @@ declare <4 x double> @llvm.matrix.multiply.v4f64.v2f64.v2f64(<2 x double>, <2 x
 define <9 x double> @multiply_2x3(<6 x double> %a, <6 x double> %b) {
 ; CHECK-LABEL: @multiply_2x3(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[SPLIT:%.*]] = shufflevector <6 x double> [[A:%.*]], <6 x double> undef, <3 x i32> <i32 0, i32 1, i32 2>
-; CHECK-NEXT:    [[SPLIT1:%.*]] = shufflevector <6 x double> [[A]], <6 x double> undef, <3 x i32> <i32 3, i32 4, i32 5>
-; CHECK-NEXT:    [[SPLIT2:%.*]] = shufflevector <6 x double> [[B:%.*]], <6 x double> undef, <2 x i32> <i32 0, i32 1>
-; CHECK-NEXT:    [[SPLIT3:%.*]] = shufflevector <6 x double> [[B]], <6 x double> undef, <2 x i32> <i32 2, i32 3>
-; CHECK-NEXT:    [[SPLIT4:%.*]] = shufflevector <6 x double> [[B]], <6 x double> undef, <2 x i32> <i32 4, i32 5>
-; CHECK-NEXT:    [[BLOCK:%.*]] = shufflevector <3 x double> [[SPLIT]], <3 x double> undef, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[SPLIT:%.*]] = shufflevector <6 x double> [[A:%.*]], <6 x double> poison, <3 x i32> <i32 0, i32 1, i32 2>
+; CHECK-NEXT:    [[SPLIT1:%.*]] = shufflevector <6 x double> [[A]], <6 x double> poison, <3 x i32> <i32 3, i32 4, i32 5>
+; CHECK-NEXT:    [[SPLIT2:%.*]] = shufflevector <6 x double> [[B:%.*]], <6 x double> poison, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT:    [[SPLIT3:%.*]] = shufflevector <6 x double> [[B]], <6 x double> poison, <2 x i32> <i32 2, i32 3>
+; CHECK-NEXT:    [[SPLIT4:%.*]] = shufflevector <6 x double> [[B]], <6 x double> poison, <2 x i32> <i32 4, i32 5>
+; CHECK-NEXT:    [[BLOCK:%.*]] = shufflevector <3 x double> [[SPLIT]], <3 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x double> poison, double [[TMP0]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP1:%.*]] = fmul <1 x double> [[BLOCK]], [[SPLAT_SPLAT]]
-; CHECK-NEXT:    [[BLOCK5:%.*]] = shufflevector <3 x double> [[SPLIT1]], <3 x double> undef, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[BLOCK5:%.*]] = shufflevector <3 x double> [[SPLIT1]], <3 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 1
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT6:%.*]] = insertelement <1 x double> poison, double [[TMP2]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT7:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT6]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP3:%.*]] = fmul <1 x double> [[BLOCK5]], [[SPLAT_SPLAT7]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = fadd <1 x double> [[TMP1]], [[TMP3]]
-; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <1 x double> [[TMP4]], <1 x double> undef, <3 x i32> <i32 0, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <1 x double> [[TMP4]], <1 x double> poison, <3 x i32> <i32 0, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <3 x double> undef, <3 x double> [[TMP5]], <3 x i32> <i32 3, i32 1, i32 2>
-; CHECK-NEXT:    [[BLOCK8:%.*]] = shufflevector <3 x double> [[SPLIT]], <3 x double> undef, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[BLOCK8:%.*]] = shufflevector <3 x double> [[SPLIT]], <3 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT9:%.*]] = insertelement <1 x double> poison, double [[TMP7]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT10:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT9]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP8:%.*]] = fmul <1 x double> [[BLOCK8]], [[SPLAT_SPLAT10]]
-; CHECK-NEXT:    [[BLOCK11:%.*]] = shufflevector <3 x double> [[SPLIT1]], <3 x double> undef, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[BLOCK11:%.*]] = shufflevector <3 x double> [[SPLIT1]], <3 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 1
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT12:%.*]] = insertelement <1 x double> poison, double [[TMP9]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT13:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT12]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP10:%.*]] = fmul <1 x double> [[BLOCK11]], [[SPLAT_SPLAT13]]
 ; CHECK-NEXT:    [[TMP11:%.*]] = fadd <1 x double> [[TMP8]], [[TMP10]]
-; CHECK-NEXT:    [[TMP12:%.*]] = shufflevector <1 x double> [[TMP11]], <1 x double> undef, <3 x i32> <i32 0, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP12:%.*]] = shufflevector <1 x double> [[TMP11]], <1 x double> poison, <3 x i32> <i32 0, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[TMP13:%.*]] = shufflevector <3 x double> [[TMP6]], <3 x double> [[TMP12]], <3 x i32> <i32 0, i32 3, i32 2>
-; CHECK-NEXT:    [[BLOCK14:%.*]] = shufflevector <3 x double> [[SPLIT]], <3 x double> undef, <1 x i32> <i32 2>
+; CHECK-NEXT:    [[BLOCK14:%.*]] = shufflevector <3 x double> [[SPLIT]], <3 x double> poison, <1 x i32> <i32 2>
 ; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT15:%.*]] = insertelement <1 x double> poison, double [[TMP14]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT16:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT15]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP15:%.*]] = fmul <1 x double> [[BLOCK14]], [[SPLAT_SPLAT16]]
-; CHECK-NEXT:    [[BLOCK17:%.*]] = shufflevector <3 x double> [[SPLIT1]], <3 x double> undef, <1 x i32> <i32 2>
+; CHECK-NEXT:    [[BLOCK17:%.*]] = shufflevector <3 x double> [[SPLIT1]], <3 x double> poison, <1 x i32> <i32 2>
 ; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 1
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT18:%.*]] = insertelement <1 x double> poison, double [[TMP16]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT19:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT18]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP17:%.*]] = fmul <1 x double> [[BLOCK17]], [[SPLAT_SPLAT19]]
 ; CHECK-NEXT:    [[TMP18:%.*]] = fadd <1 x double> [[TMP15]], [[TMP17]]
-; CHECK-NEXT:    [[TMP19:%.*]] = shufflevector <1 x double> [[TMP18]], <1 x double> undef, <3 x i32> <i32 0, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP19:%.*]] = shufflevector <1 x double> [[TMP18]], <1 x double> poison, <3 x i32> <i32 0, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[TMP20:%.*]] = shufflevector <3 x double> [[TMP13]], <3 x double> [[TMP19]], <3 x i32> <i32 0, i32 1, i32 3>
-; CHECK-NEXT:    [[BLOCK20:%.*]] = shufflevector <3 x double> [[SPLIT]], <3 x double> undef, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[BLOCK20:%.*]] = shufflevector <3 x double> [[SPLIT]], <3 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP21:%.*]] = extractelement <2 x double> [[SPLIT3]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT21:%.*]] = insertelement <1 x double> poison, double [[TMP21]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT22:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT21]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP22:%.*]] = fmul <1 x double> [[BLOCK20]], [[SPLAT_SPLAT22]]
-; CHECK-NEXT:    [[BLOCK23:%.*]] = shufflevector <3 x double> [[SPLIT1]], <3 x double> undef, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[BLOCK23:%.*]] = shufflevector <3 x double> [[SPLIT1]], <3 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP23:%.*]] = extractelement <2 x double> [[SPLIT3]], i64 1
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT24:%.*]] = insertelement <1 x double> poison, double [[TMP23]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT25:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT24]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP24:%.*]] = fmul <1 x double> [[BLOCK23]], [[SPLAT_SPLAT25]]
 ; CHECK-NEXT:    [[TMP25:%.*]] = fadd <1 x double> [[TMP22]], [[TMP24]]
-; CHECK-NEXT:    [[TMP26:%.*]] = shufflevector <1 x double> [[TMP25]], <1 x double> undef, <3 x i32> <i32 0, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP26:%.*]] = shufflevector <1 x double> [[TMP25]], <1 x double> poison, <3 x i32> <i32 0, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[TMP27:%.*]] = shufflevector <3 x double> undef, <3 x double> [[TMP26]], <3 x i32> <i32 3, i32 1, i32 2>
-; CHECK-NEXT:    [[BLOCK26:%.*]] = shufflevector <3 x double> [[SPLIT]], <3 x double> undef, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[BLOCK26:%.*]] = shufflevector <3 x double> [[SPLIT]], <3 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP28:%.*]] = extractelement <2 x double> [[SPLIT3]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT27:%.*]] = insertelement <1 x double> poison, double [[TMP28]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT28:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT27]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP29:%.*]] = fmul <1 x double> [[BLOCK26]], [[SPLAT_SPLAT28]]
-; CHECK-NEXT:    [[BLOCK29:%.*]] = shufflevector <3 x double> [[SPLIT1]], <3 x double> undef, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[BLOCK29:%.*]] = shufflevector <3 x double> [[SPLIT1]], <3 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP30:%.*]] = extractelement <2 x double> [[SPLIT3]], i64 1
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT30:%.*]] = insertelement <1 x double> poison, double [[TMP30]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT31:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT30]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP31:%.*]] = fmul <1 x double> [[BLOCK29]], [[SPLAT_SPLAT31]]
 ; CHECK-NEXT:    [[TMP32:%.*]] = fadd <1 x double> [[TMP29]], [[TMP31]]
-; CHECK-NEXT:    [[TMP33:%.*]] = shufflevector <1 x double> [[TMP32]], <1 x double> undef, <3 x i32> <i32 0, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP33:%.*]] = shufflevector <1 x double> [[TMP32]], <1 x double> poison, <3 x i32> <i32 0, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[TMP34:%.*]] = shufflevector <3 x double> [[TMP27]], <3 x double> [[TMP33]], <3 x i32> <i32 0, i32 3, i32 2>
-; CHECK-NEXT:    [[BLOCK32:%.*]] = shufflevector <3 x double> [[SPLIT]], <3 x double> undef, <1 x i32> <i32 2>
+; CHECK-NEXT:    [[BLOCK32:%.*]] = shufflevector <3 x double> [[SPLIT]], <3 x double> poison, <1 x i32> <i32 2>
 ; CHECK-NEXT:    [[TMP35:%.*]] = extractelement <2 x double> [[SPLIT3]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT33:%.*]] = insertelement <1 x double> poison, double [[TMP35]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT34:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT33]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP36:%.*]] = fmul <1 x double> [[BLOCK32]], [[SPLAT_SPLAT34]]
-; CHECK-NEXT:    [[BLOCK35:%.*]] = shufflevector <3 x double> [[SPLIT1]], <3 x double> undef, <1 x i32> <i32 2>
+; CHECK-NEXT:    [[BLOCK35:%.*]] = shufflevector <3 x double> [[SPLIT1]], <3 x double> poison, <1 x i32> <i32 2>
 ; CHECK-NEXT:    [[TMP37:%.*]] = extractelement <2 x double> [[SPLIT3]], i64 1
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT36:%.*]] = insertelement <1 x double> poison, double [[TMP37]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT37:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT36]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP38:%.*]] = fmul <1 x double> [[BLOCK35]], [[SPLAT_SPLAT37]]
 ; CHECK-NEXT:    [[TMP39:%.*]] = fadd <1 x double> [[TMP36]], [[TMP38]]
-; CHECK-NEXT:    [[TMP40:%.*]] = shufflevector <1 x double> [[TMP39]], <1 x double> undef, <3 x i32> <i32 0, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP40:%.*]] = shufflevector <1 x double> [[TMP39]], <1 x double> poison, <3 x i32> <i32 0, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[TMP41:%.*]] = shufflevector <3 x double> [[TMP34]], <3 x double> [[TMP40]], <3 x i32> <i32 0, i32 1, i32 3>
-; CHECK-NEXT:    [[BLOCK38:%.*]] = shufflevector <3 x double> [[SPLIT]], <3 x double> undef, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[BLOCK38:%.*]] = shufflevector <3 x double> [[SPLIT]], <3 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP42:%.*]] = extractelement <2 x double> [[SPLIT4]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT39:%.*]] = insertelement <1 x double> poison, double [[TMP42]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT40:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT39]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP43:%.*]] = fmul <1 x double> [[BLOCK38]], [[SPLAT_SPLAT40]]
-; CHECK-NEXT:    [[BLOCK41:%.*]] = shufflevector <3 x double> [[SPLIT1]], <3 x double> undef, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[BLOCK41:%.*]] = shufflevector <3 x double> [[SPLIT1]], <3 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP44:%.*]] = extractelement <2 x double> [[SPLIT4]], i64 1
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT42:%.*]] = insertelement <1 x double> poison, double [[TMP44]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT43:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT42]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP45:%.*]] = fmul <1 x double> [[BLOCK41]], [[SPLAT_SPLAT43]]
 ; CHECK-NEXT:    [[TMP46:%.*]] = fadd <1 x double> [[TMP43]], [[TMP45]]
-; CHECK-NEXT:    [[TMP47:%.*]] = shufflevector <1 x double> [[TMP46]], <1 x double> undef, <3 x i32> <i32 0, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP47:%.*]] = shufflevector <1 x double> [[TMP46]], <1 x double> poison, <3 x i32> <i32 0, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[TMP48:%.*]] = shufflevector <3 x double> undef, <3 x double> [[TMP47]], <3 x i32> <i32 3, i32 1, i32 2>
-; CHECK-NEXT:    [[BLOCK44:%.*]] = shufflevector <3 x double> [[SPLIT]], <3 x double> undef, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[BLOCK44:%.*]] = shufflevector <3 x double> [[SPLIT]], <3 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP49:%.*]] = extractelement <2 x double> [[SPLIT4]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT45:%.*]] = insertelement <1 x double> poison, double [[TMP49]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT46:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT45]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP50:%.*]] = fmul <1 x double> [[BLOCK44]], [[SPLAT_SPLAT46]]
-; CHECK-NEXT:    [[BLOCK47:%.*]] = shufflevector <3 x double> [[SPLIT1]], <3 x double> undef, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[BLOCK47:%.*]] = shufflevector <3 x double> [[SPLIT1]], <3 x double> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP51:%.*]] = extractelement <2 x double> [[SPLIT4]], i64 1
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT48:%.*]] = insertelement <1 x double> poison, double [[TMP51]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT49:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT48]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP52:%.*]] = fmul <1 x double> [[BLOCK47]], [[SPLAT_SPLAT49]]
 ; CHECK-NEXT:    [[TMP53:%.*]] = fadd <1 x double> [[TMP50]], [[TMP52]]
-; CHECK-NEXT:    [[TMP54:%.*]] = shufflevector <1 x double> [[TMP53]], <1 x double> undef, <3 x i32> <i32 0, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP54:%.*]] = shufflevector <1 x double> [[TMP53]], <1 x double> poison, <3 x i32> <i32 0, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[TMP55:%.*]] = shufflevector <3 x double> [[TMP48]], <3 x double> [[TMP54]], <3 x i32> <i32 0, i32 3, i32 2>
-; CHECK-NEXT:    [[BLOCK50:%.*]] = shufflevector <3 x double> [[SPLIT]], <3 x double> undef, <1 x i32> <i32 2>
+; CHECK-NEXT:    [[BLOCK50:%.*]] = shufflevector <3 x double> [[SPLIT]], <3 x double> poison, <1 x i32> <i32 2>
 ; CHECK-NEXT:    [[TMP56:%.*]] = extractelement <2 x double> [[SPLIT4]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT51:%.*]] = insertelement <1 x double> poison, double [[TMP56]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT52:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT51]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP57:%.*]] = fmul <1 x double> [[BLOCK50]], [[SPLAT_SPLAT52]]
-; CHECK-NEXT:    [[BLOCK53:%.*]] = shufflevector <3 x double> [[SPLIT1]], <3 x double> undef, <1 x i32> <i32 2>
+; CHECK-NEXT:    [[BLOCK53:%.*]] = shufflevector <3 x double> [[SPLIT1]], <3 x double> poison, <1 x i32> <i32 2>
 ; CHECK-NEXT:    [[TMP58:%.*]] = extractelement <2 x double> [[SPLIT4]], i64 1
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT54:%.*]] = insertelement <1 x double> poison, double [[TMP58]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT55:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT54]], <1 x double> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP59:%.*]] = fmul <1 x double> [[BLOCK53]], [[SPLAT_SPLAT55]]
 ; CHECK-NEXT:    [[TMP60:%.*]] = fadd <1 x double> [[TMP57]], [[TMP59]]
-; CHECK-NEXT:    [[TMP61:%.*]] = shufflevector <1 x double> [[TMP60]], <1 x double> undef, <3 x i32> <i32 0, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP61:%.*]] = shufflevector <1 x double> [[TMP60]], <1 x double> poison, <3 x i32> <i32 0, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[TMP62:%.*]] = shufflevector <3 x double> [[TMP55]], <3 x double> [[TMP61]], <3 x i32> <i32 0, i32 1, i32 3>
 ; CHECK-NEXT:    [[TMP63:%.*]] = shufflevector <3 x double> [[TMP20]], <3 x double> [[TMP41]], <6 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5>
-; CHECK-NEXT:    [[TMP64:%.*]] = shufflevector <3 x double> [[TMP62]], <3 x double> undef, <6 x i32> <i32 0, i32 1, i32 2, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP64:%.*]] = shufflevector <3 x double> [[TMP62]], <3 x double> poison, <6 x i32> <i32 0, i32 1, i32 2, i32 undef, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[TMP65:%.*]] = shufflevector <6 x double> [[TMP63]], <6 x double> [[TMP64]], <9 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
 ; CHECK-NEXT:    ret <9 x double> [[TMP65]]
 ;

diff  --git a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-float-contraction-fmf.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-float-contraction-fmf.ll
index a4b37450417a..02de4ef45921 100644
--- a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-float-contraction-fmf.ll
+++ b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-float-contraction-fmf.ll
@@ -6,57 +6,57 @@
 define <4 x float> @multiply_2x2(<4 x float> %a, <4 x float> %b) {
 ; CHECK-LABEL: @multiply_2x2(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[SPLIT:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> undef, <2 x i32> <i32 0, i32 1>
-; CHECK-NEXT:    [[SPLIT1:%.*]] = shufflevector <4 x float> [[A]], <4 x float> undef, <2 x i32> <i32 2, i32 3>
-; CHECK-NEXT:    [[SPLIT2:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> undef, <2 x i32> <i32 0, i32 1>
-; CHECK-NEXT:    [[SPLIT3:%.*]] = shufflevector <4 x float> [[B]], <4 x float> undef, <2 x i32> <i32 2, i32 3>
-; CHECK-NEXT:    [[BLOCK:%.*]] = shufflevector <2 x float> [[SPLIT]], <2 x float> undef, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[SPLIT:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> poison, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT:    [[SPLIT1:%.*]] = shufflevector <4 x float> [[A]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
+; CHECK-NEXT:    [[SPLIT2:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> poison, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT:    [[SPLIT3:%.*]] = shufflevector <4 x float> [[B]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
+; CHECK-NEXT:    [[BLOCK:%.*]] = shufflevector <2 x float> [[SPLIT]], <2 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x float> [[SPLIT2]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x float> poison, float [[TMP0]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP1:%.*]] = fmul <1 x float> [[BLOCK]], [[SPLAT_SPLAT]]
-; CHECK-NEXT:    [[BLOCK4:%.*]] = shufflevector <2 x float> [[SPLIT1]], <2 x float> undef, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[BLOCK4:%.*]] = shufflevector <2 x float> [[SPLIT1]], <2 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x float> [[SPLIT2]], i64 1
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT5:%.*]] = insertelement <1 x float> poison, float [[TMP2]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT6:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT5]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP3:%.*]] = call <1 x float> @llvm.fmuladd.v1f32(<1 x float> [[BLOCK4]], <1 x float> [[SPLAT_SPLAT6]], <1 x float> [[TMP1]])
-; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <1 x float> [[TMP3]], <1 x float> undef, <2 x i32> <i32 0, i32 undef>
+; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <1 x float> [[TMP3]], <1 x float> poison, <2 x i32> <i32 0, i32 undef>
 ; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <2 x float> undef, <2 x float> [[TMP4]], <2 x i32> <i32 2, i32 1>
-; CHECK-NEXT:    [[BLOCK7:%.*]] = shufflevector <2 x float> [[SPLIT]], <2 x float> undef, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[BLOCK7:%.*]] = shufflevector <2 x float> [[SPLIT]], <2 x float> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x float> [[SPLIT2]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT8:%.*]] = insertelement <1 x float> poison, float [[TMP6]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT9:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT8]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP7:%.*]] = fmul <1 x float> [[BLOCK7]], [[SPLAT_SPLAT9]]
-; CHECK-NEXT:    [[BLOCK10:%.*]] = shufflevector <2 x float> [[SPLIT1]], <2 x float> undef, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[BLOCK10:%.*]] = shufflevector <2 x float> [[SPLIT1]], <2 x float> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <2 x float> [[SPLIT2]], i64 1
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT11:%.*]] = insertelement <1 x float> poison, float [[TMP8]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT12:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT11]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP9:%.*]] = call <1 x float> @llvm.fmuladd.v1f32(<1 x float> [[BLOCK10]], <1 x float> [[SPLAT_SPLAT12]], <1 x float> [[TMP7]])
-; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <1 x float> [[TMP9]], <1 x float> undef, <2 x i32> <i32 0, i32 undef>
+; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <1 x float> [[TMP9]], <1 x float> poison, <2 x i32> <i32 0, i32 undef>
 ; CHECK-NEXT:    [[TMP11:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> [[TMP10]], <2 x i32> <i32 0, i32 2>
-; CHECK-NEXT:    [[BLOCK13:%.*]] = shufflevector <2 x float> [[SPLIT]], <2 x float> undef, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[BLOCK13:%.*]] = shufflevector <2 x float> [[SPLIT]], <2 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <2 x float> [[SPLIT3]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT14:%.*]] = insertelement <1 x float> poison, float [[TMP12]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT15:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT14]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP13:%.*]] = fmul <1 x float> [[BLOCK13]], [[SPLAT_SPLAT15]]
-; CHECK-NEXT:    [[BLOCK16:%.*]] = shufflevector <2 x float> [[SPLIT1]], <2 x float> undef, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[BLOCK16:%.*]] = shufflevector <2 x float> [[SPLIT1]], <2 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <2 x float> [[SPLIT3]], i64 1
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT17:%.*]] = insertelement <1 x float> poison, float [[TMP14]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT18:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT17]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP15:%.*]] = call <1 x float> @llvm.fmuladd.v1f32(<1 x float> [[BLOCK16]], <1 x float> [[SPLAT_SPLAT18]], <1 x float> [[TMP13]])
-; CHECK-NEXT:    [[TMP16:%.*]] = shufflevector <1 x float> [[TMP15]], <1 x float> undef, <2 x i32> <i32 0, i32 undef>
+; CHECK-NEXT:    [[TMP16:%.*]] = shufflevector <1 x float> [[TMP15]], <1 x float> poison, <2 x i32> <i32 0, i32 undef>
 ; CHECK-NEXT:    [[TMP17:%.*]] = shufflevector <2 x float> undef, <2 x float> [[TMP16]], <2 x i32> <i32 2, i32 1>
-; CHECK-NEXT:    [[BLOCK19:%.*]] = shufflevector <2 x float> [[SPLIT]], <2 x float> undef, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[BLOCK19:%.*]] = shufflevector <2 x float> [[SPLIT]], <2 x float> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP18:%.*]] = extractelement <2 x float> [[SPLIT3]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT20:%.*]] = insertelement <1 x float> poison, float [[TMP18]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT21:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT20]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP19:%.*]] = fmul <1 x float> [[BLOCK19]], [[SPLAT_SPLAT21]]
-; CHECK-NEXT:    [[BLOCK22:%.*]] = shufflevector <2 x float> [[SPLIT1]], <2 x float> undef, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[BLOCK22:%.*]] = shufflevector <2 x float> [[SPLIT1]], <2 x float> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP20:%.*]] = extractelement <2 x float> [[SPLIT3]], i64 1
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT23:%.*]] = insertelement <1 x float> poison, float [[TMP20]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT24:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT23]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP21:%.*]] = call <1 x float> @llvm.fmuladd.v1f32(<1 x float> [[BLOCK22]], <1 x float> [[SPLAT_SPLAT24]], <1 x float> [[TMP19]])
-; CHECK-NEXT:    [[TMP22:%.*]] = shufflevector <1 x float> [[TMP21]], <1 x float> undef, <2 x i32> <i32 0, i32 undef>
+; CHECK-NEXT:    [[TMP22:%.*]] = shufflevector <1 x float> [[TMP21]], <1 x float> poison, <2 x i32> <i32 0, i32 undef>
 ; CHECK-NEXT:    [[TMP23:%.*]] = shufflevector <2 x float> [[TMP17]], <2 x float> [[TMP22]], <2 x i32> <i32 0, i32 2>
 ; CHECK-NEXT:    [[TMP24:%.*]] = shufflevector <2 x float> [[TMP11]], <2 x float> [[TMP23]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 ; CHECK-NEXT:    ret <4 x float> [[TMP24]]

diff  --git a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-float-contraction.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-float-contraction.ll
index 2663474c2a1e..ba2e665d1c1c 100644
--- a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-float-contraction.ll
+++ b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-float-contraction.ll
@@ -6,57 +6,57 @@
 define <4 x float> @multiply_2x2(<4 x float> %a, <4 x float> %b) {
 ; CHECK-LABEL: @multiply_2x2(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[SPLIT:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> undef, <2 x i32> <i32 0, i32 1>
-; CHECK-NEXT:    [[SPLIT1:%.*]] = shufflevector <4 x float> [[A]], <4 x float> undef, <2 x i32> <i32 2, i32 3>
-; CHECK-NEXT:    [[SPLIT2:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> undef, <2 x i32> <i32 0, i32 1>
-; CHECK-NEXT:    [[SPLIT3:%.*]] = shufflevector <4 x float> [[B]], <4 x float> undef, <2 x i32> <i32 2, i32 3>
-; CHECK-NEXT:    [[BLOCK:%.*]] = shufflevector <2 x float> [[SPLIT]], <2 x float> undef, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[SPLIT:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> poison, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT:    [[SPLIT1:%.*]] = shufflevector <4 x float> [[A]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
+; CHECK-NEXT:    [[SPLIT2:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> poison, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT:    [[SPLIT3:%.*]] = shufflevector <4 x float> [[B]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
+; CHECK-NEXT:    [[BLOCK:%.*]] = shufflevector <2 x float> [[SPLIT]], <2 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x float> [[SPLIT2]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x float> poison, float [[TMP0]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP1:%.*]] = fmul <1 x float> [[BLOCK]], [[SPLAT_SPLAT]]
-; CHECK-NEXT:    [[BLOCK4:%.*]] = shufflevector <2 x float> [[SPLIT1]], <2 x float> undef, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[BLOCK4:%.*]] = shufflevector <2 x float> [[SPLIT1]], <2 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x float> [[SPLIT2]], i64 1
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT5:%.*]] = insertelement <1 x float> poison, float [[TMP2]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT6:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT5]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP3:%.*]] = call <1 x float> @llvm.fmuladd.v1f32(<1 x float> [[BLOCK4]], <1 x float> [[SPLAT_SPLAT6]], <1 x float> [[TMP1]])
-; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <1 x float> [[TMP3]], <1 x float> undef, <2 x i32> <i32 0, i32 undef>
+; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <1 x float> [[TMP3]], <1 x float> poison, <2 x i32> <i32 0, i32 undef>
 ; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <2 x float> undef, <2 x float> [[TMP4]], <2 x i32> <i32 2, i32 1>
-; CHECK-NEXT:    [[BLOCK7:%.*]] = shufflevector <2 x float> [[SPLIT]], <2 x float> undef, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[BLOCK7:%.*]] = shufflevector <2 x float> [[SPLIT]], <2 x float> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x float> [[SPLIT2]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT8:%.*]] = insertelement <1 x float> poison, float [[TMP6]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT9:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT8]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP7:%.*]] = fmul <1 x float> [[BLOCK7]], [[SPLAT_SPLAT9]]
-; CHECK-NEXT:    [[BLOCK10:%.*]] = shufflevector <2 x float> [[SPLIT1]], <2 x float> undef, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[BLOCK10:%.*]] = shufflevector <2 x float> [[SPLIT1]], <2 x float> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <2 x float> [[SPLIT2]], i64 1
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT11:%.*]] = insertelement <1 x float> poison, float [[TMP8]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT12:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT11]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP9:%.*]] = call <1 x float> @llvm.fmuladd.v1f32(<1 x float> [[BLOCK10]], <1 x float> [[SPLAT_SPLAT12]], <1 x float> [[TMP7]])
-; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <1 x float> [[TMP9]], <1 x float> undef, <2 x i32> <i32 0, i32 undef>
+; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <1 x float> [[TMP9]], <1 x float> poison, <2 x i32> <i32 0, i32 undef>
 ; CHECK-NEXT:    [[TMP11:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> [[TMP10]], <2 x i32> <i32 0, i32 2>
-; CHECK-NEXT:    [[BLOCK13:%.*]] = shufflevector <2 x float> [[SPLIT]], <2 x float> undef, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[BLOCK13:%.*]] = shufflevector <2 x float> [[SPLIT]], <2 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <2 x float> [[SPLIT3]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT14:%.*]] = insertelement <1 x float> poison, float [[TMP12]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT15:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT14]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP13:%.*]] = fmul <1 x float> [[BLOCK13]], [[SPLAT_SPLAT15]]
-; CHECK-NEXT:    [[BLOCK16:%.*]] = shufflevector <2 x float> [[SPLIT1]], <2 x float> undef, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[BLOCK16:%.*]] = shufflevector <2 x float> [[SPLIT1]], <2 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <2 x float> [[SPLIT3]], i64 1
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT17:%.*]] = insertelement <1 x float> poison, float [[TMP14]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT18:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT17]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP15:%.*]] = call <1 x float> @llvm.fmuladd.v1f32(<1 x float> [[BLOCK16]], <1 x float> [[SPLAT_SPLAT18]], <1 x float> [[TMP13]])
-; CHECK-NEXT:    [[TMP16:%.*]] = shufflevector <1 x float> [[TMP15]], <1 x float> undef, <2 x i32> <i32 0, i32 undef>
+; CHECK-NEXT:    [[TMP16:%.*]] = shufflevector <1 x float> [[TMP15]], <1 x float> poison, <2 x i32> <i32 0, i32 undef>
 ; CHECK-NEXT:    [[TMP17:%.*]] = shufflevector <2 x float> undef, <2 x float> [[TMP16]], <2 x i32> <i32 2, i32 1>
-; CHECK-NEXT:    [[BLOCK19:%.*]] = shufflevector <2 x float> [[SPLIT]], <2 x float> undef, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[BLOCK19:%.*]] = shufflevector <2 x float> [[SPLIT]], <2 x float> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP18:%.*]] = extractelement <2 x float> [[SPLIT3]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT20:%.*]] = insertelement <1 x float> poison, float [[TMP18]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT21:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT20]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP19:%.*]] = fmul <1 x float> [[BLOCK19]], [[SPLAT_SPLAT21]]
-; CHECK-NEXT:    [[BLOCK22:%.*]] = shufflevector <2 x float> [[SPLIT1]], <2 x float> undef, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[BLOCK22:%.*]] = shufflevector <2 x float> [[SPLIT1]], <2 x float> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP20:%.*]] = extractelement <2 x float> [[SPLIT3]], i64 1
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT23:%.*]] = insertelement <1 x float> poison, float [[TMP20]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT24:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT23]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP21:%.*]] = call <1 x float> @llvm.fmuladd.v1f32(<1 x float> [[BLOCK22]], <1 x float> [[SPLAT_SPLAT24]], <1 x float> [[TMP19]])
-; CHECK-NEXT:    [[TMP22:%.*]] = shufflevector <1 x float> [[TMP21]], <1 x float> undef, <2 x i32> <i32 0, i32 undef>
+; CHECK-NEXT:    [[TMP22:%.*]] = shufflevector <1 x float> [[TMP21]], <1 x float> poison, <2 x i32> <i32 0, i32 undef>
 ; CHECK-NEXT:    [[TMP23:%.*]] = shufflevector <2 x float> [[TMP17]], <2 x float> [[TMP22]], <2 x i32> <i32 0, i32 2>
 ; CHECK-NEXT:    [[TMP24:%.*]] = shufflevector <2 x float> [[TMP11]], <2 x float> [[TMP23]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 ; CHECK-NEXT:    ret <4 x float> [[TMP24]]

diff  --git a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-float.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-float.ll
index 9d53dc670c04..203dd35128b4 100644
--- a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-float.ll
+++ b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-float.ll
@@ -6,61 +6,61 @@
 define <4 x float> @multiply_2x2(<4 x float> %a, <4 x float> %b) {
 ; CHECK-LABEL: @multiply_2x2(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[SPLIT:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> undef, <2 x i32> <i32 0, i32 1>
-; CHECK-NEXT:    [[SPLIT1:%.*]] = shufflevector <4 x float> [[A]], <4 x float> undef, <2 x i32> <i32 2, i32 3>
-; CHECK-NEXT:    [[SPLIT2:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> undef, <2 x i32> <i32 0, i32 1>
-; CHECK-NEXT:    [[SPLIT3:%.*]] = shufflevector <4 x float> [[B]], <4 x float> undef, <2 x i32> <i32 2, i32 3>
-; CHECK-NEXT:    [[BLOCK:%.*]] = shufflevector <2 x float> [[SPLIT]], <2 x float> undef, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[SPLIT:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> poison, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT:    [[SPLIT1:%.*]] = shufflevector <4 x float> [[A]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
+; CHECK-NEXT:    [[SPLIT2:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> poison, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT:    [[SPLIT3:%.*]] = shufflevector <4 x float> [[B]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
+; CHECK-NEXT:    [[BLOCK:%.*]] = shufflevector <2 x float> [[SPLIT]], <2 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x float> [[SPLIT2]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x float> poison, float [[TMP0]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP1:%.*]] = fmul <1 x float> [[BLOCK]], [[SPLAT_SPLAT]]
-; CHECK-NEXT:    [[BLOCK4:%.*]] = shufflevector <2 x float> [[SPLIT1]], <2 x float> undef, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[BLOCK4:%.*]] = shufflevector <2 x float> [[SPLIT1]], <2 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x float> [[SPLIT2]], i64 1
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT5:%.*]] = insertelement <1 x float> poison, float [[TMP2]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT6:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT5]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP3:%.*]] = fmul <1 x float> [[BLOCK4]], [[SPLAT_SPLAT6]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = fadd <1 x float> [[TMP1]], [[TMP3]]
-; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <1 x float> [[TMP4]], <1 x float> undef, <2 x i32> <i32 0, i32 undef>
+; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <1 x float> [[TMP4]], <1 x float> poison, <2 x i32> <i32 0, i32 undef>
 ; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <2 x float> undef, <2 x float> [[TMP5]], <2 x i32> <i32 2, i32 1>
-; CHECK-NEXT:    [[BLOCK7:%.*]] = shufflevector <2 x float> [[SPLIT]], <2 x float> undef, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[BLOCK7:%.*]] = shufflevector <2 x float> [[SPLIT]], <2 x float> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <2 x float> [[SPLIT2]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT8:%.*]] = insertelement <1 x float> poison, float [[TMP7]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT9:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT8]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP8:%.*]] = fmul <1 x float> [[BLOCK7]], [[SPLAT_SPLAT9]]
-; CHECK-NEXT:    [[BLOCK10:%.*]] = shufflevector <2 x float> [[SPLIT1]], <2 x float> undef, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[BLOCK10:%.*]] = shufflevector <2 x float> [[SPLIT1]], <2 x float> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <2 x float> [[SPLIT2]], i64 1
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT11:%.*]] = insertelement <1 x float> poison, float [[TMP9]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT12:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT11]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP10:%.*]] = fmul <1 x float> [[BLOCK10]], [[SPLAT_SPLAT12]]
 ; CHECK-NEXT:    [[TMP11:%.*]] = fadd <1 x float> [[TMP8]], [[TMP10]]
-; CHECK-NEXT:    [[TMP12:%.*]] = shufflevector <1 x float> [[TMP11]], <1 x float> undef, <2 x i32> <i32 0, i32 undef>
+; CHECK-NEXT:    [[TMP12:%.*]] = shufflevector <1 x float> [[TMP11]], <1 x float> poison, <2 x i32> <i32 0, i32 undef>
 ; CHECK-NEXT:    [[TMP13:%.*]] = shufflevector <2 x float> [[TMP6]], <2 x float> [[TMP12]], <2 x i32> <i32 0, i32 2>
-; CHECK-NEXT:    [[BLOCK13:%.*]] = shufflevector <2 x float> [[SPLIT]], <2 x float> undef, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[BLOCK13:%.*]] = shufflevector <2 x float> [[SPLIT]], <2 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <2 x float> [[SPLIT3]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT14:%.*]] = insertelement <1 x float> poison, float [[TMP14]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT15:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT14]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP15:%.*]] = fmul <1 x float> [[BLOCK13]], [[SPLAT_SPLAT15]]
-; CHECK-NEXT:    [[BLOCK16:%.*]] = shufflevector <2 x float> [[SPLIT1]], <2 x float> undef, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[BLOCK16:%.*]] = shufflevector <2 x float> [[SPLIT1]], <2 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <2 x float> [[SPLIT3]], i64 1
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT17:%.*]] = insertelement <1 x float> poison, float [[TMP16]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT18:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT17]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP17:%.*]] = fmul <1 x float> [[BLOCK16]], [[SPLAT_SPLAT18]]
 ; CHECK-NEXT:    [[TMP18:%.*]] = fadd <1 x float> [[TMP15]], [[TMP17]]
-; CHECK-NEXT:    [[TMP19:%.*]] = shufflevector <1 x float> [[TMP18]], <1 x float> undef, <2 x i32> <i32 0, i32 undef>
+; CHECK-NEXT:    [[TMP19:%.*]] = shufflevector <1 x float> [[TMP18]], <1 x float> poison, <2 x i32> <i32 0, i32 undef>
 ; CHECK-NEXT:    [[TMP20:%.*]] = shufflevector <2 x float> undef, <2 x float> [[TMP19]], <2 x i32> <i32 2, i32 1>
-; CHECK-NEXT:    [[BLOCK19:%.*]] = shufflevector <2 x float> [[SPLIT]], <2 x float> undef, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[BLOCK19:%.*]] = shufflevector <2 x float> [[SPLIT]], <2 x float> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP21:%.*]] = extractelement <2 x float> [[SPLIT3]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT20:%.*]] = insertelement <1 x float> poison, float [[TMP21]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT21:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT20]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP22:%.*]] = fmul <1 x float> [[BLOCK19]], [[SPLAT_SPLAT21]]
-; CHECK-NEXT:    [[BLOCK22:%.*]] = shufflevector <2 x float> [[SPLIT1]], <2 x float> undef, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[BLOCK22:%.*]] = shufflevector <2 x float> [[SPLIT1]], <2 x float> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP23:%.*]] = extractelement <2 x float> [[SPLIT3]], i64 1
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT23:%.*]] = insertelement <1 x float> poison, float [[TMP23]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT24:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT23]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP24:%.*]] = fmul <1 x float> [[BLOCK22]], [[SPLAT_SPLAT24]]
 ; CHECK-NEXT:    [[TMP25:%.*]] = fadd <1 x float> [[TMP22]], [[TMP24]]
-; CHECK-NEXT:    [[TMP26:%.*]] = shufflevector <1 x float> [[TMP25]], <1 x float> undef, <2 x i32> <i32 0, i32 undef>
+; CHECK-NEXT:    [[TMP26:%.*]] = shufflevector <1 x float> [[TMP25]], <1 x float> poison, <2 x i32> <i32 0, i32 undef>
 ; CHECK-NEXT:    [[TMP27:%.*]] = shufflevector <2 x float> [[TMP20]], <2 x float> [[TMP26]], <2 x i32> <i32 0, i32 2>
 ; CHECK-NEXT:    [[TMP28:%.*]] = shufflevector <2 x float> [[TMP13]], <2 x float> [[TMP27]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 ; CHECK-NEXT:    ret <4 x float> [[TMP28]]
@@ -75,36 +75,36 @@ declare <4 x float> @llvm.matrix.multiply.v4f32.v4f32.v4f32(<4 x float>, <4 x fl
 define <4 x float> @multiply_1x2(<2 x float> %a, <2 x float> %b) {
 ; CHECK-LABEL: @multiply_1x2(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[SPLIT:%.*]] = shufflevector <2 x float> [[A:%.*]], <2 x float> undef, <2 x i32> <i32 0, i32 1>
-; CHECK-NEXT:    [[SPLIT1:%.*]] = shufflevector <2 x float> [[B:%.*]], <2 x float> undef, <1 x i32> zeroinitializer
-; CHECK-NEXT:    [[SPLIT2:%.*]] = shufflevector <2 x float> [[B]], <2 x float> undef, <1 x i32> <i32 1>
-; CHECK-NEXT:    [[BLOCK:%.*]] = shufflevector <2 x float> [[SPLIT]], <2 x float> undef, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[SPLIT:%.*]] = shufflevector <2 x float> [[A:%.*]], <2 x float> poison, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT:    [[SPLIT1:%.*]] = shufflevector <2 x float> [[B:%.*]], <2 x float> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[SPLIT2:%.*]] = shufflevector <2 x float> [[B]], <2 x float> poison, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[BLOCK:%.*]] = shufflevector <2 x float> [[SPLIT]], <2 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <1 x float> [[SPLIT1]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x float> poison, float [[TMP0]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP1:%.*]] = fmul <1 x float> [[BLOCK]], [[SPLAT_SPLAT]]
-; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <1 x float> [[TMP1]], <1 x float> undef, <2 x i32> <i32 0, i32 undef>
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <1 x float> [[TMP1]], <1 x float> poison, <2 x i32> <i32 0, i32 undef>
 ; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <2 x float> undef, <2 x float> [[TMP2]], <2 x i32> <i32 2, i32 1>
-; CHECK-NEXT:    [[BLOCK3:%.*]] = shufflevector <2 x float> [[SPLIT]], <2 x float> undef, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[BLOCK3:%.*]] = shufflevector <2 x float> [[SPLIT]], <2 x float> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <1 x float> [[SPLIT1]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT4:%.*]] = insertelement <1 x float> poison, float [[TMP4]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT5:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT4]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP5:%.*]] = fmul <1 x float> [[BLOCK3]], [[SPLAT_SPLAT5]]
-; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <1 x float> [[TMP5]], <1 x float> undef, <2 x i32> <i32 0, i32 undef>
+; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <1 x float> [[TMP5]], <1 x float> poison, <2 x i32> <i32 0, i32 undef>
 ; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> [[TMP6]], <2 x i32> <i32 0, i32 2>
-; CHECK-NEXT:    [[BLOCK6:%.*]] = shufflevector <2 x float> [[SPLIT]], <2 x float> undef, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[BLOCK6:%.*]] = shufflevector <2 x float> [[SPLIT]], <2 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <1 x float> [[SPLIT2]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT7:%.*]] = insertelement <1 x float> poison, float [[TMP8]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT8:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT7]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP9:%.*]] = fmul <1 x float> [[BLOCK6]], [[SPLAT_SPLAT8]]
-; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <1 x float> [[TMP9]], <1 x float> undef, <2 x i32> <i32 0, i32 undef>
+; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <1 x float> [[TMP9]], <1 x float> poison, <2 x i32> <i32 0, i32 undef>
 ; CHECK-NEXT:    [[TMP11:%.*]] = shufflevector <2 x float> undef, <2 x float> [[TMP10]], <2 x i32> <i32 2, i32 1>
-; CHECK-NEXT:    [[BLOCK9:%.*]] = shufflevector <2 x float> [[SPLIT]], <2 x float> undef, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[BLOCK9:%.*]] = shufflevector <2 x float> [[SPLIT]], <2 x float> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <1 x float> [[SPLIT2]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT10:%.*]] = insertelement <1 x float> poison, float [[TMP12]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT11:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT10]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP13:%.*]] = fmul <1 x float> [[BLOCK9]], [[SPLAT_SPLAT11]]
-; CHECK-NEXT:    [[TMP14:%.*]] = shufflevector <1 x float> [[TMP13]], <1 x float> undef, <2 x i32> <i32 0, i32 undef>
+; CHECK-NEXT:    [[TMP14:%.*]] = shufflevector <1 x float> [[TMP13]], <1 x float> poison, <2 x i32> <i32 0, i32 undef>
 ; CHECK-NEXT:    [[TMP15:%.*]] = shufflevector <2 x float> [[TMP11]], <2 x float> [[TMP14]], <2 x i32> <i32 0, i32 2>
 ; CHECK-NEXT:    [[TMP16:%.*]] = shufflevector <2 x float> [[TMP7]], <2 x float> [[TMP15]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 ; CHECK-NEXT:    ret <4 x float> [[TMP16]]
@@ -119,130 +119,130 @@ declare <4 x float> @llvm.matrix.multiply.v4f32.v2f32.v2f32(<2 x float>, <2 x fl
 define <9 x float> @multiply_2x3(<6 x float> %a, <6 x float> %b) {
 ; CHECK-LABEL: @multiply_2x3(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[SPLIT:%.*]] = shufflevector <6 x float> [[A:%.*]], <6 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
-; CHECK-NEXT:    [[SPLIT1:%.*]] = shufflevector <6 x float> [[A]], <6 x float> undef, <3 x i32> <i32 3, i32 4, i32 5>
-; CHECK-NEXT:    [[SPLIT2:%.*]] = shufflevector <6 x float> [[B:%.*]], <6 x float> undef, <2 x i32> <i32 0, i32 1>
-; CHECK-NEXT:    [[SPLIT3:%.*]] = shufflevector <6 x float> [[B]], <6 x float> undef, <2 x i32> <i32 2, i32 3>
-; CHECK-NEXT:    [[SPLIT4:%.*]] = shufflevector <6 x float> [[B]], <6 x float> undef, <2 x i32> <i32 4, i32 5>
-; CHECK-NEXT:    [[BLOCK:%.*]] = shufflevector <3 x float> [[SPLIT]], <3 x float> undef, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[SPLIT:%.*]] = shufflevector <6 x float> [[A:%.*]], <6 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
+; CHECK-NEXT:    [[SPLIT1:%.*]] = shufflevector <6 x float> [[A]], <6 x float> poison, <3 x i32> <i32 3, i32 4, i32 5>
+; CHECK-NEXT:    [[SPLIT2:%.*]] = shufflevector <6 x float> [[B:%.*]], <6 x float> poison, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT:    [[SPLIT3:%.*]] = shufflevector <6 x float> [[B]], <6 x float> poison, <2 x i32> <i32 2, i32 3>
+; CHECK-NEXT:    [[SPLIT4:%.*]] = shufflevector <6 x float> [[B]], <6 x float> poison, <2 x i32> <i32 4, i32 5>
+; CHECK-NEXT:    [[BLOCK:%.*]] = shufflevector <3 x float> [[SPLIT]], <3 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x float> [[SPLIT2]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x float> poison, float [[TMP0]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP1:%.*]] = fmul <1 x float> [[BLOCK]], [[SPLAT_SPLAT]]
-; CHECK-NEXT:    [[BLOCK5:%.*]] = shufflevector <3 x float> [[SPLIT1]], <3 x float> undef, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[BLOCK5:%.*]] = shufflevector <3 x float> [[SPLIT1]], <3 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x float> [[SPLIT2]], i64 1
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT6:%.*]] = insertelement <1 x float> poison, float [[TMP2]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT7:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT6]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP3:%.*]] = fmul <1 x float> [[BLOCK5]], [[SPLAT_SPLAT7]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = fadd <1 x float> [[TMP1]], [[TMP3]]
-; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <1 x float> [[TMP4]], <1 x float> undef, <3 x i32> <i32 0, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <1 x float> [[TMP4]], <1 x float> poison, <3 x i32> <i32 0, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <3 x float> undef, <3 x float> [[TMP5]], <3 x i32> <i32 3, i32 1, i32 2>
-; CHECK-NEXT:    [[BLOCK8:%.*]] = shufflevector <3 x float> [[SPLIT]], <3 x float> undef, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[BLOCK8:%.*]] = shufflevector <3 x float> [[SPLIT]], <3 x float> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <2 x float> [[SPLIT2]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT9:%.*]] = insertelement <1 x float> poison, float [[TMP7]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT10:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT9]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP8:%.*]] = fmul <1 x float> [[BLOCK8]], [[SPLAT_SPLAT10]]
-; CHECK-NEXT:    [[BLOCK11:%.*]] = shufflevector <3 x float> [[SPLIT1]], <3 x float> undef, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[BLOCK11:%.*]] = shufflevector <3 x float> [[SPLIT1]], <3 x float> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <2 x float> [[SPLIT2]], i64 1
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT12:%.*]] = insertelement <1 x float> poison, float [[TMP9]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT13:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT12]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP10:%.*]] = fmul <1 x float> [[BLOCK11]], [[SPLAT_SPLAT13]]
 ; CHECK-NEXT:    [[TMP11:%.*]] = fadd <1 x float> [[TMP8]], [[TMP10]]
-; CHECK-NEXT:    [[TMP12:%.*]] = shufflevector <1 x float> [[TMP11]], <1 x float> undef, <3 x i32> <i32 0, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP12:%.*]] = shufflevector <1 x float> [[TMP11]], <1 x float> poison, <3 x i32> <i32 0, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[TMP13:%.*]] = shufflevector <3 x float> [[TMP6]], <3 x float> [[TMP12]], <3 x i32> <i32 0, i32 3, i32 2>
-; CHECK-NEXT:    [[BLOCK14:%.*]] = shufflevector <3 x float> [[SPLIT]], <3 x float> undef, <1 x i32> <i32 2>
+; CHECK-NEXT:    [[BLOCK14:%.*]] = shufflevector <3 x float> [[SPLIT]], <3 x float> poison, <1 x i32> <i32 2>
 ; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <2 x float> [[SPLIT2]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT15:%.*]] = insertelement <1 x float> poison, float [[TMP14]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT16:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT15]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP15:%.*]] = fmul <1 x float> [[BLOCK14]], [[SPLAT_SPLAT16]]
-; CHECK-NEXT:    [[BLOCK17:%.*]] = shufflevector <3 x float> [[SPLIT1]], <3 x float> undef, <1 x i32> <i32 2>
+; CHECK-NEXT:    [[BLOCK17:%.*]] = shufflevector <3 x float> [[SPLIT1]], <3 x float> poison, <1 x i32> <i32 2>
 ; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <2 x float> [[SPLIT2]], i64 1
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT18:%.*]] = insertelement <1 x float> poison, float [[TMP16]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT19:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT18]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP17:%.*]] = fmul <1 x float> [[BLOCK17]], [[SPLAT_SPLAT19]]
 ; CHECK-NEXT:    [[TMP18:%.*]] = fadd <1 x float> [[TMP15]], [[TMP17]]
-; CHECK-NEXT:    [[TMP19:%.*]] = shufflevector <1 x float> [[TMP18]], <1 x float> undef, <3 x i32> <i32 0, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP19:%.*]] = shufflevector <1 x float> [[TMP18]], <1 x float> poison, <3 x i32> <i32 0, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[TMP20:%.*]] = shufflevector <3 x float> [[TMP13]], <3 x float> [[TMP19]], <3 x i32> <i32 0, i32 1, i32 3>
-; CHECK-NEXT:    [[BLOCK20:%.*]] = shufflevector <3 x float> [[SPLIT]], <3 x float> undef, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[BLOCK20:%.*]] = shufflevector <3 x float> [[SPLIT]], <3 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP21:%.*]] = extractelement <2 x float> [[SPLIT3]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT21:%.*]] = insertelement <1 x float> poison, float [[TMP21]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT22:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT21]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP22:%.*]] = fmul <1 x float> [[BLOCK20]], [[SPLAT_SPLAT22]]
-; CHECK-NEXT:    [[BLOCK23:%.*]] = shufflevector <3 x float> [[SPLIT1]], <3 x float> undef, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[BLOCK23:%.*]] = shufflevector <3 x float> [[SPLIT1]], <3 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP23:%.*]] = extractelement <2 x float> [[SPLIT3]], i64 1
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT24:%.*]] = insertelement <1 x float> poison, float [[TMP23]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT25:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT24]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP24:%.*]] = fmul <1 x float> [[BLOCK23]], [[SPLAT_SPLAT25]]
 ; CHECK-NEXT:    [[TMP25:%.*]] = fadd <1 x float> [[TMP22]], [[TMP24]]
-; CHECK-NEXT:    [[TMP26:%.*]] = shufflevector <1 x float> [[TMP25]], <1 x float> undef, <3 x i32> <i32 0, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP26:%.*]] = shufflevector <1 x float> [[TMP25]], <1 x float> poison, <3 x i32> <i32 0, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[TMP27:%.*]] = shufflevector <3 x float> undef, <3 x float> [[TMP26]], <3 x i32> <i32 3, i32 1, i32 2>
-; CHECK-NEXT:    [[BLOCK26:%.*]] = shufflevector <3 x float> [[SPLIT]], <3 x float> undef, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[BLOCK26:%.*]] = shufflevector <3 x float> [[SPLIT]], <3 x float> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP28:%.*]] = extractelement <2 x float> [[SPLIT3]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT27:%.*]] = insertelement <1 x float> poison, float [[TMP28]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT28:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT27]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP29:%.*]] = fmul <1 x float> [[BLOCK26]], [[SPLAT_SPLAT28]]
-; CHECK-NEXT:    [[BLOCK29:%.*]] = shufflevector <3 x float> [[SPLIT1]], <3 x float> undef, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[BLOCK29:%.*]] = shufflevector <3 x float> [[SPLIT1]], <3 x float> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP30:%.*]] = extractelement <2 x float> [[SPLIT3]], i64 1
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT30:%.*]] = insertelement <1 x float> poison, float [[TMP30]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT31:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT30]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP31:%.*]] = fmul <1 x float> [[BLOCK29]], [[SPLAT_SPLAT31]]
 ; CHECK-NEXT:    [[TMP32:%.*]] = fadd <1 x float> [[TMP29]], [[TMP31]]
-; CHECK-NEXT:    [[TMP33:%.*]] = shufflevector <1 x float> [[TMP32]], <1 x float> undef, <3 x i32> <i32 0, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP33:%.*]] = shufflevector <1 x float> [[TMP32]], <1 x float> poison, <3 x i32> <i32 0, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[TMP34:%.*]] = shufflevector <3 x float> [[TMP27]], <3 x float> [[TMP33]], <3 x i32> <i32 0, i32 3, i32 2>
-; CHECK-NEXT:    [[BLOCK32:%.*]] = shufflevector <3 x float> [[SPLIT]], <3 x float> undef, <1 x i32> <i32 2>
+; CHECK-NEXT:    [[BLOCK32:%.*]] = shufflevector <3 x float> [[SPLIT]], <3 x float> poison, <1 x i32> <i32 2>
 ; CHECK-NEXT:    [[TMP35:%.*]] = extractelement <2 x float> [[SPLIT3]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT33:%.*]] = insertelement <1 x float> poison, float [[TMP35]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT34:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT33]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP36:%.*]] = fmul <1 x float> [[BLOCK32]], [[SPLAT_SPLAT34]]
-; CHECK-NEXT:    [[BLOCK35:%.*]] = shufflevector <3 x float> [[SPLIT1]], <3 x float> undef, <1 x i32> <i32 2>
+; CHECK-NEXT:    [[BLOCK35:%.*]] = shufflevector <3 x float> [[SPLIT1]], <3 x float> poison, <1 x i32> <i32 2>
 ; CHECK-NEXT:    [[TMP37:%.*]] = extractelement <2 x float> [[SPLIT3]], i64 1
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT36:%.*]] = insertelement <1 x float> poison, float [[TMP37]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT37:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT36]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP38:%.*]] = fmul <1 x float> [[BLOCK35]], [[SPLAT_SPLAT37]]
 ; CHECK-NEXT:    [[TMP39:%.*]] = fadd <1 x float> [[TMP36]], [[TMP38]]
-; CHECK-NEXT:    [[TMP40:%.*]] = shufflevector <1 x float> [[TMP39]], <1 x float> undef, <3 x i32> <i32 0, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP40:%.*]] = shufflevector <1 x float> [[TMP39]], <1 x float> poison, <3 x i32> <i32 0, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[TMP41:%.*]] = shufflevector <3 x float> [[TMP34]], <3 x float> [[TMP40]], <3 x i32> <i32 0, i32 1, i32 3>
-; CHECK-NEXT:    [[BLOCK38:%.*]] = shufflevector <3 x float> [[SPLIT]], <3 x float> undef, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[BLOCK38:%.*]] = shufflevector <3 x float> [[SPLIT]], <3 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP42:%.*]] = extractelement <2 x float> [[SPLIT4]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT39:%.*]] = insertelement <1 x float> poison, float [[TMP42]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT40:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT39]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP43:%.*]] = fmul <1 x float> [[BLOCK38]], [[SPLAT_SPLAT40]]
-; CHECK-NEXT:    [[BLOCK41:%.*]] = shufflevector <3 x float> [[SPLIT1]], <3 x float> undef, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[BLOCK41:%.*]] = shufflevector <3 x float> [[SPLIT1]], <3 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP44:%.*]] = extractelement <2 x float> [[SPLIT4]], i64 1
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT42:%.*]] = insertelement <1 x float> poison, float [[TMP44]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT43:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT42]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP45:%.*]] = fmul <1 x float> [[BLOCK41]], [[SPLAT_SPLAT43]]
 ; CHECK-NEXT:    [[TMP46:%.*]] = fadd <1 x float> [[TMP43]], [[TMP45]]
-; CHECK-NEXT:    [[TMP47:%.*]] = shufflevector <1 x float> [[TMP46]], <1 x float> undef, <3 x i32> <i32 0, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP47:%.*]] = shufflevector <1 x float> [[TMP46]], <1 x float> poison, <3 x i32> <i32 0, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[TMP48:%.*]] = shufflevector <3 x float> undef, <3 x float> [[TMP47]], <3 x i32> <i32 3, i32 1, i32 2>
-; CHECK-NEXT:    [[BLOCK44:%.*]] = shufflevector <3 x float> [[SPLIT]], <3 x float> undef, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[BLOCK44:%.*]] = shufflevector <3 x float> [[SPLIT]], <3 x float> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP49:%.*]] = extractelement <2 x float> [[SPLIT4]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT45:%.*]] = insertelement <1 x float> poison, float [[TMP49]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT46:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT45]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP50:%.*]] = fmul <1 x float> [[BLOCK44]], [[SPLAT_SPLAT46]]
-; CHECK-NEXT:    [[BLOCK47:%.*]] = shufflevector <3 x float> [[SPLIT1]], <3 x float> undef, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[BLOCK47:%.*]] = shufflevector <3 x float> [[SPLIT1]], <3 x float> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP51:%.*]] = extractelement <2 x float> [[SPLIT4]], i64 1
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT48:%.*]] = insertelement <1 x float> poison, float [[TMP51]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT49:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT48]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP52:%.*]] = fmul <1 x float> [[BLOCK47]], [[SPLAT_SPLAT49]]
 ; CHECK-NEXT:    [[TMP53:%.*]] = fadd <1 x float> [[TMP50]], [[TMP52]]
-; CHECK-NEXT:    [[TMP54:%.*]] = shufflevector <1 x float> [[TMP53]], <1 x float> undef, <3 x i32> <i32 0, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP54:%.*]] = shufflevector <1 x float> [[TMP53]], <1 x float> poison, <3 x i32> <i32 0, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[TMP55:%.*]] = shufflevector <3 x float> [[TMP48]], <3 x float> [[TMP54]], <3 x i32> <i32 0, i32 3, i32 2>
-; CHECK-NEXT:    [[BLOCK50:%.*]] = shufflevector <3 x float> [[SPLIT]], <3 x float> undef, <1 x i32> <i32 2>
+; CHECK-NEXT:    [[BLOCK50:%.*]] = shufflevector <3 x float> [[SPLIT]], <3 x float> poison, <1 x i32> <i32 2>
 ; CHECK-NEXT:    [[TMP56:%.*]] = extractelement <2 x float> [[SPLIT4]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT51:%.*]] = insertelement <1 x float> poison, float [[TMP56]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT52:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT51]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP57:%.*]] = fmul <1 x float> [[BLOCK50]], [[SPLAT_SPLAT52]]
-; CHECK-NEXT:    [[BLOCK53:%.*]] = shufflevector <3 x float> [[SPLIT1]], <3 x float> undef, <1 x i32> <i32 2>
+; CHECK-NEXT:    [[BLOCK53:%.*]] = shufflevector <3 x float> [[SPLIT1]], <3 x float> poison, <1 x i32> <i32 2>
 ; CHECK-NEXT:    [[TMP58:%.*]] = extractelement <2 x float> [[SPLIT4]], i64 1
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT54:%.*]] = insertelement <1 x float> poison, float [[TMP58]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT55:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT54]], <1 x float> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP59:%.*]] = fmul <1 x float> [[BLOCK53]], [[SPLAT_SPLAT55]]
 ; CHECK-NEXT:    [[TMP60:%.*]] = fadd <1 x float> [[TMP57]], [[TMP59]]
-; CHECK-NEXT:    [[TMP61:%.*]] = shufflevector <1 x float> [[TMP60]], <1 x float> undef, <3 x i32> <i32 0, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP61:%.*]] = shufflevector <1 x float> [[TMP60]], <1 x float> poison, <3 x i32> <i32 0, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[TMP62:%.*]] = shufflevector <3 x float> [[TMP55]], <3 x float> [[TMP61]], <3 x i32> <i32 0, i32 1, i32 3>
 ; CHECK-NEXT:    [[TMP63:%.*]] = shufflevector <3 x float> [[TMP20]], <3 x float> [[TMP41]], <6 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5>
-; CHECK-NEXT:    [[TMP64:%.*]] = shufflevector <3 x float> [[TMP62]], <3 x float> undef, <6 x i32> <i32 0, i32 1, i32 2, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP64:%.*]] = shufflevector <3 x float> [[TMP62]], <3 x float> poison, <6 x i32> <i32 0, i32 1, i32 2, i32 undef, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[TMP65:%.*]] = shufflevector <6 x float> [[TMP63]], <6 x float> [[TMP64]], <9 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
 ; CHECK-NEXT:    ret <9 x float> [[TMP65]]
 ;

diff  --git a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-fused-volatile.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-fused-volatile.ll
index e2d93ca76f58..70763431b1f3 100644
--- a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-fused-volatile.ll
+++ b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-fused-volatile.ll
@@ -48,31 +48,31 @@ define void @multiply_all_volatile(<4 x double>* noalias %A, <4 x double>* noali
 ; CHECK-NEXT:    [[VEC_GEP6:%.*]] = getelementptr double, double* [[TMP9]], i64 2
 ; CHECK-NEXT:    [[VEC_CAST7:%.*]] = bitcast double* [[VEC_GEP6]] to <2 x double>*
 ; CHECK-NEXT:    [[COL_LOAD8:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST7]], align 8
-; CHECK-NEXT:    [[BLOCK:%.*]] = shufflevector <2 x double> [[RESULT_VEC_0]], <2 x double> undef, <2 x i32> <i32 0, i32 1>
-; CHECK-NEXT:    [[BLOCK9:%.*]] = shufflevector <2 x double> [[COL_LOAD]], <2 x double> undef, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT:    [[BLOCK:%.*]] = shufflevector <2 x double> [[RESULT_VEC_0]], <2 x double> poison, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT:    [[BLOCK9:%.*]] = shufflevector <2 x double> [[COL_LOAD]], <2 x double> poison, <2 x i32> <i32 0, i32 1>
 ; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <2 x double> [[COL_LOAD5]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <2 x double> poison, double [[TMP10]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP11:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK9]], <2 x double> [[SPLAT_SPLAT]], <2 x double> [[BLOCK]])
-; CHECK-NEXT:    [[BLOCK10:%.*]] = shufflevector <2 x double> [[COL_LOAD2]], <2 x double> undef, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT:    [[BLOCK10:%.*]] = shufflevector <2 x double> [[COL_LOAD2]], <2 x double> poison, <2 x i32> <i32 0, i32 1>
 ; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <2 x double> [[COL_LOAD5]], i64 1
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT11:%.*]] = insertelement <2 x double> poison, double [[TMP12]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT12:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT11]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP13:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK10]], <2 x double> [[SPLAT_SPLAT12]], <2 x double> [[TMP11]])
-; CHECK-NEXT:    [[TMP14:%.*]] = shufflevector <2 x double> [[TMP13]], <2 x double> undef, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT:    [[TMP14:%.*]] = shufflevector <2 x double> [[TMP13]], <2 x double> poison, <2 x i32> <i32 0, i32 1>
 ; CHECK-NEXT:    [[TMP15]] = shufflevector <2 x double> [[RESULT_VEC_0]], <2 x double> [[TMP14]], <2 x i32> <i32 2, i32 3>
-; CHECK-NEXT:    [[BLOCK13:%.*]] = shufflevector <2 x double> [[RESULT_VEC_1]], <2 x double> undef, <2 x i32> <i32 0, i32 1>
-; CHECK-NEXT:    [[BLOCK14:%.*]] = shufflevector <2 x double> [[COL_LOAD]], <2 x double> undef, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT:    [[BLOCK13:%.*]] = shufflevector <2 x double> [[RESULT_VEC_1]], <2 x double> poison, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT:    [[BLOCK14:%.*]] = shufflevector <2 x double> [[COL_LOAD]], <2 x double> poison, <2 x i32> <i32 0, i32 1>
 ; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <2 x double> [[COL_LOAD8]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT15:%.*]] = insertelement <2 x double> poison, double [[TMP16]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT16:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT15]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP17:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK14]], <2 x double> [[SPLAT_SPLAT16]], <2 x double> [[BLOCK13]])
-; CHECK-NEXT:    [[BLOCK17:%.*]] = shufflevector <2 x double> [[COL_LOAD2]], <2 x double> undef, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT:    [[BLOCK17:%.*]] = shufflevector <2 x double> [[COL_LOAD2]], <2 x double> poison, <2 x i32> <i32 0, i32 1>
 ; CHECK-NEXT:    [[TMP18:%.*]] = extractelement <2 x double> [[COL_LOAD8]], i64 1
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT18:%.*]] = insertelement <2 x double> poison, double [[TMP18]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT19:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT18]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP19:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK17]], <2 x double> [[SPLAT_SPLAT19]], <2 x double> [[TMP17]])
-; CHECK-NEXT:    [[TMP20:%.*]] = shufflevector <2 x double> [[TMP19]], <2 x double> undef, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT:    [[TMP20:%.*]] = shufflevector <2 x double> [[TMP19]], <2 x double> poison, <2 x i32> <i32 0, i32 1>
 ; CHECK-NEXT:    [[TMP21]] = shufflevector <2 x double> [[RESULT_VEC_1]], <2 x double> [[TMP20]], <2 x i32> <i32 2, i32 3>
 ; CHECK-NEXT:    br label [[INNER_LATCH]]
 ; CHECK:       inner.latch:
@@ -156,31 +156,31 @@ define void @multiply_load0_volatile(<4 x double>* noalias %A, <4 x double>* noa
 ; CHECK-NEXT:    [[VEC_GEP6:%.*]] = getelementptr double, double* [[TMP9]], i64 2
 ; CHECK-NEXT:    [[VEC_CAST7:%.*]] = bitcast double* [[VEC_GEP6]] to <2 x double>*
 ; CHECK-NEXT:    [[COL_LOAD8:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST7]], align 8
-; CHECK-NEXT:    [[BLOCK:%.*]] = shufflevector <2 x double> [[RESULT_VEC_0]], <2 x double> undef, <2 x i32> <i32 0, i32 1>
-; CHECK-NEXT:    [[BLOCK9:%.*]] = shufflevector <2 x double> [[COL_LOAD]], <2 x double> undef, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT:    [[BLOCK:%.*]] = shufflevector <2 x double> [[RESULT_VEC_0]], <2 x double> poison, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT:    [[BLOCK9:%.*]] = shufflevector <2 x double> [[COL_LOAD]], <2 x double> poison, <2 x i32> <i32 0, i32 1>
 ; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <2 x double> [[COL_LOAD5]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <2 x double> poison, double [[TMP10]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP11:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK9]], <2 x double> [[SPLAT_SPLAT]], <2 x double> [[BLOCK]])
-; CHECK-NEXT:    [[BLOCK10:%.*]] = shufflevector <2 x double> [[COL_LOAD2]], <2 x double> undef, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT:    [[BLOCK10:%.*]] = shufflevector <2 x double> [[COL_LOAD2]], <2 x double> poison, <2 x i32> <i32 0, i32 1>
 ; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <2 x double> [[COL_LOAD5]], i64 1
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT11:%.*]] = insertelement <2 x double> poison, double [[TMP12]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT12:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT11]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP13:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK10]], <2 x double> [[SPLAT_SPLAT12]], <2 x double> [[TMP11]])
-; CHECK-NEXT:    [[TMP14:%.*]] = shufflevector <2 x double> [[TMP13]], <2 x double> undef, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT:    [[TMP14:%.*]] = shufflevector <2 x double> [[TMP13]], <2 x double> poison, <2 x i32> <i32 0, i32 1>
 ; CHECK-NEXT:    [[TMP15]] = shufflevector <2 x double> [[RESULT_VEC_0]], <2 x double> [[TMP14]], <2 x i32> <i32 2, i32 3>
-; CHECK-NEXT:    [[BLOCK13:%.*]] = shufflevector <2 x double> [[RESULT_VEC_1]], <2 x double> undef, <2 x i32> <i32 0, i32 1>
-; CHECK-NEXT:    [[BLOCK14:%.*]] = shufflevector <2 x double> [[COL_LOAD]], <2 x double> undef, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT:    [[BLOCK13:%.*]] = shufflevector <2 x double> [[RESULT_VEC_1]], <2 x double> poison, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT:    [[BLOCK14:%.*]] = shufflevector <2 x double> [[COL_LOAD]], <2 x double> poison, <2 x i32> <i32 0, i32 1>
 ; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <2 x double> [[COL_LOAD8]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT15:%.*]] = insertelement <2 x double> poison, double [[TMP16]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT16:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT15]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP17:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK14]], <2 x double> [[SPLAT_SPLAT16]], <2 x double> [[BLOCK13]])
-; CHECK-NEXT:    [[BLOCK17:%.*]] = shufflevector <2 x double> [[COL_LOAD2]], <2 x double> undef, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT:    [[BLOCK17:%.*]] = shufflevector <2 x double> [[COL_LOAD2]], <2 x double> poison, <2 x i32> <i32 0, i32 1>
 ; CHECK-NEXT:    [[TMP18:%.*]] = extractelement <2 x double> [[COL_LOAD8]], i64 1
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT18:%.*]] = insertelement <2 x double> poison, double [[TMP18]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT19:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT18]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP19:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK17]], <2 x double> [[SPLAT_SPLAT19]], <2 x double> [[TMP17]])
-; CHECK-NEXT:    [[TMP20:%.*]] = shufflevector <2 x double> [[TMP19]], <2 x double> undef, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT:    [[TMP20:%.*]] = shufflevector <2 x double> [[TMP19]], <2 x double> poison, <2 x i32> <i32 0, i32 1>
 ; CHECK-NEXT:    [[TMP21]] = shufflevector <2 x double> [[RESULT_VEC_1]], <2 x double> [[TMP20]], <2 x i32> <i32 2, i32 3>
 ; CHECK-NEXT:    br label [[INNER_LATCH]]
 ; CHECK:       inner.latch:
@@ -263,31 +263,31 @@ define void @multiply_load1_volatile(<4 x double>* noalias %A, <4 x double>* noa
 ; CHECK-NEXT:    [[VEC_GEP6:%.*]] = getelementptr double, double* [[TMP9]], i64 2
 ; CHECK-NEXT:    [[VEC_CAST7:%.*]] = bitcast double* [[VEC_GEP6]] to <2 x double>*
 ; CHECK-NEXT:    [[COL_LOAD8:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST7]], align 8
-; CHECK-NEXT:    [[BLOCK:%.*]] = shufflevector <2 x double> [[RESULT_VEC_0]], <2 x double> undef, <2 x i32> <i32 0, i32 1>
-; CHECK-NEXT:    [[BLOCK9:%.*]] = shufflevector <2 x double> [[COL_LOAD]], <2 x double> undef, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT:    [[BLOCK:%.*]] = shufflevector <2 x double> [[RESULT_VEC_0]], <2 x double> poison, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT:    [[BLOCK9:%.*]] = shufflevector <2 x double> [[COL_LOAD]], <2 x double> poison, <2 x i32> <i32 0, i32 1>
 ; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <2 x double> [[COL_LOAD5]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <2 x double> poison, double [[TMP10]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP11:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK9]], <2 x double> [[SPLAT_SPLAT]], <2 x double> [[BLOCK]])
-; CHECK-NEXT:    [[BLOCK10:%.*]] = shufflevector <2 x double> [[COL_LOAD2]], <2 x double> undef, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT:    [[BLOCK10:%.*]] = shufflevector <2 x double> [[COL_LOAD2]], <2 x double> poison, <2 x i32> <i32 0, i32 1>
 ; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <2 x double> [[COL_LOAD5]], i64 1
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT11:%.*]] = insertelement <2 x double> poison, double [[TMP12]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT12:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT11]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP13:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK10]], <2 x double> [[SPLAT_SPLAT12]], <2 x double> [[TMP11]])
-; CHECK-NEXT:    [[TMP14:%.*]] = shufflevector <2 x double> [[TMP13]], <2 x double> undef, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT:    [[TMP14:%.*]] = shufflevector <2 x double> [[TMP13]], <2 x double> poison, <2 x i32> <i32 0, i32 1>
 ; CHECK-NEXT:    [[TMP15]] = shufflevector <2 x double> [[RESULT_VEC_0]], <2 x double> [[TMP14]], <2 x i32> <i32 2, i32 3>
-; CHECK-NEXT:    [[BLOCK13:%.*]] = shufflevector <2 x double> [[RESULT_VEC_1]], <2 x double> undef, <2 x i32> <i32 0, i32 1>
-; CHECK-NEXT:    [[BLOCK14:%.*]] = shufflevector <2 x double> [[COL_LOAD]], <2 x double> undef, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT:    [[BLOCK13:%.*]] = shufflevector <2 x double> [[RESULT_VEC_1]], <2 x double> poison, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT:    [[BLOCK14:%.*]] = shufflevector <2 x double> [[COL_LOAD]], <2 x double> poison, <2 x i32> <i32 0, i32 1>
 ; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <2 x double> [[COL_LOAD8]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT15:%.*]] = insertelement <2 x double> poison, double [[TMP16]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT16:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT15]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP17:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK14]], <2 x double> [[SPLAT_SPLAT16]], <2 x double> [[BLOCK13]])
-; CHECK-NEXT:    [[BLOCK17:%.*]] = shufflevector <2 x double> [[COL_LOAD2]], <2 x double> undef, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT:    [[BLOCK17:%.*]] = shufflevector <2 x double> [[COL_LOAD2]], <2 x double> poison, <2 x i32> <i32 0, i32 1>
 ; CHECK-NEXT:    [[TMP18:%.*]] = extractelement <2 x double> [[COL_LOAD8]], i64 1
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT18:%.*]] = insertelement <2 x double> poison, double [[TMP18]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT19:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT18]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP19:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK17]], <2 x double> [[SPLAT_SPLAT19]], <2 x double> [[TMP17]])
-; CHECK-NEXT:    [[TMP20:%.*]] = shufflevector <2 x double> [[TMP19]], <2 x double> undef, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT:    [[TMP20:%.*]] = shufflevector <2 x double> [[TMP19]], <2 x double> poison, <2 x i32> <i32 0, i32 1>
 ; CHECK-NEXT:    [[TMP21]] = shufflevector <2 x double> [[RESULT_VEC_1]], <2 x double> [[TMP20]], <2 x i32> <i32 2, i32 3>
 ; CHECK-NEXT:    br label [[INNER_LATCH]]
 ; CHECK:       inner.latch:
@@ -370,31 +370,31 @@ define void @multiply_store_volatile(<4 x double>* noalias %A, <4 x double>* noa
 ; CHECK-NEXT:    [[VEC_GEP6:%.*]] = getelementptr double, double* [[TMP9]], i64 2
 ; CHECK-NEXT:    [[VEC_CAST7:%.*]] = bitcast double* [[VEC_GEP6]] to <2 x double>*
 ; CHECK-NEXT:    [[COL_LOAD8:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST7]], align 8
-; CHECK-NEXT:    [[BLOCK:%.*]] = shufflevector <2 x double> [[RESULT_VEC_0]], <2 x double> undef, <2 x i32> <i32 0, i32 1>
-; CHECK-NEXT:    [[BLOCK9:%.*]] = shufflevector <2 x double> [[COL_LOAD]], <2 x double> undef, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT:    [[BLOCK:%.*]] = shufflevector <2 x double> [[RESULT_VEC_0]], <2 x double> poison, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT:    [[BLOCK9:%.*]] = shufflevector <2 x double> [[COL_LOAD]], <2 x double> poison, <2 x i32> <i32 0, i32 1>
 ; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <2 x double> [[COL_LOAD5]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <2 x double> poison, double [[TMP10]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP11:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK9]], <2 x double> [[SPLAT_SPLAT]], <2 x double> [[BLOCK]])
-; CHECK-NEXT:    [[BLOCK10:%.*]] = shufflevector <2 x double> [[COL_LOAD2]], <2 x double> undef, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT:    [[BLOCK10:%.*]] = shufflevector <2 x double> [[COL_LOAD2]], <2 x double> poison, <2 x i32> <i32 0, i32 1>
 ; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <2 x double> [[COL_LOAD5]], i64 1
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT11:%.*]] = insertelement <2 x double> poison, double [[TMP12]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT12:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT11]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP13:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK10]], <2 x double> [[SPLAT_SPLAT12]], <2 x double> [[TMP11]])
-; CHECK-NEXT:    [[TMP14:%.*]] = shufflevector <2 x double> [[TMP13]], <2 x double> undef, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT:    [[TMP14:%.*]] = shufflevector <2 x double> [[TMP13]], <2 x double> poison, <2 x i32> <i32 0, i32 1>
 ; CHECK-NEXT:    [[TMP15]] = shufflevector <2 x double> [[RESULT_VEC_0]], <2 x double> [[TMP14]], <2 x i32> <i32 2, i32 3>
-; CHECK-NEXT:    [[BLOCK13:%.*]] = shufflevector <2 x double> [[RESULT_VEC_1]], <2 x double> undef, <2 x i32> <i32 0, i32 1>
-; CHECK-NEXT:    [[BLOCK14:%.*]] = shufflevector <2 x double> [[COL_LOAD]], <2 x double> undef, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT:    [[BLOCK13:%.*]] = shufflevector <2 x double> [[RESULT_VEC_1]], <2 x double> poison, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT:    [[BLOCK14:%.*]] = shufflevector <2 x double> [[COL_LOAD]], <2 x double> poison, <2 x i32> <i32 0, i32 1>
 ; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <2 x double> [[COL_LOAD8]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT15:%.*]] = insertelement <2 x double> poison, double [[TMP16]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT16:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT15]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP17:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK14]], <2 x double> [[SPLAT_SPLAT16]], <2 x double> [[BLOCK13]])
-; CHECK-NEXT:    [[BLOCK17:%.*]] = shufflevector <2 x double> [[COL_LOAD2]], <2 x double> undef, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT:    [[BLOCK17:%.*]] = shufflevector <2 x double> [[COL_LOAD2]], <2 x double> poison, <2 x i32> <i32 0, i32 1>
 ; CHECK-NEXT:    [[TMP18:%.*]] = extractelement <2 x double> [[COL_LOAD8]], i64 1
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT18:%.*]] = insertelement <2 x double> poison, double [[TMP18]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT19:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT18]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP19:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK17]], <2 x double> [[SPLAT_SPLAT19]], <2 x double> [[TMP17]])
-; CHECK-NEXT:    [[TMP20:%.*]] = shufflevector <2 x double> [[TMP19]], <2 x double> undef, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT:    [[TMP20:%.*]] = shufflevector <2 x double> [[TMP19]], <2 x double> poison, <2 x i32> <i32 0, i32 1>
 ; CHECK-NEXT:    [[TMP21]] = shufflevector <2 x double> [[RESULT_VEC_1]], <2 x double> [[TMP20]], <2 x i32> <i32 2, i32 3>
 ; CHECK-NEXT:    br label [[INNER_LATCH]]
 ; CHECK:       inner.latch:

diff  --git a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-i32-row-major.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-i32-row-major.ll
index f84499090537..d5a2f844e8dd 100644
--- a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-i32-row-major.ll
+++ b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-i32-row-major.ll
@@ -7,61 +7,61 @@
 define <4 x i32> @multiply_2x2(<4 x i32> %a, <4 x i32> %b) {
 ; RM-LABEL: @multiply_2x2(
 ; RM-NEXT:  entry:
-; RM-NEXT:    [[SPLIT:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> undef, <2 x i32> <i32 0, i32 1>
-; RM-NEXT:    [[SPLIT1:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-; RM-NEXT:    [[SPLIT2:%.*]] = shufflevector <4 x i32> [[B:%.*]], <4 x i32> undef, <2 x i32> <i32 0, i32 1>
-; RM-NEXT:    [[SPLIT3:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-; RM-NEXT:    [[BLOCK:%.*]] = shufflevector <2 x i32> [[SPLIT2]], <2 x i32> undef, <1 x i32> zeroinitializer
+; RM-NEXT:    [[SPLIT:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> poison, <2 x i32> <i32 0, i32 1>
+; RM-NEXT:    [[SPLIT1:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> poison, <2 x i32> <i32 2, i32 3>
+; RM-NEXT:    [[SPLIT2:%.*]] = shufflevector <4 x i32> [[B:%.*]], <4 x i32> poison, <2 x i32> <i32 0, i32 1>
+; RM-NEXT:    [[SPLIT3:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> poison, <2 x i32> <i32 2, i32 3>
+; RM-NEXT:    [[BLOCK:%.*]] = shufflevector <2 x i32> [[SPLIT2]], <2 x i32> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP0:%.*]] = extractelement <2 x i32> [[SPLIT]], i64 0
 ; RM-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x i32> poison, i32 [[TMP0]], i32 0
 ; RM-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP1:%.*]] = mul <1 x i32> [[SPLAT_SPLAT]], [[BLOCK]]
-; RM-NEXT:    [[BLOCK4:%.*]] = shufflevector <2 x i32> [[SPLIT3]], <2 x i32> undef, <1 x i32> zeroinitializer
+; RM-NEXT:    [[BLOCK4:%.*]] = shufflevector <2 x i32> [[SPLIT3]], <2 x i32> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP2:%.*]] = extractelement <2 x i32> [[SPLIT]], i64 1
 ; RM-NEXT:    [[SPLAT_SPLATINSERT5:%.*]] = insertelement <1 x i32> poison, i32 [[TMP2]], i32 0
 ; RM-NEXT:    [[SPLAT_SPLAT6:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT5]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP3:%.*]] = mul <1 x i32> [[SPLAT_SPLAT6]], [[BLOCK4]]
 ; RM-NEXT:    [[TMP4:%.*]] = add <1 x i32> [[TMP1]], [[TMP3]]
-; RM-NEXT:    [[TMP5:%.*]] = shufflevector <1 x i32> [[TMP4]], <1 x i32> undef, <2 x i32> <i32 0, i32 undef>
+; RM-NEXT:    [[TMP5:%.*]] = shufflevector <1 x i32> [[TMP4]], <1 x i32> poison, <2 x i32> <i32 0, i32 undef>
 ; RM-NEXT:    [[TMP6:%.*]] = shufflevector <2 x i32> undef, <2 x i32> [[TMP5]], <2 x i32> <i32 2, i32 1>
-; RM-NEXT:    [[BLOCK7:%.*]] = shufflevector <2 x i32> [[SPLIT2]], <2 x i32> undef, <1 x i32> <i32 1>
+; RM-NEXT:    [[BLOCK7:%.*]] = shufflevector <2 x i32> [[SPLIT2]], <2 x i32> poison, <1 x i32> <i32 1>
 ; RM-NEXT:    [[TMP7:%.*]] = extractelement <2 x i32> [[SPLIT]], i64 0
 ; RM-NEXT:    [[SPLAT_SPLATINSERT8:%.*]] = insertelement <1 x i32> poison, i32 [[TMP7]], i32 0
 ; RM-NEXT:    [[SPLAT_SPLAT9:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT8]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP8:%.*]] = mul <1 x i32> [[SPLAT_SPLAT9]], [[BLOCK7]]
-; RM-NEXT:    [[BLOCK10:%.*]] = shufflevector <2 x i32> [[SPLIT3]], <2 x i32> undef, <1 x i32> <i32 1>
+; RM-NEXT:    [[BLOCK10:%.*]] = shufflevector <2 x i32> [[SPLIT3]], <2 x i32> poison, <1 x i32> <i32 1>
 ; RM-NEXT:    [[TMP9:%.*]] = extractelement <2 x i32> [[SPLIT]], i64 1
 ; RM-NEXT:    [[SPLAT_SPLATINSERT11:%.*]] = insertelement <1 x i32> poison, i32 [[TMP9]], i32 0
 ; RM-NEXT:    [[SPLAT_SPLAT12:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT11]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP10:%.*]] = mul <1 x i32> [[SPLAT_SPLAT12]], [[BLOCK10]]
 ; RM-NEXT:    [[TMP11:%.*]] = add <1 x i32> [[TMP8]], [[TMP10]]
-; RM-NEXT:    [[TMP12:%.*]] = shufflevector <1 x i32> [[TMP11]], <1 x i32> undef, <2 x i32> <i32 0, i32 undef>
+; RM-NEXT:    [[TMP12:%.*]] = shufflevector <1 x i32> [[TMP11]], <1 x i32> poison, <2 x i32> <i32 0, i32 undef>
 ; RM-NEXT:    [[TMP13:%.*]] = shufflevector <2 x i32> [[TMP6]], <2 x i32> [[TMP12]], <2 x i32> <i32 0, i32 2>
-; RM-NEXT:    [[BLOCK13:%.*]] = shufflevector <2 x i32> [[SPLIT2]], <2 x i32> undef, <1 x i32> zeroinitializer
+; RM-NEXT:    [[BLOCK13:%.*]] = shufflevector <2 x i32> [[SPLIT2]], <2 x i32> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP14:%.*]] = extractelement <2 x i32> [[SPLIT1]], i64 0
 ; RM-NEXT:    [[SPLAT_SPLATINSERT14:%.*]] = insertelement <1 x i32> poison, i32 [[TMP14]], i32 0
 ; RM-NEXT:    [[SPLAT_SPLAT15:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT14]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP15:%.*]] = mul <1 x i32> [[SPLAT_SPLAT15]], [[BLOCK13]]
-; RM-NEXT:    [[BLOCK16:%.*]] = shufflevector <2 x i32> [[SPLIT3]], <2 x i32> undef, <1 x i32> zeroinitializer
+; RM-NEXT:    [[BLOCK16:%.*]] = shufflevector <2 x i32> [[SPLIT3]], <2 x i32> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP16:%.*]] = extractelement <2 x i32> [[SPLIT1]], i64 1
 ; RM-NEXT:    [[SPLAT_SPLATINSERT17:%.*]] = insertelement <1 x i32> poison, i32 [[TMP16]], i32 0
 ; RM-NEXT:    [[SPLAT_SPLAT18:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT17]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP17:%.*]] = mul <1 x i32> [[SPLAT_SPLAT18]], [[BLOCK16]]
 ; RM-NEXT:    [[TMP18:%.*]] = add <1 x i32> [[TMP15]], [[TMP17]]
-; RM-NEXT:    [[TMP19:%.*]] = shufflevector <1 x i32> [[TMP18]], <1 x i32> undef, <2 x i32> <i32 0, i32 undef>
+; RM-NEXT:    [[TMP19:%.*]] = shufflevector <1 x i32> [[TMP18]], <1 x i32> poison, <2 x i32> <i32 0, i32 undef>
 ; RM-NEXT:    [[TMP20:%.*]] = shufflevector <2 x i32> undef, <2 x i32> [[TMP19]], <2 x i32> <i32 2, i32 1>
-; RM-NEXT:    [[BLOCK19:%.*]] = shufflevector <2 x i32> [[SPLIT2]], <2 x i32> undef, <1 x i32> <i32 1>
+; RM-NEXT:    [[BLOCK19:%.*]] = shufflevector <2 x i32> [[SPLIT2]], <2 x i32> poison, <1 x i32> <i32 1>
 ; RM-NEXT:    [[TMP21:%.*]] = extractelement <2 x i32> [[SPLIT1]], i64 0
 ; RM-NEXT:    [[SPLAT_SPLATINSERT20:%.*]] = insertelement <1 x i32> poison, i32 [[TMP21]], i32 0
 ; RM-NEXT:    [[SPLAT_SPLAT21:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT20]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP22:%.*]] = mul <1 x i32> [[SPLAT_SPLAT21]], [[BLOCK19]]
-; RM-NEXT:    [[BLOCK22:%.*]] = shufflevector <2 x i32> [[SPLIT3]], <2 x i32> undef, <1 x i32> <i32 1>
+; RM-NEXT:    [[BLOCK22:%.*]] = shufflevector <2 x i32> [[SPLIT3]], <2 x i32> poison, <1 x i32> <i32 1>
 ; RM-NEXT:    [[TMP23:%.*]] = extractelement <2 x i32> [[SPLIT1]], i64 1
 ; RM-NEXT:    [[SPLAT_SPLATINSERT23:%.*]] = insertelement <1 x i32> poison, i32 [[TMP23]], i32 0
 ; RM-NEXT:    [[SPLAT_SPLAT24:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT23]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP24:%.*]] = mul <1 x i32> [[SPLAT_SPLAT24]], [[BLOCK22]]
 ; RM-NEXT:    [[TMP25:%.*]] = add <1 x i32> [[TMP22]], [[TMP24]]
-; RM-NEXT:    [[TMP26:%.*]] = shufflevector <1 x i32> [[TMP25]], <1 x i32> undef, <2 x i32> <i32 0, i32 undef>
+; RM-NEXT:    [[TMP26:%.*]] = shufflevector <1 x i32> [[TMP25]], <1 x i32> poison, <2 x i32> <i32 0, i32 undef>
 ; RM-NEXT:    [[TMP27:%.*]] = shufflevector <2 x i32> [[TMP20]], <2 x i32> [[TMP26]], <2 x i32> <i32 0, i32 2>
 ; RM-NEXT:    [[TMP28:%.*]] = shufflevector <2 x i32> [[TMP13]], <2 x i32> [[TMP27]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 ; RM-NEXT:    ret <4 x i32> [[TMP28]]
@@ -76,36 +76,36 @@ declare <4 x i32> @llvm.matrix.multiply.v4f64.v4f64.v4f64(<4 x i32>, <4 x i32>,
 define <4 x i32> @multiply_1x2(<2 x i32> %a, <2 x i32> %b) {
 ; RM-LABEL: @multiply_1x2(
 ; RM-NEXT:  entry:
-; RM-NEXT:    [[SPLIT:%.*]] = shufflevector <2 x i32> [[A:%.*]], <2 x i32> undef, <1 x i32> zeroinitializer
-; RM-NEXT:    [[SPLIT1:%.*]] = shufflevector <2 x i32> [[A]], <2 x i32> undef, <1 x i32> <i32 1>
-; RM-NEXT:    [[SPLIT2:%.*]] = shufflevector <2 x i32> [[B:%.*]], <2 x i32> undef, <2 x i32> <i32 0, i32 1>
-; RM-NEXT:    [[BLOCK:%.*]] = shufflevector <2 x i32> [[SPLIT2]], <2 x i32> undef, <1 x i32> zeroinitializer
+; RM-NEXT:    [[SPLIT:%.*]] = shufflevector <2 x i32> [[A:%.*]], <2 x i32> poison, <1 x i32> zeroinitializer
+; RM-NEXT:    [[SPLIT1:%.*]] = shufflevector <2 x i32> [[A]], <2 x i32> poison, <1 x i32> <i32 1>
+; RM-NEXT:    [[SPLIT2:%.*]] = shufflevector <2 x i32> [[B:%.*]], <2 x i32> poison, <2 x i32> <i32 0, i32 1>
+; RM-NEXT:    [[BLOCK:%.*]] = shufflevector <2 x i32> [[SPLIT2]], <2 x i32> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP0:%.*]] = extractelement <1 x i32> [[SPLIT]], i64 0
 ; RM-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x i32> poison, i32 [[TMP0]], i32 0
 ; RM-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP1:%.*]] = mul <1 x i32> [[SPLAT_SPLAT]], [[BLOCK]]
-; RM-NEXT:    [[TMP2:%.*]] = shufflevector <1 x i32> [[TMP1]], <1 x i32> undef, <2 x i32> <i32 0, i32 undef>
+; RM-NEXT:    [[TMP2:%.*]] = shufflevector <1 x i32> [[TMP1]], <1 x i32> poison, <2 x i32> <i32 0, i32 undef>
 ; RM-NEXT:    [[TMP3:%.*]] = shufflevector <2 x i32> undef, <2 x i32> [[TMP2]], <2 x i32> <i32 2, i32 1>
-; RM-NEXT:    [[BLOCK3:%.*]] = shufflevector <2 x i32> [[SPLIT2]], <2 x i32> undef, <1 x i32> <i32 1>
+; RM-NEXT:    [[BLOCK3:%.*]] = shufflevector <2 x i32> [[SPLIT2]], <2 x i32> poison, <1 x i32> <i32 1>
 ; RM-NEXT:    [[TMP4:%.*]] = extractelement <1 x i32> [[SPLIT]], i64 0
 ; RM-NEXT:    [[SPLAT_SPLATINSERT4:%.*]] = insertelement <1 x i32> poison, i32 [[TMP4]], i32 0
 ; RM-NEXT:    [[SPLAT_SPLAT5:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT4]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP5:%.*]] = mul <1 x i32> [[SPLAT_SPLAT5]], [[BLOCK3]]
-; RM-NEXT:    [[TMP6:%.*]] = shufflevector <1 x i32> [[TMP5]], <1 x i32> undef, <2 x i32> <i32 0, i32 undef>
+; RM-NEXT:    [[TMP6:%.*]] = shufflevector <1 x i32> [[TMP5]], <1 x i32> poison, <2 x i32> <i32 0, i32 undef>
 ; RM-NEXT:    [[TMP7:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> [[TMP6]], <2 x i32> <i32 0, i32 2>
-; RM-NEXT:    [[BLOCK6:%.*]] = shufflevector <2 x i32> [[SPLIT2]], <2 x i32> undef, <1 x i32> zeroinitializer
+; RM-NEXT:    [[BLOCK6:%.*]] = shufflevector <2 x i32> [[SPLIT2]], <2 x i32> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP8:%.*]] = extractelement <1 x i32> [[SPLIT1]], i64 0
 ; RM-NEXT:    [[SPLAT_SPLATINSERT7:%.*]] = insertelement <1 x i32> poison, i32 [[TMP8]], i32 0
 ; RM-NEXT:    [[SPLAT_SPLAT8:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT7]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP9:%.*]] = mul <1 x i32> [[SPLAT_SPLAT8]], [[BLOCK6]]
-; RM-NEXT:    [[TMP10:%.*]] = shufflevector <1 x i32> [[TMP9]], <1 x i32> undef, <2 x i32> <i32 0, i32 undef>
+; RM-NEXT:    [[TMP10:%.*]] = shufflevector <1 x i32> [[TMP9]], <1 x i32> poison, <2 x i32> <i32 0, i32 undef>
 ; RM-NEXT:    [[TMP11:%.*]] = shufflevector <2 x i32> undef, <2 x i32> [[TMP10]], <2 x i32> <i32 2, i32 1>
-; RM-NEXT:    [[BLOCK9:%.*]] = shufflevector <2 x i32> [[SPLIT2]], <2 x i32> undef, <1 x i32> <i32 1>
+; RM-NEXT:    [[BLOCK9:%.*]] = shufflevector <2 x i32> [[SPLIT2]], <2 x i32> poison, <1 x i32> <i32 1>
 ; RM-NEXT:    [[TMP12:%.*]] = extractelement <1 x i32> [[SPLIT1]], i64 0
 ; RM-NEXT:    [[SPLAT_SPLATINSERT10:%.*]] = insertelement <1 x i32> poison, i32 [[TMP12]], i32 0
 ; RM-NEXT:    [[SPLAT_SPLAT11:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT10]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP13:%.*]] = mul <1 x i32> [[SPLAT_SPLAT11]], [[BLOCK9]]
-; RM-NEXT:    [[TMP14:%.*]] = shufflevector <1 x i32> [[TMP13]], <1 x i32> undef, <2 x i32> <i32 0, i32 undef>
+; RM-NEXT:    [[TMP14:%.*]] = shufflevector <1 x i32> [[TMP13]], <1 x i32> poison, <2 x i32> <i32 0, i32 undef>
 ; RM-NEXT:    [[TMP15:%.*]] = shufflevector <2 x i32> [[TMP11]], <2 x i32> [[TMP14]], <2 x i32> <i32 0, i32 2>
 ; RM-NEXT:    [[TMP16:%.*]] = shufflevector <2 x i32> [[TMP7]], <2 x i32> [[TMP15]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 ; RM-NEXT:    ret <4 x i32> [[TMP16]]
@@ -120,130 +120,130 @@ declare <4 x i32> @llvm.matrix.multiply.v4f64.v2f64.v2f64(<2 x i32>, <2 x i32>,
 define <9 x i32> @multiply_2x3(<6 x i32> %a, <6 x i32> %b) {
 ; RM-LABEL: @multiply_2x3(
 ; RM-NEXT:  entry:
-; RM-NEXT:    [[SPLIT:%.*]] = shufflevector <6 x i32> [[A:%.*]], <6 x i32> undef, <2 x i32> <i32 0, i32 1>
-; RM-NEXT:    [[SPLIT1:%.*]] = shufflevector <6 x i32> [[A]], <6 x i32> undef, <2 x i32> <i32 2, i32 3>
-; RM-NEXT:    [[SPLIT2:%.*]] = shufflevector <6 x i32> [[A]], <6 x i32> undef, <2 x i32> <i32 4, i32 5>
-; RM-NEXT:    [[SPLIT3:%.*]] = shufflevector <6 x i32> [[B:%.*]], <6 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
-; RM-NEXT:    [[SPLIT4:%.*]] = shufflevector <6 x i32> [[B]], <6 x i32> undef, <3 x i32> <i32 3, i32 4, i32 5>
-; RM-NEXT:    [[BLOCK:%.*]] = shufflevector <3 x i32> [[SPLIT3]], <3 x i32> undef, <1 x i32> zeroinitializer
+; RM-NEXT:    [[SPLIT:%.*]] = shufflevector <6 x i32> [[A:%.*]], <6 x i32> poison, <2 x i32> <i32 0, i32 1>
+; RM-NEXT:    [[SPLIT1:%.*]] = shufflevector <6 x i32> [[A]], <6 x i32> poison, <2 x i32> <i32 2, i32 3>
+; RM-NEXT:    [[SPLIT2:%.*]] = shufflevector <6 x i32> [[A]], <6 x i32> poison, <2 x i32> <i32 4, i32 5>
+; RM-NEXT:    [[SPLIT3:%.*]] = shufflevector <6 x i32> [[B:%.*]], <6 x i32> poison, <3 x i32> <i32 0, i32 1, i32 2>
+; RM-NEXT:    [[SPLIT4:%.*]] = shufflevector <6 x i32> [[B]], <6 x i32> poison, <3 x i32> <i32 3, i32 4, i32 5>
+; RM-NEXT:    [[BLOCK:%.*]] = shufflevector <3 x i32> [[SPLIT3]], <3 x i32> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP0:%.*]] = extractelement <2 x i32> [[SPLIT]], i64 0
 ; RM-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x i32> poison, i32 [[TMP0]], i32 0
 ; RM-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP1:%.*]] = mul <1 x i32> [[SPLAT_SPLAT]], [[BLOCK]]
-; RM-NEXT:    [[BLOCK5:%.*]] = shufflevector <3 x i32> [[SPLIT4]], <3 x i32> undef, <1 x i32> zeroinitializer
+; RM-NEXT:    [[BLOCK5:%.*]] = shufflevector <3 x i32> [[SPLIT4]], <3 x i32> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP2:%.*]] = extractelement <2 x i32> [[SPLIT]], i64 1
 ; RM-NEXT:    [[SPLAT_SPLATINSERT6:%.*]] = insertelement <1 x i32> poison, i32 [[TMP2]], i32 0
 ; RM-NEXT:    [[SPLAT_SPLAT7:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT6]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP3:%.*]] = mul <1 x i32> [[SPLAT_SPLAT7]], [[BLOCK5]]
 ; RM-NEXT:    [[TMP4:%.*]] = add <1 x i32> [[TMP1]], [[TMP3]]
-; RM-NEXT:    [[TMP5:%.*]] = shufflevector <1 x i32> [[TMP4]], <1 x i32> undef, <3 x i32> <i32 0, i32 undef, i32 undef>
+; RM-NEXT:    [[TMP5:%.*]] = shufflevector <1 x i32> [[TMP4]], <1 x i32> poison, <3 x i32> <i32 0, i32 undef, i32 undef>
 ; RM-NEXT:    [[TMP6:%.*]] = shufflevector <3 x i32> undef, <3 x i32> [[TMP5]], <3 x i32> <i32 3, i32 1, i32 2>
-; RM-NEXT:    [[BLOCK8:%.*]] = shufflevector <3 x i32> [[SPLIT3]], <3 x i32> undef, <1 x i32> <i32 1>
+; RM-NEXT:    [[BLOCK8:%.*]] = shufflevector <3 x i32> [[SPLIT3]], <3 x i32> poison, <1 x i32> <i32 1>
 ; RM-NEXT:    [[TMP7:%.*]] = extractelement <2 x i32> [[SPLIT]], i64 0
 ; RM-NEXT:    [[SPLAT_SPLATINSERT9:%.*]] = insertelement <1 x i32> poison, i32 [[TMP7]], i32 0
 ; RM-NEXT:    [[SPLAT_SPLAT10:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT9]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP8:%.*]] = mul <1 x i32> [[SPLAT_SPLAT10]], [[BLOCK8]]
-; RM-NEXT:    [[BLOCK11:%.*]] = shufflevector <3 x i32> [[SPLIT4]], <3 x i32> undef, <1 x i32> <i32 1>
+; RM-NEXT:    [[BLOCK11:%.*]] = shufflevector <3 x i32> [[SPLIT4]], <3 x i32> poison, <1 x i32> <i32 1>
 ; RM-NEXT:    [[TMP9:%.*]] = extractelement <2 x i32> [[SPLIT]], i64 1
 ; RM-NEXT:    [[SPLAT_SPLATINSERT12:%.*]] = insertelement <1 x i32> poison, i32 [[TMP9]], i32 0
 ; RM-NEXT:    [[SPLAT_SPLAT13:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT12]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP10:%.*]] = mul <1 x i32> [[SPLAT_SPLAT13]], [[BLOCK11]]
 ; RM-NEXT:    [[TMP11:%.*]] = add <1 x i32> [[TMP8]], [[TMP10]]
-; RM-NEXT:    [[TMP12:%.*]] = shufflevector <1 x i32> [[TMP11]], <1 x i32> undef, <3 x i32> <i32 0, i32 undef, i32 undef>
+; RM-NEXT:    [[TMP12:%.*]] = shufflevector <1 x i32> [[TMP11]], <1 x i32> poison, <3 x i32> <i32 0, i32 undef, i32 undef>
 ; RM-NEXT:    [[TMP13:%.*]] = shufflevector <3 x i32> [[TMP6]], <3 x i32> [[TMP12]], <3 x i32> <i32 0, i32 3, i32 2>
-; RM-NEXT:    [[BLOCK14:%.*]] = shufflevector <3 x i32> [[SPLIT3]], <3 x i32> undef, <1 x i32> <i32 2>
+; RM-NEXT:    [[BLOCK14:%.*]] = shufflevector <3 x i32> [[SPLIT3]], <3 x i32> poison, <1 x i32> <i32 2>
 ; RM-NEXT:    [[TMP14:%.*]] = extractelement <2 x i32> [[SPLIT]], i64 0
 ; RM-NEXT:    [[SPLAT_SPLATINSERT15:%.*]] = insertelement <1 x i32> poison, i32 [[TMP14]], i32 0
 ; RM-NEXT:    [[SPLAT_SPLAT16:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT15]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP15:%.*]] = mul <1 x i32> [[SPLAT_SPLAT16]], [[BLOCK14]]
-; RM-NEXT:    [[BLOCK17:%.*]] = shufflevector <3 x i32> [[SPLIT4]], <3 x i32> undef, <1 x i32> <i32 2>
+; RM-NEXT:    [[BLOCK17:%.*]] = shufflevector <3 x i32> [[SPLIT4]], <3 x i32> poison, <1 x i32> <i32 2>
 ; RM-NEXT:    [[TMP16:%.*]] = extractelement <2 x i32> [[SPLIT]], i64 1
 ; RM-NEXT:    [[SPLAT_SPLATINSERT18:%.*]] = insertelement <1 x i32> poison, i32 [[TMP16]], i32 0
 ; RM-NEXT:    [[SPLAT_SPLAT19:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT18]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP17:%.*]] = mul <1 x i32> [[SPLAT_SPLAT19]], [[BLOCK17]]
 ; RM-NEXT:    [[TMP18:%.*]] = add <1 x i32> [[TMP15]], [[TMP17]]
-; RM-NEXT:    [[TMP19:%.*]] = shufflevector <1 x i32> [[TMP18]], <1 x i32> undef, <3 x i32> <i32 0, i32 undef, i32 undef>
+; RM-NEXT:    [[TMP19:%.*]] = shufflevector <1 x i32> [[TMP18]], <1 x i32> poison, <3 x i32> <i32 0, i32 undef, i32 undef>
 ; RM-NEXT:    [[TMP20:%.*]] = shufflevector <3 x i32> [[TMP13]], <3 x i32> [[TMP19]], <3 x i32> <i32 0, i32 1, i32 3>
-; RM-NEXT:    [[BLOCK20:%.*]] = shufflevector <3 x i32> [[SPLIT3]], <3 x i32> undef, <1 x i32> zeroinitializer
+; RM-NEXT:    [[BLOCK20:%.*]] = shufflevector <3 x i32> [[SPLIT3]], <3 x i32> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP21:%.*]] = extractelement <2 x i32> [[SPLIT1]], i64 0
 ; RM-NEXT:    [[SPLAT_SPLATINSERT21:%.*]] = insertelement <1 x i32> poison, i32 [[TMP21]], i32 0
 ; RM-NEXT:    [[SPLAT_SPLAT22:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT21]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP22:%.*]] = mul <1 x i32> [[SPLAT_SPLAT22]], [[BLOCK20]]
-; RM-NEXT:    [[BLOCK23:%.*]] = shufflevector <3 x i32> [[SPLIT4]], <3 x i32> undef, <1 x i32> zeroinitializer
+; RM-NEXT:    [[BLOCK23:%.*]] = shufflevector <3 x i32> [[SPLIT4]], <3 x i32> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP23:%.*]] = extractelement <2 x i32> [[SPLIT1]], i64 1
 ; RM-NEXT:    [[SPLAT_SPLATINSERT24:%.*]] = insertelement <1 x i32> poison, i32 [[TMP23]], i32 0
 ; RM-NEXT:    [[SPLAT_SPLAT25:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT24]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP24:%.*]] = mul <1 x i32> [[SPLAT_SPLAT25]], [[BLOCK23]]
 ; RM-NEXT:    [[TMP25:%.*]] = add <1 x i32> [[TMP22]], [[TMP24]]
-; RM-NEXT:    [[TMP26:%.*]] = shufflevector <1 x i32> [[TMP25]], <1 x i32> undef, <3 x i32> <i32 0, i32 undef, i32 undef>
+; RM-NEXT:    [[TMP26:%.*]] = shufflevector <1 x i32> [[TMP25]], <1 x i32> poison, <3 x i32> <i32 0, i32 undef, i32 undef>
 ; RM-NEXT:    [[TMP27:%.*]] = shufflevector <3 x i32> undef, <3 x i32> [[TMP26]], <3 x i32> <i32 3, i32 1, i32 2>
-; RM-NEXT:    [[BLOCK26:%.*]] = shufflevector <3 x i32> [[SPLIT3]], <3 x i32> undef, <1 x i32> <i32 1>
+; RM-NEXT:    [[BLOCK26:%.*]] = shufflevector <3 x i32> [[SPLIT3]], <3 x i32> poison, <1 x i32> <i32 1>
 ; RM-NEXT:    [[TMP28:%.*]] = extractelement <2 x i32> [[SPLIT1]], i64 0
 ; RM-NEXT:    [[SPLAT_SPLATINSERT27:%.*]] = insertelement <1 x i32> poison, i32 [[TMP28]], i32 0
 ; RM-NEXT:    [[SPLAT_SPLAT28:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT27]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP29:%.*]] = mul <1 x i32> [[SPLAT_SPLAT28]], [[BLOCK26]]
-; RM-NEXT:    [[BLOCK29:%.*]] = shufflevector <3 x i32> [[SPLIT4]], <3 x i32> undef, <1 x i32> <i32 1>
+; RM-NEXT:    [[BLOCK29:%.*]] = shufflevector <3 x i32> [[SPLIT4]], <3 x i32> poison, <1 x i32> <i32 1>
 ; RM-NEXT:    [[TMP30:%.*]] = extractelement <2 x i32> [[SPLIT1]], i64 1
 ; RM-NEXT:    [[SPLAT_SPLATINSERT30:%.*]] = insertelement <1 x i32> poison, i32 [[TMP30]], i32 0
 ; RM-NEXT:    [[SPLAT_SPLAT31:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT30]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP31:%.*]] = mul <1 x i32> [[SPLAT_SPLAT31]], [[BLOCK29]]
 ; RM-NEXT:    [[TMP32:%.*]] = add <1 x i32> [[TMP29]], [[TMP31]]
-; RM-NEXT:    [[TMP33:%.*]] = shufflevector <1 x i32> [[TMP32]], <1 x i32> undef, <3 x i32> <i32 0, i32 undef, i32 undef>
+; RM-NEXT:    [[TMP33:%.*]] = shufflevector <1 x i32> [[TMP32]], <1 x i32> poison, <3 x i32> <i32 0, i32 undef, i32 undef>
 ; RM-NEXT:    [[TMP34:%.*]] = shufflevector <3 x i32> [[TMP27]], <3 x i32> [[TMP33]], <3 x i32> <i32 0, i32 3, i32 2>
-; RM-NEXT:    [[BLOCK32:%.*]] = shufflevector <3 x i32> [[SPLIT3]], <3 x i32> undef, <1 x i32> <i32 2>
+; RM-NEXT:    [[BLOCK32:%.*]] = shufflevector <3 x i32> [[SPLIT3]], <3 x i32> poison, <1 x i32> <i32 2>
 ; RM-NEXT:    [[TMP35:%.*]] = extractelement <2 x i32> [[SPLIT1]], i64 0
 ; RM-NEXT:    [[SPLAT_SPLATINSERT33:%.*]] = insertelement <1 x i32> poison, i32 [[TMP35]], i32 0
 ; RM-NEXT:    [[SPLAT_SPLAT34:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT33]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP36:%.*]] = mul <1 x i32> [[SPLAT_SPLAT34]], [[BLOCK32]]
-; RM-NEXT:    [[BLOCK35:%.*]] = shufflevector <3 x i32> [[SPLIT4]], <3 x i32> undef, <1 x i32> <i32 2>
+; RM-NEXT:    [[BLOCK35:%.*]] = shufflevector <3 x i32> [[SPLIT4]], <3 x i32> poison, <1 x i32> <i32 2>
 ; RM-NEXT:    [[TMP37:%.*]] = extractelement <2 x i32> [[SPLIT1]], i64 1
 ; RM-NEXT:    [[SPLAT_SPLATINSERT36:%.*]] = insertelement <1 x i32> poison, i32 [[TMP37]], i32 0
 ; RM-NEXT:    [[SPLAT_SPLAT37:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT36]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP38:%.*]] = mul <1 x i32> [[SPLAT_SPLAT37]], [[BLOCK35]]
 ; RM-NEXT:    [[TMP39:%.*]] = add <1 x i32> [[TMP36]], [[TMP38]]
-; RM-NEXT:    [[TMP40:%.*]] = shufflevector <1 x i32> [[TMP39]], <1 x i32> undef, <3 x i32> <i32 0, i32 undef, i32 undef>
+; RM-NEXT:    [[TMP40:%.*]] = shufflevector <1 x i32> [[TMP39]], <1 x i32> poison, <3 x i32> <i32 0, i32 undef, i32 undef>
 ; RM-NEXT:    [[TMP41:%.*]] = shufflevector <3 x i32> [[TMP34]], <3 x i32> [[TMP40]], <3 x i32> <i32 0, i32 1, i32 3>
-; RM-NEXT:    [[BLOCK38:%.*]] = shufflevector <3 x i32> [[SPLIT3]], <3 x i32> undef, <1 x i32> zeroinitializer
+; RM-NEXT:    [[BLOCK38:%.*]] = shufflevector <3 x i32> [[SPLIT3]], <3 x i32> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP42:%.*]] = extractelement <2 x i32> [[SPLIT2]], i64 0
 ; RM-NEXT:    [[SPLAT_SPLATINSERT39:%.*]] = insertelement <1 x i32> poison, i32 [[TMP42]], i32 0
 ; RM-NEXT:    [[SPLAT_SPLAT40:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT39]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP43:%.*]] = mul <1 x i32> [[SPLAT_SPLAT40]], [[BLOCK38]]
-; RM-NEXT:    [[BLOCK41:%.*]] = shufflevector <3 x i32> [[SPLIT4]], <3 x i32> undef, <1 x i32> zeroinitializer
+; RM-NEXT:    [[BLOCK41:%.*]] = shufflevector <3 x i32> [[SPLIT4]], <3 x i32> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP44:%.*]] = extractelement <2 x i32> [[SPLIT2]], i64 1
 ; RM-NEXT:    [[SPLAT_SPLATINSERT42:%.*]] = insertelement <1 x i32> poison, i32 [[TMP44]], i32 0
 ; RM-NEXT:    [[SPLAT_SPLAT43:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT42]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP45:%.*]] = mul <1 x i32> [[SPLAT_SPLAT43]], [[BLOCK41]]
 ; RM-NEXT:    [[TMP46:%.*]] = add <1 x i32> [[TMP43]], [[TMP45]]
-; RM-NEXT:    [[TMP47:%.*]] = shufflevector <1 x i32> [[TMP46]], <1 x i32> undef, <3 x i32> <i32 0, i32 undef, i32 undef>
+; RM-NEXT:    [[TMP47:%.*]] = shufflevector <1 x i32> [[TMP46]], <1 x i32> poison, <3 x i32> <i32 0, i32 undef, i32 undef>
 ; RM-NEXT:    [[TMP48:%.*]] = shufflevector <3 x i32> undef, <3 x i32> [[TMP47]], <3 x i32> <i32 3, i32 1, i32 2>
-; RM-NEXT:    [[BLOCK44:%.*]] = shufflevector <3 x i32> [[SPLIT3]], <3 x i32> undef, <1 x i32> <i32 1>
+; RM-NEXT:    [[BLOCK44:%.*]] = shufflevector <3 x i32> [[SPLIT3]], <3 x i32> poison, <1 x i32> <i32 1>
 ; RM-NEXT:    [[TMP49:%.*]] = extractelement <2 x i32> [[SPLIT2]], i64 0
 ; RM-NEXT:    [[SPLAT_SPLATINSERT45:%.*]] = insertelement <1 x i32> poison, i32 [[TMP49]], i32 0
 ; RM-NEXT:    [[SPLAT_SPLAT46:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT45]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP50:%.*]] = mul <1 x i32> [[SPLAT_SPLAT46]], [[BLOCK44]]
-; RM-NEXT:    [[BLOCK47:%.*]] = shufflevector <3 x i32> [[SPLIT4]], <3 x i32> undef, <1 x i32> <i32 1>
+; RM-NEXT:    [[BLOCK47:%.*]] = shufflevector <3 x i32> [[SPLIT4]], <3 x i32> poison, <1 x i32> <i32 1>
 ; RM-NEXT:    [[TMP51:%.*]] = extractelement <2 x i32> [[SPLIT2]], i64 1
 ; RM-NEXT:    [[SPLAT_SPLATINSERT48:%.*]] = insertelement <1 x i32> poison, i32 [[TMP51]], i32 0
 ; RM-NEXT:    [[SPLAT_SPLAT49:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT48]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP52:%.*]] = mul <1 x i32> [[SPLAT_SPLAT49]], [[BLOCK47]]
 ; RM-NEXT:    [[TMP53:%.*]] = add <1 x i32> [[TMP50]], [[TMP52]]
-; RM-NEXT:    [[TMP54:%.*]] = shufflevector <1 x i32> [[TMP53]], <1 x i32> undef, <3 x i32> <i32 0, i32 undef, i32 undef>
+; RM-NEXT:    [[TMP54:%.*]] = shufflevector <1 x i32> [[TMP53]], <1 x i32> poison, <3 x i32> <i32 0, i32 undef, i32 undef>
 ; RM-NEXT:    [[TMP55:%.*]] = shufflevector <3 x i32> [[TMP48]], <3 x i32> [[TMP54]], <3 x i32> <i32 0, i32 3, i32 2>
-; RM-NEXT:    [[BLOCK50:%.*]] = shufflevector <3 x i32> [[SPLIT3]], <3 x i32> undef, <1 x i32> <i32 2>
+; RM-NEXT:    [[BLOCK50:%.*]] = shufflevector <3 x i32> [[SPLIT3]], <3 x i32> poison, <1 x i32> <i32 2>
 ; RM-NEXT:    [[TMP56:%.*]] = extractelement <2 x i32> [[SPLIT2]], i64 0
 ; RM-NEXT:    [[SPLAT_SPLATINSERT51:%.*]] = insertelement <1 x i32> poison, i32 [[TMP56]], i32 0
 ; RM-NEXT:    [[SPLAT_SPLAT52:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT51]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP57:%.*]] = mul <1 x i32> [[SPLAT_SPLAT52]], [[BLOCK50]]
-; RM-NEXT:    [[BLOCK53:%.*]] = shufflevector <3 x i32> [[SPLIT4]], <3 x i32> undef, <1 x i32> <i32 2>
+; RM-NEXT:    [[BLOCK53:%.*]] = shufflevector <3 x i32> [[SPLIT4]], <3 x i32> poison, <1 x i32> <i32 2>
 ; RM-NEXT:    [[TMP58:%.*]] = extractelement <2 x i32> [[SPLIT2]], i64 1
 ; RM-NEXT:    [[SPLAT_SPLATINSERT54:%.*]] = insertelement <1 x i32> poison, i32 [[TMP58]], i32 0
 ; RM-NEXT:    [[SPLAT_SPLAT55:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT54]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; RM-NEXT:    [[TMP59:%.*]] = mul <1 x i32> [[SPLAT_SPLAT55]], [[BLOCK53]]
 ; RM-NEXT:    [[TMP60:%.*]] = add <1 x i32> [[TMP57]], [[TMP59]]
-; RM-NEXT:    [[TMP61:%.*]] = shufflevector <1 x i32> [[TMP60]], <1 x i32> undef, <3 x i32> <i32 0, i32 undef, i32 undef>
+; RM-NEXT:    [[TMP61:%.*]] = shufflevector <1 x i32> [[TMP60]], <1 x i32> poison, <3 x i32> <i32 0, i32 undef, i32 undef>
 ; RM-NEXT:    [[TMP62:%.*]] = shufflevector <3 x i32> [[TMP55]], <3 x i32> [[TMP61]], <3 x i32> <i32 0, i32 1, i32 3>
 ; RM-NEXT:    [[TMP63:%.*]] = shufflevector <3 x i32> [[TMP20]], <3 x i32> [[TMP41]], <6 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5>
-; RM-NEXT:    [[TMP64:%.*]] = shufflevector <3 x i32> [[TMP62]], <3 x i32> undef, <6 x i32> <i32 0, i32 1, i32 2, i32 undef, i32 undef, i32 undef>
+; RM-NEXT:    [[TMP64:%.*]] = shufflevector <3 x i32> [[TMP62]], <3 x i32> poison, <6 x i32> <i32 0, i32 1, i32 2, i32 undef, i32 undef, i32 undef>
 ; RM-NEXT:    [[TMP65:%.*]] = shufflevector <6 x i32> [[TMP63]], <6 x i32> [[TMP64]], <9 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
 ; RM-NEXT:    ret <9 x i32> [[TMP65]]
 ;

diff  --git a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-i32.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-i32.ll
index cc47b0d23ed9..5dfd9b84b01b 100644
--- a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-i32.ll
+++ b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-i32.ll
@@ -6,61 +6,61 @@
 define <4 x i32> @multiply_2x2(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: @multiply_2x2(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[SPLIT:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> undef, <2 x i32> <i32 0, i32 1>
-; CHECK-NEXT:    [[SPLIT1:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-; CHECK-NEXT:    [[SPLIT2:%.*]] = shufflevector <4 x i32> [[B:%.*]], <4 x i32> undef, <2 x i32> <i32 0, i32 1>
-; CHECK-NEXT:    [[SPLIT3:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-; CHECK-NEXT:    [[BLOCK:%.*]] = shufflevector <2 x i32> [[SPLIT]], <2 x i32> undef, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[SPLIT:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> poison, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT:    [[SPLIT1:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> poison, <2 x i32> <i32 2, i32 3>
+; CHECK-NEXT:    [[SPLIT2:%.*]] = shufflevector <4 x i32> [[B:%.*]], <4 x i32> poison, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT:    [[SPLIT3:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> poison, <2 x i32> <i32 2, i32 3>
+; CHECK-NEXT:    [[BLOCK:%.*]] = shufflevector <2 x i32> [[SPLIT]], <2 x i32> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x i32> [[SPLIT2]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x i32> poison, i32 [[TMP0]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP1:%.*]] = mul <1 x i32> [[BLOCK]], [[SPLAT_SPLAT]]
-; CHECK-NEXT:    [[BLOCK4:%.*]] = shufflevector <2 x i32> [[SPLIT1]], <2 x i32> undef, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[BLOCK4:%.*]] = shufflevector <2 x i32> [[SPLIT1]], <2 x i32> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x i32> [[SPLIT2]], i64 1
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT5:%.*]] = insertelement <1 x i32> poison, i32 [[TMP2]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT6:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT5]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP3:%.*]] = mul <1 x i32> [[BLOCK4]], [[SPLAT_SPLAT6]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = add <1 x i32> [[TMP1]], [[TMP3]]
-; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <1 x i32> [[TMP4]], <1 x i32> undef, <2 x i32> <i32 0, i32 undef>
+; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <1 x i32> [[TMP4]], <1 x i32> poison, <2 x i32> <i32 0, i32 undef>
 ; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <2 x i32> undef, <2 x i32> [[TMP5]], <2 x i32> <i32 2, i32 1>
-; CHECK-NEXT:    [[BLOCK7:%.*]] = shufflevector <2 x i32> [[SPLIT]], <2 x i32> undef, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[BLOCK7:%.*]] = shufflevector <2 x i32> [[SPLIT]], <2 x i32> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <2 x i32> [[SPLIT2]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT8:%.*]] = insertelement <1 x i32> poison, i32 [[TMP7]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT9:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT8]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP8:%.*]] = mul <1 x i32> [[BLOCK7]], [[SPLAT_SPLAT9]]
-; CHECK-NEXT:    [[BLOCK10:%.*]] = shufflevector <2 x i32> [[SPLIT1]], <2 x i32> undef, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[BLOCK10:%.*]] = shufflevector <2 x i32> [[SPLIT1]], <2 x i32> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <2 x i32> [[SPLIT2]], i64 1
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT11:%.*]] = insertelement <1 x i32> poison, i32 [[TMP9]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT12:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT11]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP10:%.*]] = mul <1 x i32> [[BLOCK10]], [[SPLAT_SPLAT12]]
 ; CHECK-NEXT:    [[TMP11:%.*]] = add <1 x i32> [[TMP8]], [[TMP10]]
-; CHECK-NEXT:    [[TMP12:%.*]] = shufflevector <1 x i32> [[TMP11]], <1 x i32> undef, <2 x i32> <i32 0, i32 undef>
+; CHECK-NEXT:    [[TMP12:%.*]] = shufflevector <1 x i32> [[TMP11]], <1 x i32> poison, <2 x i32> <i32 0, i32 undef>
 ; CHECK-NEXT:    [[TMP13:%.*]] = shufflevector <2 x i32> [[TMP6]], <2 x i32> [[TMP12]], <2 x i32> <i32 0, i32 2>
-; CHECK-NEXT:    [[BLOCK13:%.*]] = shufflevector <2 x i32> [[SPLIT]], <2 x i32> undef, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[BLOCK13:%.*]] = shufflevector <2 x i32> [[SPLIT]], <2 x i32> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <2 x i32> [[SPLIT3]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT14:%.*]] = insertelement <1 x i32> poison, i32 [[TMP14]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT15:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT14]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP15:%.*]] = mul <1 x i32> [[BLOCK13]], [[SPLAT_SPLAT15]]
-; CHECK-NEXT:    [[BLOCK16:%.*]] = shufflevector <2 x i32> [[SPLIT1]], <2 x i32> undef, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[BLOCK16:%.*]] = shufflevector <2 x i32> [[SPLIT1]], <2 x i32> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <2 x i32> [[SPLIT3]], i64 1
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT17:%.*]] = insertelement <1 x i32> poison, i32 [[TMP16]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT18:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT17]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP17:%.*]] = mul <1 x i32> [[BLOCK16]], [[SPLAT_SPLAT18]]
 ; CHECK-NEXT:    [[TMP18:%.*]] = add <1 x i32> [[TMP15]], [[TMP17]]
-; CHECK-NEXT:    [[TMP19:%.*]] = shufflevector <1 x i32> [[TMP18]], <1 x i32> undef, <2 x i32> <i32 0, i32 undef>
+; CHECK-NEXT:    [[TMP19:%.*]] = shufflevector <1 x i32> [[TMP18]], <1 x i32> poison, <2 x i32> <i32 0, i32 undef>
 ; CHECK-NEXT:    [[TMP20:%.*]] = shufflevector <2 x i32> undef, <2 x i32> [[TMP19]], <2 x i32> <i32 2, i32 1>
-; CHECK-NEXT:    [[BLOCK19:%.*]] = shufflevector <2 x i32> [[SPLIT]], <2 x i32> undef, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[BLOCK19:%.*]] = shufflevector <2 x i32> [[SPLIT]], <2 x i32> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP21:%.*]] = extractelement <2 x i32> [[SPLIT3]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT20:%.*]] = insertelement <1 x i32> poison, i32 [[TMP21]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT21:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT20]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP22:%.*]] = mul <1 x i32> [[BLOCK19]], [[SPLAT_SPLAT21]]
-; CHECK-NEXT:    [[BLOCK22:%.*]] = shufflevector <2 x i32> [[SPLIT1]], <2 x i32> undef, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[BLOCK22:%.*]] = shufflevector <2 x i32> [[SPLIT1]], <2 x i32> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP23:%.*]] = extractelement <2 x i32> [[SPLIT3]], i64 1
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT23:%.*]] = insertelement <1 x i32> poison, i32 [[TMP23]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT24:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT23]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP24:%.*]] = mul <1 x i32> [[BLOCK22]], [[SPLAT_SPLAT24]]
 ; CHECK-NEXT:    [[TMP25:%.*]] = add <1 x i32> [[TMP22]], [[TMP24]]
-; CHECK-NEXT:    [[TMP26:%.*]] = shufflevector <1 x i32> [[TMP25]], <1 x i32> undef, <2 x i32> <i32 0, i32 undef>
+; CHECK-NEXT:    [[TMP26:%.*]] = shufflevector <1 x i32> [[TMP25]], <1 x i32> poison, <2 x i32> <i32 0, i32 undef>
 ; CHECK-NEXT:    [[TMP27:%.*]] = shufflevector <2 x i32> [[TMP20]], <2 x i32> [[TMP26]], <2 x i32> <i32 0, i32 2>
 ; CHECK-NEXT:    [[TMP28:%.*]] = shufflevector <2 x i32> [[TMP13]], <2 x i32> [[TMP27]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 ; CHECK-NEXT:    ret <4 x i32> [[TMP28]]
@@ -75,36 +75,36 @@ declare <4 x i32> @llvm.matrix.multiply.v4i32.v4i32.v4i32(<4 x i32>, <4 x i32>,
 define <4 x i32> @multiply_1x2(<2 x i32> %a, <2 x i32> %b) {
 ; CHECK-LABEL: @multiply_1x2(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[SPLIT:%.*]] = shufflevector <2 x i32> [[A:%.*]], <2 x i32> undef, <2 x i32> <i32 0, i32 1>
-; CHECK-NEXT:    [[SPLIT1:%.*]] = shufflevector <2 x i32> [[B:%.*]], <2 x i32> undef, <1 x i32> zeroinitializer
-; CHECK-NEXT:    [[SPLIT2:%.*]] = shufflevector <2 x i32> [[B]], <2 x i32> undef, <1 x i32> <i32 1>
-; CHECK-NEXT:    [[BLOCK:%.*]] = shufflevector <2 x i32> [[SPLIT]], <2 x i32> undef, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[SPLIT:%.*]] = shufflevector <2 x i32> [[A:%.*]], <2 x i32> poison, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT:    [[SPLIT1:%.*]] = shufflevector <2 x i32> [[B:%.*]], <2 x i32> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[SPLIT2:%.*]] = shufflevector <2 x i32> [[B]], <2 x i32> poison, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[BLOCK:%.*]] = shufflevector <2 x i32> [[SPLIT]], <2 x i32> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <1 x i32> [[SPLIT1]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x i32> poison, i32 [[TMP0]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP1:%.*]] = mul <1 x i32> [[BLOCK]], [[SPLAT_SPLAT]]
-; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <1 x i32> [[TMP1]], <1 x i32> undef, <2 x i32> <i32 0, i32 undef>
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <1 x i32> [[TMP1]], <1 x i32> poison, <2 x i32> <i32 0, i32 undef>
 ; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <2 x i32> undef, <2 x i32> [[TMP2]], <2 x i32> <i32 2, i32 1>
-; CHECK-NEXT:    [[BLOCK3:%.*]] = shufflevector <2 x i32> [[SPLIT]], <2 x i32> undef, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[BLOCK3:%.*]] = shufflevector <2 x i32> [[SPLIT]], <2 x i32> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <1 x i32> [[SPLIT1]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT4:%.*]] = insertelement <1 x i32> poison, i32 [[TMP4]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT5:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT4]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP5:%.*]] = mul <1 x i32> [[BLOCK3]], [[SPLAT_SPLAT5]]
-; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <1 x i32> [[TMP5]], <1 x i32> undef, <2 x i32> <i32 0, i32 undef>
+; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <1 x i32> [[TMP5]], <1 x i32> poison, <2 x i32> <i32 0, i32 undef>
 ; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> [[TMP6]], <2 x i32> <i32 0, i32 2>
-; CHECK-NEXT:    [[BLOCK6:%.*]] = shufflevector <2 x i32> [[SPLIT]], <2 x i32> undef, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[BLOCK6:%.*]] = shufflevector <2 x i32> [[SPLIT]], <2 x i32> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <1 x i32> [[SPLIT2]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT7:%.*]] = insertelement <1 x i32> poison, i32 [[TMP8]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT8:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT7]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP9:%.*]] = mul <1 x i32> [[BLOCK6]], [[SPLAT_SPLAT8]]
-; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <1 x i32> [[TMP9]], <1 x i32> undef, <2 x i32> <i32 0, i32 undef>
+; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <1 x i32> [[TMP9]], <1 x i32> poison, <2 x i32> <i32 0, i32 undef>
 ; CHECK-NEXT:    [[TMP11:%.*]] = shufflevector <2 x i32> undef, <2 x i32> [[TMP10]], <2 x i32> <i32 2, i32 1>
-; CHECK-NEXT:    [[BLOCK9:%.*]] = shufflevector <2 x i32> [[SPLIT]], <2 x i32> undef, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[BLOCK9:%.*]] = shufflevector <2 x i32> [[SPLIT]], <2 x i32> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <1 x i32> [[SPLIT2]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT10:%.*]] = insertelement <1 x i32> poison, i32 [[TMP12]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT11:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT10]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP13:%.*]] = mul <1 x i32> [[BLOCK9]], [[SPLAT_SPLAT11]]
-; CHECK-NEXT:    [[TMP14:%.*]] = shufflevector <1 x i32> [[TMP13]], <1 x i32> undef, <2 x i32> <i32 0, i32 undef>
+; CHECK-NEXT:    [[TMP14:%.*]] = shufflevector <1 x i32> [[TMP13]], <1 x i32> poison, <2 x i32> <i32 0, i32 undef>
 ; CHECK-NEXT:    [[TMP15:%.*]] = shufflevector <2 x i32> [[TMP11]], <2 x i32> [[TMP14]], <2 x i32> <i32 0, i32 2>
 ; CHECK-NEXT:    [[TMP16:%.*]] = shufflevector <2 x i32> [[TMP7]], <2 x i32> [[TMP15]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 ; CHECK-NEXT:    ret <4 x i32> [[TMP16]]
@@ -119,130 +119,130 @@ declare <4 x i32> @llvm.matrix.multiply.v4i32.v2i32.v2i32(<2 x i32>, <2 x i32>,
 define <9 x i32> @multiply_2x3(<6 x i32> %a, <6 x i32> %b) {
 ; CHECK-LABEL: @multiply_2x3(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[SPLIT:%.*]] = shufflevector <6 x i32> [[A:%.*]], <6 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
-; CHECK-NEXT:    [[SPLIT1:%.*]] = shufflevector <6 x i32> [[A]], <6 x i32> undef, <3 x i32> <i32 3, i32 4, i32 5>
-; CHECK-NEXT:    [[SPLIT2:%.*]] = shufflevector <6 x i32> [[B:%.*]], <6 x i32> undef, <2 x i32> <i32 0, i32 1>
-; CHECK-NEXT:    [[SPLIT3:%.*]] = shufflevector <6 x i32> [[B]], <6 x i32> undef, <2 x i32> <i32 2, i32 3>
-; CHECK-NEXT:    [[SPLIT4:%.*]] = shufflevector <6 x i32> [[B]], <6 x i32> undef, <2 x i32> <i32 4, i32 5>
-; CHECK-NEXT:    [[BLOCK:%.*]] = shufflevector <3 x i32> [[SPLIT]], <3 x i32> undef, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[SPLIT:%.*]] = shufflevector <6 x i32> [[A:%.*]], <6 x i32> poison, <3 x i32> <i32 0, i32 1, i32 2>
+; CHECK-NEXT:    [[SPLIT1:%.*]] = shufflevector <6 x i32> [[A]], <6 x i32> poison, <3 x i32> <i32 3, i32 4, i32 5>
+; CHECK-NEXT:    [[SPLIT2:%.*]] = shufflevector <6 x i32> [[B:%.*]], <6 x i32> poison, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT:    [[SPLIT3:%.*]] = shufflevector <6 x i32> [[B]], <6 x i32> poison, <2 x i32> <i32 2, i32 3>
+; CHECK-NEXT:    [[SPLIT4:%.*]] = shufflevector <6 x i32> [[B]], <6 x i32> poison, <2 x i32> <i32 4, i32 5>
+; CHECK-NEXT:    [[BLOCK:%.*]] = shufflevector <3 x i32> [[SPLIT]], <3 x i32> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x i32> [[SPLIT2]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x i32> poison, i32 [[TMP0]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP1:%.*]] = mul <1 x i32> [[BLOCK]], [[SPLAT_SPLAT]]
-; CHECK-NEXT:    [[BLOCK5:%.*]] = shufflevector <3 x i32> [[SPLIT1]], <3 x i32> undef, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[BLOCK5:%.*]] = shufflevector <3 x i32> [[SPLIT1]], <3 x i32> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x i32> [[SPLIT2]], i64 1
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT6:%.*]] = insertelement <1 x i32> poison, i32 [[TMP2]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT7:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT6]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP3:%.*]] = mul <1 x i32> [[BLOCK5]], [[SPLAT_SPLAT7]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = add <1 x i32> [[TMP1]], [[TMP3]]
-; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <1 x i32> [[TMP4]], <1 x i32> undef, <3 x i32> <i32 0, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <1 x i32> [[TMP4]], <1 x i32> poison, <3 x i32> <i32 0, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <3 x i32> undef, <3 x i32> [[TMP5]], <3 x i32> <i32 3, i32 1, i32 2>
-; CHECK-NEXT:    [[BLOCK8:%.*]] = shufflevector <3 x i32> [[SPLIT]], <3 x i32> undef, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[BLOCK8:%.*]] = shufflevector <3 x i32> [[SPLIT]], <3 x i32> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <2 x i32> [[SPLIT2]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT9:%.*]] = insertelement <1 x i32> poison, i32 [[TMP7]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT10:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT9]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP8:%.*]] = mul <1 x i32> [[BLOCK8]], [[SPLAT_SPLAT10]]
-; CHECK-NEXT:    [[BLOCK11:%.*]] = shufflevector <3 x i32> [[SPLIT1]], <3 x i32> undef, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[BLOCK11:%.*]] = shufflevector <3 x i32> [[SPLIT1]], <3 x i32> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <2 x i32> [[SPLIT2]], i64 1
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT12:%.*]] = insertelement <1 x i32> poison, i32 [[TMP9]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT13:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT12]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP10:%.*]] = mul <1 x i32> [[BLOCK11]], [[SPLAT_SPLAT13]]
 ; CHECK-NEXT:    [[TMP11:%.*]] = add <1 x i32> [[TMP8]], [[TMP10]]
-; CHECK-NEXT:    [[TMP12:%.*]] = shufflevector <1 x i32> [[TMP11]], <1 x i32> undef, <3 x i32> <i32 0, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP12:%.*]] = shufflevector <1 x i32> [[TMP11]], <1 x i32> poison, <3 x i32> <i32 0, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[TMP13:%.*]] = shufflevector <3 x i32> [[TMP6]], <3 x i32> [[TMP12]], <3 x i32> <i32 0, i32 3, i32 2>
-; CHECK-NEXT:    [[BLOCK14:%.*]] = shufflevector <3 x i32> [[SPLIT]], <3 x i32> undef, <1 x i32> <i32 2>
+; CHECK-NEXT:    [[BLOCK14:%.*]] = shufflevector <3 x i32> [[SPLIT]], <3 x i32> poison, <1 x i32> <i32 2>
 ; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <2 x i32> [[SPLIT2]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT15:%.*]] = insertelement <1 x i32> poison, i32 [[TMP14]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT16:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT15]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP15:%.*]] = mul <1 x i32> [[BLOCK14]], [[SPLAT_SPLAT16]]
-; CHECK-NEXT:    [[BLOCK17:%.*]] = shufflevector <3 x i32> [[SPLIT1]], <3 x i32> undef, <1 x i32> <i32 2>
+; CHECK-NEXT:    [[BLOCK17:%.*]] = shufflevector <3 x i32> [[SPLIT1]], <3 x i32> poison, <1 x i32> <i32 2>
 ; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <2 x i32> [[SPLIT2]], i64 1
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT18:%.*]] = insertelement <1 x i32> poison, i32 [[TMP16]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT19:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT18]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP17:%.*]] = mul <1 x i32> [[BLOCK17]], [[SPLAT_SPLAT19]]
 ; CHECK-NEXT:    [[TMP18:%.*]] = add <1 x i32> [[TMP15]], [[TMP17]]
-; CHECK-NEXT:    [[TMP19:%.*]] = shufflevector <1 x i32> [[TMP18]], <1 x i32> undef, <3 x i32> <i32 0, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP19:%.*]] = shufflevector <1 x i32> [[TMP18]], <1 x i32> poison, <3 x i32> <i32 0, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[TMP20:%.*]] = shufflevector <3 x i32> [[TMP13]], <3 x i32> [[TMP19]], <3 x i32> <i32 0, i32 1, i32 3>
-; CHECK-NEXT:    [[BLOCK20:%.*]] = shufflevector <3 x i32> [[SPLIT]], <3 x i32> undef, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[BLOCK20:%.*]] = shufflevector <3 x i32> [[SPLIT]], <3 x i32> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP21:%.*]] = extractelement <2 x i32> [[SPLIT3]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT21:%.*]] = insertelement <1 x i32> poison, i32 [[TMP21]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT22:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT21]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP22:%.*]] = mul <1 x i32> [[BLOCK20]], [[SPLAT_SPLAT22]]
-; CHECK-NEXT:    [[BLOCK23:%.*]] = shufflevector <3 x i32> [[SPLIT1]], <3 x i32> undef, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[BLOCK23:%.*]] = shufflevector <3 x i32> [[SPLIT1]], <3 x i32> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP23:%.*]] = extractelement <2 x i32> [[SPLIT3]], i64 1
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT24:%.*]] = insertelement <1 x i32> poison, i32 [[TMP23]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT25:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT24]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP24:%.*]] = mul <1 x i32> [[BLOCK23]], [[SPLAT_SPLAT25]]
 ; CHECK-NEXT:    [[TMP25:%.*]] = add <1 x i32> [[TMP22]], [[TMP24]]
-; CHECK-NEXT:    [[TMP26:%.*]] = shufflevector <1 x i32> [[TMP25]], <1 x i32> undef, <3 x i32> <i32 0, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP26:%.*]] = shufflevector <1 x i32> [[TMP25]], <1 x i32> poison, <3 x i32> <i32 0, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[TMP27:%.*]] = shufflevector <3 x i32> undef, <3 x i32> [[TMP26]], <3 x i32> <i32 3, i32 1, i32 2>
-; CHECK-NEXT:    [[BLOCK26:%.*]] = shufflevector <3 x i32> [[SPLIT]], <3 x i32> undef, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[BLOCK26:%.*]] = shufflevector <3 x i32> [[SPLIT]], <3 x i32> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP28:%.*]] = extractelement <2 x i32> [[SPLIT3]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT27:%.*]] = insertelement <1 x i32> poison, i32 [[TMP28]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT28:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT27]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP29:%.*]] = mul <1 x i32> [[BLOCK26]], [[SPLAT_SPLAT28]]
-; CHECK-NEXT:    [[BLOCK29:%.*]] = shufflevector <3 x i32> [[SPLIT1]], <3 x i32> undef, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[BLOCK29:%.*]] = shufflevector <3 x i32> [[SPLIT1]], <3 x i32> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP30:%.*]] = extractelement <2 x i32> [[SPLIT3]], i64 1
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT30:%.*]] = insertelement <1 x i32> poison, i32 [[TMP30]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT31:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT30]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP31:%.*]] = mul <1 x i32> [[BLOCK29]], [[SPLAT_SPLAT31]]
 ; CHECK-NEXT:    [[TMP32:%.*]] = add <1 x i32> [[TMP29]], [[TMP31]]
-; CHECK-NEXT:    [[TMP33:%.*]] = shufflevector <1 x i32> [[TMP32]], <1 x i32> undef, <3 x i32> <i32 0, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP33:%.*]] = shufflevector <1 x i32> [[TMP32]], <1 x i32> poison, <3 x i32> <i32 0, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[TMP34:%.*]] = shufflevector <3 x i32> [[TMP27]], <3 x i32> [[TMP33]], <3 x i32> <i32 0, i32 3, i32 2>
-; CHECK-NEXT:    [[BLOCK32:%.*]] = shufflevector <3 x i32> [[SPLIT]], <3 x i32> undef, <1 x i32> <i32 2>
+; CHECK-NEXT:    [[BLOCK32:%.*]] = shufflevector <3 x i32> [[SPLIT]], <3 x i32> poison, <1 x i32> <i32 2>
 ; CHECK-NEXT:    [[TMP35:%.*]] = extractelement <2 x i32> [[SPLIT3]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT33:%.*]] = insertelement <1 x i32> poison, i32 [[TMP35]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT34:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT33]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP36:%.*]] = mul <1 x i32> [[BLOCK32]], [[SPLAT_SPLAT34]]
-; CHECK-NEXT:    [[BLOCK35:%.*]] = shufflevector <3 x i32> [[SPLIT1]], <3 x i32> undef, <1 x i32> <i32 2>
+; CHECK-NEXT:    [[BLOCK35:%.*]] = shufflevector <3 x i32> [[SPLIT1]], <3 x i32> poison, <1 x i32> <i32 2>
 ; CHECK-NEXT:    [[TMP37:%.*]] = extractelement <2 x i32> [[SPLIT3]], i64 1
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT36:%.*]] = insertelement <1 x i32> poison, i32 [[TMP37]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT37:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT36]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP38:%.*]] = mul <1 x i32> [[BLOCK35]], [[SPLAT_SPLAT37]]
 ; CHECK-NEXT:    [[TMP39:%.*]] = add <1 x i32> [[TMP36]], [[TMP38]]
-; CHECK-NEXT:    [[TMP40:%.*]] = shufflevector <1 x i32> [[TMP39]], <1 x i32> undef, <3 x i32> <i32 0, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP40:%.*]] = shufflevector <1 x i32> [[TMP39]], <1 x i32> poison, <3 x i32> <i32 0, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[TMP41:%.*]] = shufflevector <3 x i32> [[TMP34]], <3 x i32> [[TMP40]], <3 x i32> <i32 0, i32 1, i32 3>
-; CHECK-NEXT:    [[BLOCK38:%.*]] = shufflevector <3 x i32> [[SPLIT]], <3 x i32> undef, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[BLOCK38:%.*]] = shufflevector <3 x i32> [[SPLIT]], <3 x i32> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP42:%.*]] = extractelement <2 x i32> [[SPLIT4]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT39:%.*]] = insertelement <1 x i32> poison, i32 [[TMP42]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT40:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT39]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP43:%.*]] = mul <1 x i32> [[BLOCK38]], [[SPLAT_SPLAT40]]
-; CHECK-NEXT:    [[BLOCK41:%.*]] = shufflevector <3 x i32> [[SPLIT1]], <3 x i32> undef, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[BLOCK41:%.*]] = shufflevector <3 x i32> [[SPLIT1]], <3 x i32> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP44:%.*]] = extractelement <2 x i32> [[SPLIT4]], i64 1
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT42:%.*]] = insertelement <1 x i32> poison, i32 [[TMP44]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT43:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT42]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP45:%.*]] = mul <1 x i32> [[BLOCK41]], [[SPLAT_SPLAT43]]
 ; CHECK-NEXT:    [[TMP46:%.*]] = add <1 x i32> [[TMP43]], [[TMP45]]
-; CHECK-NEXT:    [[TMP47:%.*]] = shufflevector <1 x i32> [[TMP46]], <1 x i32> undef, <3 x i32> <i32 0, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP47:%.*]] = shufflevector <1 x i32> [[TMP46]], <1 x i32> poison, <3 x i32> <i32 0, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[TMP48:%.*]] = shufflevector <3 x i32> undef, <3 x i32> [[TMP47]], <3 x i32> <i32 3, i32 1, i32 2>
-; CHECK-NEXT:    [[BLOCK44:%.*]] = shufflevector <3 x i32> [[SPLIT]], <3 x i32> undef, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[BLOCK44:%.*]] = shufflevector <3 x i32> [[SPLIT]], <3 x i32> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP49:%.*]] = extractelement <2 x i32> [[SPLIT4]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT45:%.*]] = insertelement <1 x i32> poison, i32 [[TMP49]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT46:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT45]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP50:%.*]] = mul <1 x i32> [[BLOCK44]], [[SPLAT_SPLAT46]]
-; CHECK-NEXT:    [[BLOCK47:%.*]] = shufflevector <3 x i32> [[SPLIT1]], <3 x i32> undef, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[BLOCK47:%.*]] = shufflevector <3 x i32> [[SPLIT1]], <3 x i32> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:    [[TMP51:%.*]] = extractelement <2 x i32> [[SPLIT4]], i64 1
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT48:%.*]] = insertelement <1 x i32> poison, i32 [[TMP51]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT49:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT48]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP52:%.*]] = mul <1 x i32> [[BLOCK47]], [[SPLAT_SPLAT49]]
 ; CHECK-NEXT:    [[TMP53:%.*]] = add <1 x i32> [[TMP50]], [[TMP52]]
-; CHECK-NEXT:    [[TMP54:%.*]] = shufflevector <1 x i32> [[TMP53]], <1 x i32> undef, <3 x i32> <i32 0, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP54:%.*]] = shufflevector <1 x i32> [[TMP53]], <1 x i32> poison, <3 x i32> <i32 0, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[TMP55:%.*]] = shufflevector <3 x i32> [[TMP48]], <3 x i32> [[TMP54]], <3 x i32> <i32 0, i32 3, i32 2>
-; CHECK-NEXT:    [[BLOCK50:%.*]] = shufflevector <3 x i32> [[SPLIT]], <3 x i32> undef, <1 x i32> <i32 2>
+; CHECK-NEXT:    [[BLOCK50:%.*]] = shufflevector <3 x i32> [[SPLIT]], <3 x i32> poison, <1 x i32> <i32 2>
 ; CHECK-NEXT:    [[TMP56:%.*]] = extractelement <2 x i32> [[SPLIT4]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT51:%.*]] = insertelement <1 x i32> poison, i32 [[TMP56]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT52:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT51]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP57:%.*]] = mul <1 x i32> [[BLOCK50]], [[SPLAT_SPLAT52]]
-; CHECK-NEXT:    [[BLOCK53:%.*]] = shufflevector <3 x i32> [[SPLIT1]], <3 x i32> undef, <1 x i32> <i32 2>
+; CHECK-NEXT:    [[BLOCK53:%.*]] = shufflevector <3 x i32> [[SPLIT1]], <3 x i32> poison, <1 x i32> <i32 2>
 ; CHECK-NEXT:    [[TMP58:%.*]] = extractelement <2 x i32> [[SPLIT4]], i64 1
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT54:%.*]] = insertelement <1 x i32> poison, i32 [[TMP58]], i32 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT55:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT54]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP59:%.*]] = mul <1 x i32> [[BLOCK53]], [[SPLAT_SPLAT55]]
 ; CHECK-NEXT:    [[TMP60:%.*]] = add <1 x i32> [[TMP57]], [[TMP59]]
-; CHECK-NEXT:    [[TMP61:%.*]] = shufflevector <1 x i32> [[TMP60]], <1 x i32> undef, <3 x i32> <i32 0, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP61:%.*]] = shufflevector <1 x i32> [[TMP60]], <1 x i32> poison, <3 x i32> <i32 0, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[TMP62:%.*]] = shufflevector <3 x i32> [[TMP55]], <3 x i32> [[TMP61]], <3 x i32> <i32 0, i32 1, i32 3>
 ; CHECK-NEXT:    [[TMP63:%.*]] = shufflevector <3 x i32> [[TMP20]], <3 x i32> [[TMP41]], <6 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5>
-; CHECK-NEXT:    [[TMP64:%.*]] = shufflevector <3 x i32> [[TMP62]], <3 x i32> undef, <6 x i32> <i32 0, i32 1, i32 2, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP64:%.*]] = shufflevector <3 x i32> [[TMP62]], <3 x i32> poison, <6 x i32> <i32 0, i32 1, i32 2, i32 undef, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[TMP65:%.*]] = shufflevector <6 x i32> [[TMP63]], <6 x i32> [[TMP64]], <9 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
 ; CHECK-NEXT:    ret <9 x i32> [[TMP65]]
 ;

diff  --git a/llvm/test/Transforms/LowerMatrixIntrinsics/propagate-backward.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/propagate-backward.ll
index 9b154bb0e2de..584f47d8530b 100644
--- a/llvm/test/Transforms/LowerMatrixIntrinsics/propagate-backward.ll
+++ b/llvm/test/Transforms/LowerMatrixIntrinsics/propagate-backward.ll
@@ -5,14 +5,14 @@
 define <8 x double> @fadd_transpose(<8 x double> %a, <8 x double> %b) {
 ; CHECK-LABEL: @fadd_transpose(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[SPLIT:%.*]] = shufflevector <8 x double> [[A:%.*]], <8 x double> undef, <2 x i32> <i32 0, i32 1>
-; CHECK-NEXT:    [[SPLIT1:%.*]] = shufflevector <8 x double> [[A]], <8 x double> undef, <2 x i32> <i32 2, i32 3>
-; CHECK-NEXT:    [[SPLIT2:%.*]] = shufflevector <8 x double> [[A]], <8 x double> undef, <2 x i32> <i32 4, i32 5>
-; CHECK-NEXT:    [[SPLIT3:%.*]] = shufflevector <8 x double> [[A]], <8 x double> undef, <2 x i32> <i32 6, i32 7>
-; CHECK-NEXT:    [[SPLIT4:%.*]] = shufflevector <8 x double> [[B:%.*]], <8 x double> undef, <2 x i32> <i32 0, i32 1>
-; CHECK-NEXT:    [[SPLIT5:%.*]] = shufflevector <8 x double> [[B]], <8 x double> undef, <2 x i32> <i32 2, i32 3>
-; CHECK-NEXT:    [[SPLIT6:%.*]] = shufflevector <8 x double> [[B]], <8 x double> undef, <2 x i32> <i32 4, i32 5>
-; CHECK-NEXT:    [[SPLIT7:%.*]] = shufflevector <8 x double> [[B]], <8 x double> undef, <2 x i32> <i32 6, i32 7>
+; CHECK-NEXT:    [[SPLIT:%.*]] = shufflevector <8 x double> [[A:%.*]], <8 x double> poison, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT:    [[SPLIT1:%.*]] = shufflevector <8 x double> [[A]], <8 x double> poison, <2 x i32> <i32 2, i32 3>
+; CHECK-NEXT:    [[SPLIT2:%.*]] = shufflevector <8 x double> [[A]], <8 x double> poison, <2 x i32> <i32 4, i32 5>
+; CHECK-NEXT:    [[SPLIT3:%.*]] = shufflevector <8 x double> [[A]], <8 x double> poison, <2 x i32> <i32 6, i32 7>
+; CHECK-NEXT:    [[SPLIT4:%.*]] = shufflevector <8 x double> [[B:%.*]], <8 x double> poison, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT:    [[SPLIT5:%.*]] = shufflevector <8 x double> [[B]], <8 x double> poison, <2 x i32> <i32 2, i32 3>
+; CHECK-NEXT:    [[SPLIT6:%.*]] = shufflevector <8 x double> [[B]], <8 x double> poison, <2 x i32> <i32 4, i32 5>
+; CHECK-NEXT:    [[SPLIT7:%.*]] = shufflevector <8 x double> [[B]], <8 x double> poison, <2 x i32> <i32 6, i32 7>
 ; CHECK-NEXT:    [[TMP0:%.*]] = fadd <2 x double> [[SPLIT]], [[SPLIT4]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = fadd <2 x double> [[SPLIT1]], [[SPLIT5]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = fadd <2 x double> [[SPLIT2]], [[SPLIT6]]
@@ -57,10 +57,10 @@ define <8 x double> @load_fadd_transpose(<8 x double>* %A.Ptr, <8 x double> %b)
 ; CHECK-NEXT:    [[VEC_GEP6:%.*]] = getelementptr double, double* [[TMP0]], i64 6
 ; CHECK-NEXT:    [[VEC_CAST7:%.*]] = bitcast double* [[VEC_GEP6]] to <2 x double>*
 ; CHECK-NEXT:    [[COL_LOAD8:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST7]], align 8
-; CHECK-NEXT:    [[SPLIT:%.*]] = shufflevector <8 x double> [[B:%.*]], <8 x double> undef, <2 x i32> <i32 0, i32 1>
-; CHECK-NEXT:    [[SPLIT9:%.*]] = shufflevector <8 x double> [[B]], <8 x double> undef, <2 x i32> <i32 2, i32 3>
-; CHECK-NEXT:    [[SPLIT10:%.*]] = shufflevector <8 x double> [[B]], <8 x double> undef, <2 x i32> <i32 4, i32 5>
-; CHECK-NEXT:    [[SPLIT11:%.*]] = shufflevector <8 x double> [[B]], <8 x double> undef, <2 x i32> <i32 6, i32 7>
+; CHECK-NEXT:    [[SPLIT:%.*]] = shufflevector <8 x double> [[B:%.*]], <8 x double> poison, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT:    [[SPLIT9:%.*]] = shufflevector <8 x double> [[B]], <8 x double> poison, <2 x i32> <i32 2, i32 3>
+; CHECK-NEXT:    [[SPLIT10:%.*]] = shufflevector <8 x double> [[B]], <8 x double> poison, <2 x i32> <i32 4, i32 5>
+; CHECK-NEXT:    [[SPLIT11:%.*]] = shufflevector <8 x double> [[B]], <8 x double> poison, <2 x i32> <i32 6, i32 7>
 ; CHECK-NEXT:    [[TMP1:%.*]] = fadd <2 x double> [[COL_LOAD]], [[SPLIT]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = fadd <2 x double> [[COL_LOAD2]], [[SPLIT9]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = fadd <2 x double> [[COL_LOAD5]], [[SPLIT10]]

diff  --git a/llvm/test/Transforms/LowerMatrixIntrinsics/propagate-backwards-unsupported.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/propagate-backwards-unsupported.ll
index 591cddd261cf..f031843f2da4 100644
--- a/llvm/test/Transforms/LowerMatrixIntrinsics/propagate-backwards-unsupported.ll
+++ b/llvm/test/Transforms/LowerMatrixIntrinsics/propagate-backwards-unsupported.ll
@@ -7,11 +7,10 @@ define <9 x double> @unsupported_phi(i1 %cond, <9 x double> %A, <9 x double> %B,
 ; CHECK-LABEL: @unsupported_phi(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 [[COND:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
-
-; CHECK-LABEL: if.then:
-; CHECK-NEXT:    [[SPLIT3:%.*]] = shufflevector <9 x double> [[A:%.*]], <9 x double> undef, <3 x i32> <i32 0, i32 1, i32 2>
-; CHECK-NEXT:    [[SPLIT4:%.*]] = shufflevector <9 x double> [[A]], <9 x double> undef, <3 x i32> <i32 3, i32 4, i32 5>
-; CHECK-NEXT:    [[SPLIT5:%.*]] = shufflevector <9 x double> [[A]], <9 x double> undef, <3 x i32> <i32 6, i32 7, i32 8>
+; CHECK:       if.then:
+; CHECK-NEXT:    [[SPLIT3:%.*]] = shufflevector <9 x double> [[A:%.*]], <9 x double> poison, <3 x i32> <i32 0, i32 1, i32 2>
+; CHECK-NEXT:    [[SPLIT4:%.*]] = shufflevector <9 x double> [[A]], <9 x double> poison, <3 x i32> <i32 3, i32 4, i32 5>
+; CHECK-NEXT:    [[SPLIT5:%.*]] = shufflevector <9 x double> [[A]], <9 x double> poison, <3 x i32> <i32 6, i32 7, i32 8>
 ; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <3 x double> [[SPLIT3]], i64 0
 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <3 x double> undef, double [[TMP0]], i64 0
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <3 x double> [[SPLIT4]], i64 0
@@ -31,14 +30,13 @@ define <9 x double> @unsupported_phi(i1 %cond, <9 x double> %A, <9 x double> %B,
 ; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <3 x double> [[SPLIT5]], i64 2
 ; CHECK-NEXT:    [[TMP17:%.*]] = insertelement <3 x double> [[TMP15]], double [[TMP16]], i64 2
 ; CHECK-NEXT:    [[TMP18:%.*]] = shufflevector <3 x double> [[TMP5]], <3 x double> [[TMP11]], <6 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5>
-; CHECK-NEXT:    [[TMP19:%.*]] = shufflevector <3 x double> [[TMP17]], <3 x double> undef, <6 x i32> <i32 0, i32 1, i32 2, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP19:%.*]] = shufflevector <3 x double> [[TMP17]], <3 x double> poison, <6 x i32> <i32 0, i32 1, i32 2, i32 undef, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[TMP20:%.*]] = shufflevector <6 x double> [[TMP18]], <6 x double> [[TMP19]], <9 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
 ; CHECK-NEXT:    br label [[IF_END:%.*]]
-
-; CHECK-LABEL: if.else:
-; CHECK-NEXT:    [[SPLIT:%.*]] = shufflevector <9 x double> [[B:%.*]], <9 x double> undef, <3 x i32> <i32 0, i32 1, i32 2>
-; CHECK-NEXT:    [[SPLIT1:%.*]] = shufflevector <9 x double> [[B]], <9 x double> undef, <3 x i32> <i32 3, i32 4, i32 5>
-; CHECK-NEXT:    [[SPLIT2:%.*]] = shufflevector <9 x double> [[B]], <9 x double> undef, <3 x i32> <i32 6, i32 7, i32 8>
+; CHECK:       if.else:
+; CHECK-NEXT:    [[SPLIT:%.*]] = shufflevector <9 x double> [[B:%.*]], <9 x double> poison, <3 x i32> <i32 0, i32 1, i32 2>
+; CHECK-NEXT:    [[SPLIT1:%.*]] = shufflevector <9 x double> [[B]], <9 x double> poison, <3 x i32> <i32 3, i32 4, i32 5>
+; CHECK-NEXT:    [[SPLIT2:%.*]] = shufflevector <9 x double> [[B]], <9 x double> poison, <3 x i32> <i32 6, i32 7, i32 8>
 ; CHECK-NEXT:    [[TMP21:%.*]] = extractelement <3 x double> [[SPLIT]], i64 0
 ; CHECK-NEXT:    [[TMP22:%.*]] = insertelement <3 x double> undef, double [[TMP21]], i64 0
 ; CHECK-NEXT:    [[TMP23:%.*]] = extractelement <3 x double> [[SPLIT1]], i64 0
@@ -58,20 +56,196 @@ define <9 x double> @unsupported_phi(i1 %cond, <9 x double> %A, <9 x double> %B,
 ; CHECK-NEXT:    [[TMP37:%.*]] = extractelement <3 x double> [[SPLIT2]], i64 2
 ; CHECK-NEXT:    [[TMP38:%.*]] = insertelement <3 x double> [[TMP36]], double [[TMP37]], i64 2
 ; CHECK-NEXT:    [[TMP39:%.*]] = shufflevector <3 x double> [[TMP26]], <3 x double> [[TMP32]], <6 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5>
-; CHECK-NEXT:    [[TMP40:%.*]] = shufflevector <3 x double> [[TMP38]], <3 x double> undef, <6 x i32> <i32 0, i32 1, i32 2, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP40:%.*]] = shufflevector <3 x double> [[TMP38]], <3 x double> poison, <6 x i32> <i32 0, i32 1, i32 2, i32 undef, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[TMP41:%.*]] = shufflevector <6 x double> [[TMP39]], <6 x double> [[TMP40]], <9 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
 ; CHECK-NEXT:    br label [[IF_END]]
-
-; CHECK-LABEL: if.end:
+; CHECK:       if.end:
 ; CHECK-NEXT:    [[MERGE:%.*]] = phi <9 x double> [ [[TMP20]], [[IF_THEN]] ], [ [[TMP41]], [[IF_ELSE]] ]
-; CHECK-NEXT:    [[SPLIT6:%.*]] = shufflevector <9 x double> [[C:%.*]], <9 x double> undef, <3 x i32> <i32 0, i32 1, i32 2>
-; CHECK-NEXT:    [[SPLIT7:%.*]] = shufflevector <9 x double> [[C]], <9 x double> undef, <3 x i32> <i32 3, i32 4, i32 5>
-; CHECK-NEXT:    [[SPLIT8:%.*]] = shufflevector <9 x double> [[C]], <9 x double> undef, <3 x i32> <i32 6, i32 7, i32 8>
-; CHECK-NEXT:    [[SPLIT9:%.*]] = shufflevector <9 x double> [[MERGE]], <9 x double> undef, <3 x i32> <i32 0, i32 1, i32 2>
-; CHECK-NEXT:    [[SPLIT10:%.*]] = shufflevector <9 x double> [[MERGE]], <9 x double> undef, <3 x i32> <i32 3, i32 4, i32 5>
-; CHECK-NEXT:    [[SPLIT11:%.*]] = shufflevector <9 x double> [[MERGE]], <9 x double> undef, <3 x i32> <i32 6, i32 7, i32 8>
-; CHECK-NEXT:    [[BLOCK:%.*]] = shufflevector <3 x double> [[SPLIT6]], <3 x double> undef, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[SPLIT6:%.*]] = shufflevector <9 x double> [[C:%.*]], <9 x double> poison, <3 x i32> <i32 0, i32 1, i32 2>
+; CHECK-NEXT:    [[SPLIT7:%.*]] = shufflevector <9 x double> [[C]], <9 x double> poison, <3 x i32> <i32 3, i32 4, i32 5>
+; CHECK-NEXT:    [[SPLIT8:%.*]] = shufflevector <9 x double> [[C]], <9 x double> poison, <3 x i32> <i32 6, i32 7, i32 8>
+; CHECK-NEXT:    [[SPLIT9:%.*]] = shufflevector <9 x double> [[MERGE]], <9 x double> poison, <3 x i32> <i32 0, i32 1, i32 2>
+; CHECK-NEXT:    [[SPLIT10:%.*]] = shufflevector <9 x double> [[MERGE]], <9 x double> poison, <3 x i32> <i32 3, i32 4, i32 5>
+; CHECK-NEXT:    [[SPLIT11:%.*]] = shufflevector <9 x double> [[MERGE]], <9 x double> poison, <3 x i32> <i32 6, i32 7, i32 8>
+; CHECK-NEXT:    [[BLOCK:%.*]] = shufflevector <3 x double> [[SPLIT6]], <3 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP42:%.*]] = extractelement <3 x double> [[SPLIT9]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x double> poison, double [[TMP42]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP43:%.*]] = fmul <1 x double> [[BLOCK]], [[SPLAT_SPLAT]]
+; CHECK-NEXT:    [[BLOCK12:%.*]] = shufflevector <3 x double> [[SPLIT7]], <3 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP44:%.*]] = extractelement <3 x double> [[SPLIT9]], i64 1
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT13:%.*]] = insertelement <1 x double> poison, double [[TMP44]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLAT14:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT13]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP45:%.*]] = fmul <1 x double> [[BLOCK12]], [[SPLAT_SPLAT14]]
+; CHECK-NEXT:    [[TMP46:%.*]] = fadd <1 x double> [[TMP43]], [[TMP45]]
+; CHECK-NEXT:    [[BLOCK15:%.*]] = shufflevector <3 x double> [[SPLIT8]], <3 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP47:%.*]] = extractelement <3 x double> [[SPLIT9]], i64 2
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT16:%.*]] = insertelement <1 x double> poison, double [[TMP47]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLAT17:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT16]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP48:%.*]] = fmul <1 x double> [[BLOCK15]], [[SPLAT_SPLAT17]]
+; CHECK-NEXT:    [[TMP49:%.*]] = fadd <1 x double> [[TMP46]], [[TMP48]]
+; CHECK-NEXT:    [[TMP50:%.*]] = shufflevector <1 x double> [[TMP49]], <1 x double> poison, <3 x i32> <i32 0, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP51:%.*]] = shufflevector <3 x double> undef, <3 x double> [[TMP50]], <3 x i32> <i32 3, i32 1, i32 2>
+; CHECK-NEXT:    [[BLOCK18:%.*]] = shufflevector <3 x double> [[SPLIT6]], <3 x double> poison, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[TMP52:%.*]] = extractelement <3 x double> [[SPLIT9]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT19:%.*]] = insertelement <1 x double> poison, double [[TMP52]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLAT20:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT19]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP53:%.*]] = fmul <1 x double> [[BLOCK18]], [[SPLAT_SPLAT20]]
+; CHECK-NEXT:    [[BLOCK21:%.*]] = shufflevector <3 x double> [[SPLIT7]], <3 x double> poison, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[TMP54:%.*]] = extractelement <3 x double> [[SPLIT9]], i64 1
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT22:%.*]] = insertelement <1 x double> poison, double [[TMP54]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLAT23:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT22]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP55:%.*]] = fmul <1 x double> [[BLOCK21]], [[SPLAT_SPLAT23]]
+; CHECK-NEXT:    [[TMP56:%.*]] = fadd <1 x double> [[TMP53]], [[TMP55]]
+; CHECK-NEXT:    [[BLOCK24:%.*]] = shufflevector <3 x double> [[SPLIT8]], <3 x double> poison, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[TMP57:%.*]] = extractelement <3 x double> [[SPLIT9]], i64 2
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT25:%.*]] = insertelement <1 x double> poison, double [[TMP57]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLAT26:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT25]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP58:%.*]] = fmul <1 x double> [[BLOCK24]], [[SPLAT_SPLAT26]]
+; CHECK-NEXT:    [[TMP59:%.*]] = fadd <1 x double> [[TMP56]], [[TMP58]]
+; CHECK-NEXT:    [[TMP60:%.*]] = shufflevector <1 x double> [[TMP59]], <1 x double> poison, <3 x i32> <i32 0, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP61:%.*]] = shufflevector <3 x double> [[TMP51]], <3 x double> [[TMP60]], <3 x i32> <i32 0, i32 3, i32 2>
+; CHECK-NEXT:    [[BLOCK27:%.*]] = shufflevector <3 x double> [[SPLIT6]], <3 x double> poison, <1 x i32> <i32 2>
+; CHECK-NEXT:    [[TMP62:%.*]] = extractelement <3 x double> [[SPLIT9]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT28:%.*]] = insertelement <1 x double> poison, double [[TMP62]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLAT29:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT28]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP63:%.*]] = fmul <1 x double> [[BLOCK27]], [[SPLAT_SPLAT29]]
+; CHECK-NEXT:    [[BLOCK30:%.*]] = shufflevector <3 x double> [[SPLIT7]], <3 x double> poison, <1 x i32> <i32 2>
+; CHECK-NEXT:    [[TMP64:%.*]] = extractelement <3 x double> [[SPLIT9]], i64 1
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT31:%.*]] = insertelement <1 x double> poison, double [[TMP64]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLAT32:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT31]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP65:%.*]] = fmul <1 x double> [[BLOCK30]], [[SPLAT_SPLAT32]]
+; CHECK-NEXT:    [[TMP66:%.*]] = fadd <1 x double> [[TMP63]], [[TMP65]]
+; CHECK-NEXT:    [[BLOCK33:%.*]] = shufflevector <3 x double> [[SPLIT8]], <3 x double> poison, <1 x i32> <i32 2>
+; CHECK-NEXT:    [[TMP67:%.*]] = extractelement <3 x double> [[SPLIT9]], i64 2
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT34:%.*]] = insertelement <1 x double> poison, double [[TMP67]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLAT35:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT34]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP68:%.*]] = fmul <1 x double> [[BLOCK33]], [[SPLAT_SPLAT35]]
+; CHECK-NEXT:    [[TMP69:%.*]] = fadd <1 x double> [[TMP66]], [[TMP68]]
+; CHECK-NEXT:    [[TMP70:%.*]] = shufflevector <1 x double> [[TMP69]], <1 x double> poison, <3 x i32> <i32 0, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP71:%.*]] = shufflevector <3 x double> [[TMP61]], <3 x double> [[TMP70]], <3 x i32> <i32 0, i32 1, i32 3>
+; CHECK-NEXT:    [[BLOCK36:%.*]] = shufflevector <3 x double> [[SPLIT6]], <3 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP72:%.*]] = extractelement <3 x double> [[SPLIT10]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT37:%.*]] = insertelement <1 x double> poison, double [[TMP72]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLAT38:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT37]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP73:%.*]] = fmul <1 x double> [[BLOCK36]], [[SPLAT_SPLAT38]]
+; CHECK-NEXT:    [[BLOCK39:%.*]] = shufflevector <3 x double> [[SPLIT7]], <3 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP74:%.*]] = extractelement <3 x double> [[SPLIT10]], i64 1
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT40:%.*]] = insertelement <1 x double> poison, double [[TMP74]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLAT41:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT40]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP75:%.*]] = fmul <1 x double> [[BLOCK39]], [[SPLAT_SPLAT41]]
+; CHECK-NEXT:    [[TMP76:%.*]] = fadd <1 x double> [[TMP73]], [[TMP75]]
+; CHECK-NEXT:    [[BLOCK42:%.*]] = shufflevector <3 x double> [[SPLIT8]], <3 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP77:%.*]] = extractelement <3 x double> [[SPLIT10]], i64 2
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT43:%.*]] = insertelement <1 x double> poison, double [[TMP77]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLAT44:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT43]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP78:%.*]] = fmul <1 x double> [[BLOCK42]], [[SPLAT_SPLAT44]]
+; CHECK-NEXT:    [[TMP79:%.*]] = fadd <1 x double> [[TMP76]], [[TMP78]]
+; CHECK-NEXT:    [[TMP80:%.*]] = shufflevector <1 x double> [[TMP79]], <1 x double> poison, <3 x i32> <i32 0, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP81:%.*]] = shufflevector <3 x double> undef, <3 x double> [[TMP80]], <3 x i32> <i32 3, i32 1, i32 2>
+; CHECK-NEXT:    [[BLOCK45:%.*]] = shufflevector <3 x double> [[SPLIT6]], <3 x double> poison, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[TMP82:%.*]] = extractelement <3 x double> [[SPLIT10]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT46:%.*]] = insertelement <1 x double> poison, double [[TMP82]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLAT47:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT46]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP83:%.*]] = fmul <1 x double> [[BLOCK45]], [[SPLAT_SPLAT47]]
+; CHECK-NEXT:    [[BLOCK48:%.*]] = shufflevector <3 x double> [[SPLIT7]], <3 x double> poison, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[TMP84:%.*]] = extractelement <3 x double> [[SPLIT10]], i64 1
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT49:%.*]] = insertelement <1 x double> poison, double [[TMP84]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLAT50:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT49]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP85:%.*]] = fmul <1 x double> [[BLOCK48]], [[SPLAT_SPLAT50]]
+; CHECK-NEXT:    [[TMP86:%.*]] = fadd <1 x double> [[TMP83]], [[TMP85]]
+; CHECK-NEXT:    [[BLOCK51:%.*]] = shufflevector <3 x double> [[SPLIT8]], <3 x double> poison, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[TMP87:%.*]] = extractelement <3 x double> [[SPLIT10]], i64 2
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT52:%.*]] = insertelement <1 x double> poison, double [[TMP87]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLAT53:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT52]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP88:%.*]] = fmul <1 x double> [[BLOCK51]], [[SPLAT_SPLAT53]]
+; CHECK-NEXT:    [[TMP89:%.*]] = fadd <1 x double> [[TMP86]], [[TMP88]]
+; CHECK-NEXT:    [[TMP90:%.*]] = shufflevector <1 x double> [[TMP89]], <1 x double> poison, <3 x i32> <i32 0, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP91:%.*]] = shufflevector <3 x double> [[TMP81]], <3 x double> [[TMP90]], <3 x i32> <i32 0, i32 3, i32 2>
+; CHECK-NEXT:    [[BLOCK54:%.*]] = shufflevector <3 x double> [[SPLIT6]], <3 x double> poison, <1 x i32> <i32 2>
+; CHECK-NEXT:    [[TMP92:%.*]] = extractelement <3 x double> [[SPLIT10]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT55:%.*]] = insertelement <1 x double> poison, double [[TMP92]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLAT56:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT55]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP93:%.*]] = fmul <1 x double> [[BLOCK54]], [[SPLAT_SPLAT56]]
+; CHECK-NEXT:    [[BLOCK57:%.*]] = shufflevector <3 x double> [[SPLIT7]], <3 x double> poison, <1 x i32> <i32 2>
+; CHECK-NEXT:    [[TMP94:%.*]] = extractelement <3 x double> [[SPLIT10]], i64 1
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT58:%.*]] = insertelement <1 x double> poison, double [[TMP94]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLAT59:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT58]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP95:%.*]] = fmul <1 x double> [[BLOCK57]], [[SPLAT_SPLAT59]]
+; CHECK-NEXT:    [[TMP96:%.*]] = fadd <1 x double> [[TMP93]], [[TMP95]]
+; CHECK-NEXT:    [[BLOCK60:%.*]] = shufflevector <3 x double> [[SPLIT8]], <3 x double> poison, <1 x i32> <i32 2>
+; CHECK-NEXT:    [[TMP97:%.*]] = extractelement <3 x double> [[SPLIT10]], i64 2
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT61:%.*]] = insertelement <1 x double> poison, double [[TMP97]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLAT62:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT61]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP98:%.*]] = fmul <1 x double> [[BLOCK60]], [[SPLAT_SPLAT62]]
+; CHECK-NEXT:    [[TMP99:%.*]] = fadd <1 x double> [[TMP96]], [[TMP98]]
+; CHECK-NEXT:    [[TMP100:%.*]] = shufflevector <1 x double> [[TMP99]], <1 x double> poison, <3 x i32> <i32 0, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP101:%.*]] = shufflevector <3 x double> [[TMP91]], <3 x double> [[TMP100]], <3 x i32> <i32 0, i32 1, i32 3>
+; CHECK-NEXT:    [[BLOCK63:%.*]] = shufflevector <3 x double> [[SPLIT6]], <3 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP102:%.*]] = extractelement <3 x double> [[SPLIT11]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT64:%.*]] = insertelement <1 x double> poison, double [[TMP102]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLAT65:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT64]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP103:%.*]] = fmul <1 x double> [[BLOCK63]], [[SPLAT_SPLAT65]]
+; CHECK-NEXT:    [[BLOCK66:%.*]] = shufflevector <3 x double> [[SPLIT7]], <3 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP104:%.*]] = extractelement <3 x double> [[SPLIT11]], i64 1
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT67:%.*]] = insertelement <1 x double> poison, double [[TMP104]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLAT68:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT67]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP105:%.*]] = fmul <1 x double> [[BLOCK66]], [[SPLAT_SPLAT68]]
+; CHECK-NEXT:    [[TMP106:%.*]] = fadd <1 x double> [[TMP103]], [[TMP105]]
+; CHECK-NEXT:    [[BLOCK69:%.*]] = shufflevector <3 x double> [[SPLIT8]], <3 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP107:%.*]] = extractelement <3 x double> [[SPLIT11]], i64 2
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT70:%.*]] = insertelement <1 x double> poison, double [[TMP107]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLAT71:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT70]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP108:%.*]] = fmul <1 x double> [[BLOCK69]], [[SPLAT_SPLAT71]]
+; CHECK-NEXT:    [[TMP109:%.*]] = fadd <1 x double> [[TMP106]], [[TMP108]]
+; CHECK-NEXT:    [[TMP110:%.*]] = shufflevector <1 x double> [[TMP109]], <1 x double> poison, <3 x i32> <i32 0, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP111:%.*]] = shufflevector <3 x double> undef, <3 x double> [[TMP110]], <3 x i32> <i32 3, i32 1, i32 2>
+; CHECK-NEXT:    [[BLOCK72:%.*]] = shufflevector <3 x double> [[SPLIT6]], <3 x double> poison, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[TMP112:%.*]] = extractelement <3 x double> [[SPLIT11]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT73:%.*]] = insertelement <1 x double> poison, double [[TMP112]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLAT74:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT73]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP113:%.*]] = fmul <1 x double> [[BLOCK72]], [[SPLAT_SPLAT74]]
+; CHECK-NEXT:    [[BLOCK75:%.*]] = shufflevector <3 x double> [[SPLIT7]], <3 x double> poison, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[TMP114:%.*]] = extractelement <3 x double> [[SPLIT11]], i64 1
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT76:%.*]] = insertelement <1 x double> poison, double [[TMP114]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLAT77:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT76]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP115:%.*]] = fmul <1 x double> [[BLOCK75]], [[SPLAT_SPLAT77]]
+; CHECK-NEXT:    [[TMP116:%.*]] = fadd <1 x double> [[TMP113]], [[TMP115]]
+; CHECK-NEXT:    [[BLOCK78:%.*]] = shufflevector <3 x double> [[SPLIT8]], <3 x double> poison, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[TMP117:%.*]] = extractelement <3 x double> [[SPLIT11]], i64 2
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT79:%.*]] = insertelement <1 x double> poison, double [[TMP117]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLAT80:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT79]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP118:%.*]] = fmul <1 x double> [[BLOCK78]], [[SPLAT_SPLAT80]]
+; CHECK-NEXT:    [[TMP119:%.*]] = fadd <1 x double> [[TMP116]], [[TMP118]]
+; CHECK-NEXT:    [[TMP120:%.*]] = shufflevector <1 x double> [[TMP119]], <1 x double> poison, <3 x i32> <i32 0, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP121:%.*]] = shufflevector <3 x double> [[TMP111]], <3 x double> [[TMP120]], <3 x i32> <i32 0, i32 3, i32 2>
+; CHECK-NEXT:    [[BLOCK81:%.*]] = shufflevector <3 x double> [[SPLIT6]], <3 x double> poison, <1 x i32> <i32 2>
+; CHECK-NEXT:    [[TMP122:%.*]] = extractelement <3 x double> [[SPLIT11]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT82:%.*]] = insertelement <1 x double> poison, double [[TMP122]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLAT83:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT82]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP123:%.*]] = fmul <1 x double> [[BLOCK81]], [[SPLAT_SPLAT83]]
+; CHECK-NEXT:    [[BLOCK84:%.*]] = shufflevector <3 x double> [[SPLIT7]], <3 x double> poison, <1 x i32> <i32 2>
+; CHECK-NEXT:    [[TMP124:%.*]] = extractelement <3 x double> [[SPLIT11]], i64 1
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT85:%.*]] = insertelement <1 x double> poison, double [[TMP124]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLAT86:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT85]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP125:%.*]] = fmul <1 x double> [[BLOCK84]], [[SPLAT_SPLAT86]]
+; CHECK-NEXT:    [[TMP126:%.*]] = fadd <1 x double> [[TMP123]], [[TMP125]]
+; CHECK-NEXT:    [[BLOCK87:%.*]] = shufflevector <3 x double> [[SPLIT8]], <3 x double> poison, <1 x i32> <i32 2>
+; CHECK-NEXT:    [[TMP127:%.*]] = extractelement <3 x double> [[SPLIT11]], i64 2
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT88:%.*]] = insertelement <1 x double> poison, double [[TMP127]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLAT89:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT88]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP128:%.*]] = fmul <1 x double> [[BLOCK87]], [[SPLAT_SPLAT89]]
+; CHECK-NEXT:    [[TMP129:%.*]] = fadd <1 x double> [[TMP126]], [[TMP128]]
+; CHECK-NEXT:    [[TMP130:%.*]] = shufflevector <1 x double> [[TMP129]], <1 x double> poison, <3 x i32> <i32 0, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP131:%.*]] = shufflevector <3 x double> [[TMP121]], <3 x double> [[TMP130]], <3 x i32> <i32 0, i32 1, i32 3>
+; CHECK-NEXT:    [[TMP132:%.*]] = shufflevector <3 x double> [[TMP71]], <3 x double> [[TMP101]], <6 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5>
+; CHECK-NEXT:    [[TMP133:%.*]] = shufflevector <3 x double> [[TMP131]], <3 x double> poison, <6 x i32> <i32 0, i32 1, i32 2, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP134:%.*]] = shufflevector <6 x double> [[TMP132]], <6 x double> [[TMP133]], <9 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
+; CHECK-NEXT:    ret <9 x double> [[TMP134]]
 ;
+
+
+
 entry:
   br i1 %cond, label %if.then, label %if.else
 
@@ -92,9 +266,9 @@ if.end:                                        ; preds = %if.then, %if.else
 ; Make sure we use a flattened vector when calling @foo and the use its flat vector result properly.
 define <9 x double> @unsupported_call(i1 %cond, <9 x double> %A, <9 x double> %B) {
 ; CHECK-LABEL: @unsupported_call(
-; CHECK-NEXT:    [[SPLIT:%.*]] = shufflevector <9 x double> [[A:%.*]], <9 x double> undef, <3 x i32> <i32 0, i32 1, i32 2>
-; CHECK-NEXT:    [[SPLIT1:%.*]] = shufflevector <9 x double> [[A]], <9 x double> undef, <3 x i32> <i32 3, i32 4, i32 5>
-; CHECK-NEXT:    [[SPLIT2:%.*]] = shufflevector <9 x double> [[A]], <9 x double> undef, <3 x i32> <i32 6, i32 7, i32 8>
+; CHECK-NEXT:    [[SPLIT:%.*]] = shufflevector <9 x double> [[A:%.*]], <9 x double> poison, <3 x i32> <i32 0, i32 1, i32 2>
+; CHECK-NEXT:    [[SPLIT1:%.*]] = shufflevector <9 x double> [[A]], <9 x double> poison, <3 x i32> <i32 3, i32 4, i32 5>
+; CHECK-NEXT:    [[SPLIT2:%.*]] = shufflevector <9 x double> [[A]], <9 x double> poison, <3 x i32> <i32 6, i32 7, i32 8>
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <3 x double> [[SPLIT]], i64 0
 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <3 x double> undef, double [[TMP1]], i64 0
 ; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <3 x double> [[SPLIT1]], i64 0
@@ -114,15 +288,190 @@ define <9 x double> @unsupported_call(i1 %cond, <9 x double> %A, <9 x double> %B
 ; CHECK-NEXT:    [[TMP17:%.*]] = extractelement <3 x double> [[SPLIT2]], i64 2
 ; CHECK-NEXT:    [[TMP18:%.*]] = insertelement <3 x double> [[TMP16]], double [[TMP17]], i64 2
 ; CHECK-NEXT:    [[TMP19:%.*]] = shufflevector <3 x double> [[TMP6]], <3 x double> [[TMP12]], <6 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5>
-; CHECK-NEXT:    [[TMP20:%.*]] = shufflevector <3 x double> [[TMP18]], <3 x double> undef, <6 x i32> <i32 0, i32 1, i32 2, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP20:%.*]] = shufflevector <3 x double> [[TMP18]], <3 x double> poison, <6 x i32> <i32 0, i32 1, i32 2, i32 undef, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[TMP21:%.*]] = shufflevector <6 x double> [[TMP19]], <6 x double> [[TMP20]], <9 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
 ; CHECK-NEXT:    [[A_FOO:%.*]] = call <9 x double> @foo(<9 x double> [[TMP21]])
-; CHECK-NEXT:    [[SPLIT3:%.*]] = shufflevector <9 x double> [[B:%.*]], <9 x double> undef, <3 x i32> <i32 0, i32 1, i32 2>
-; CHECK-NEXT:    [[SPLIT4:%.*]] = shufflevector <9 x double> [[B]], <9 x double> undef, <3 x i32> <i32 3, i32 4, i32 5>
-; CHECK-NEXT:    [[SPLIT5:%.*]] = shufflevector <9 x double> [[B]], <9 x double> undef, <3 x i32> <i32 6, i32 7, i32 8>
-; CHECK-NEXT:    [[SPLIT6:%.*]] = shufflevector <9 x double> [[A_FOO]], <9 x double> undef, <3 x i32> <i32 0, i32 1, i32 2>
-; CHECK-NEXT:    [[SPLIT7:%.*]] = shufflevector <9 x double> [[A_FOO]], <9 x double> undef, <3 x i32> <i32 3, i32 4, i32 5>
-; CHECK-NEXT:    [[SPLIT8:%.*]] = shufflevector <9 x double> [[A_FOO]], <9 x double> undef, <3 x i32> <i32 6, i32 7, i32 8>
+; CHECK-NEXT:    [[SPLIT3:%.*]] = shufflevector <9 x double> [[B:%.*]], <9 x double> poison, <3 x i32> <i32 0, i32 1, i32 2>
+; CHECK-NEXT:    [[SPLIT4:%.*]] = shufflevector <9 x double> [[B]], <9 x double> poison, <3 x i32> <i32 3, i32 4, i32 5>
+; CHECK-NEXT:    [[SPLIT5:%.*]] = shufflevector <9 x double> [[B]], <9 x double> poison, <3 x i32> <i32 6, i32 7, i32 8>
+; CHECK-NEXT:    [[SPLIT6:%.*]] = shufflevector <9 x double> [[A_FOO]], <9 x double> poison, <3 x i32> <i32 0, i32 1, i32 2>
+; CHECK-NEXT:    [[SPLIT7:%.*]] = shufflevector <9 x double> [[A_FOO]], <9 x double> poison, <3 x i32> <i32 3, i32 4, i32 5>
+; CHECK-NEXT:    [[SPLIT8:%.*]] = shufflevector <9 x double> [[A_FOO]], <9 x double> poison, <3 x i32> <i32 6, i32 7, i32 8>
+; CHECK-NEXT:    [[BLOCK:%.*]] = shufflevector <3 x double> [[SPLIT3]], <3 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP22:%.*]] = extractelement <3 x double> [[SPLIT6]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x double> poison, double [[TMP22]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP23:%.*]] = fmul <1 x double> [[BLOCK]], [[SPLAT_SPLAT]]
+; CHECK-NEXT:    [[BLOCK9:%.*]] = shufflevector <3 x double> [[SPLIT4]], <3 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP24:%.*]] = extractelement <3 x double> [[SPLIT6]], i64 1
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT10:%.*]] = insertelement <1 x double> poison, double [[TMP24]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLAT11:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT10]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP25:%.*]] = fmul <1 x double> [[BLOCK9]], [[SPLAT_SPLAT11]]
+; CHECK-NEXT:    [[TMP26:%.*]] = fadd <1 x double> [[TMP23]], [[TMP25]]
+; CHECK-NEXT:    [[BLOCK12:%.*]] = shufflevector <3 x double> [[SPLIT5]], <3 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP27:%.*]] = extractelement <3 x double> [[SPLIT6]], i64 2
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT13:%.*]] = insertelement <1 x double> poison, double [[TMP27]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLAT14:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT13]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP28:%.*]] = fmul <1 x double> [[BLOCK12]], [[SPLAT_SPLAT14]]
+; CHECK-NEXT:    [[TMP29:%.*]] = fadd <1 x double> [[TMP26]], [[TMP28]]
+; CHECK-NEXT:    [[TMP30:%.*]] = shufflevector <1 x double> [[TMP29]], <1 x double> poison, <3 x i32> <i32 0, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP31:%.*]] = shufflevector <3 x double> undef, <3 x double> [[TMP30]], <3 x i32> <i32 3, i32 1, i32 2>
+; CHECK-NEXT:    [[BLOCK15:%.*]] = shufflevector <3 x double> [[SPLIT3]], <3 x double> poison, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[TMP32:%.*]] = extractelement <3 x double> [[SPLIT6]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT16:%.*]] = insertelement <1 x double> poison, double [[TMP32]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLAT17:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT16]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP33:%.*]] = fmul <1 x double> [[BLOCK15]], [[SPLAT_SPLAT17]]
+; CHECK-NEXT:    [[BLOCK18:%.*]] = shufflevector <3 x double> [[SPLIT4]], <3 x double> poison, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[TMP34:%.*]] = extractelement <3 x double> [[SPLIT6]], i64 1
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT19:%.*]] = insertelement <1 x double> poison, double [[TMP34]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLAT20:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT19]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP35:%.*]] = fmul <1 x double> [[BLOCK18]], [[SPLAT_SPLAT20]]
+; CHECK-NEXT:    [[TMP36:%.*]] = fadd <1 x double> [[TMP33]], [[TMP35]]
+; CHECK-NEXT:    [[BLOCK21:%.*]] = shufflevector <3 x double> [[SPLIT5]], <3 x double> poison, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[TMP37:%.*]] = extractelement <3 x double> [[SPLIT6]], i64 2
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT22:%.*]] = insertelement <1 x double> poison, double [[TMP37]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLAT23:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT22]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP38:%.*]] = fmul <1 x double> [[BLOCK21]], [[SPLAT_SPLAT23]]
+; CHECK-NEXT:    [[TMP39:%.*]] = fadd <1 x double> [[TMP36]], [[TMP38]]
+; CHECK-NEXT:    [[TMP40:%.*]] = shufflevector <1 x double> [[TMP39]], <1 x double> poison, <3 x i32> <i32 0, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP41:%.*]] = shufflevector <3 x double> [[TMP31]], <3 x double> [[TMP40]], <3 x i32> <i32 0, i32 3, i32 2>
+; CHECK-NEXT:    [[BLOCK24:%.*]] = shufflevector <3 x double> [[SPLIT3]], <3 x double> poison, <1 x i32> <i32 2>
+; CHECK-NEXT:    [[TMP42:%.*]] = extractelement <3 x double> [[SPLIT6]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT25:%.*]] = insertelement <1 x double> poison, double [[TMP42]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLAT26:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT25]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP43:%.*]] = fmul <1 x double> [[BLOCK24]], [[SPLAT_SPLAT26]]
+; CHECK-NEXT:    [[BLOCK27:%.*]] = shufflevector <3 x double> [[SPLIT4]], <3 x double> poison, <1 x i32> <i32 2>
+; CHECK-NEXT:    [[TMP44:%.*]] = extractelement <3 x double> [[SPLIT6]], i64 1
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT28:%.*]] = insertelement <1 x double> poison, double [[TMP44]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLAT29:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT28]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP45:%.*]] = fmul <1 x double> [[BLOCK27]], [[SPLAT_SPLAT29]]
+; CHECK-NEXT:    [[TMP46:%.*]] = fadd <1 x double> [[TMP43]], [[TMP45]]
+; CHECK-NEXT:    [[BLOCK30:%.*]] = shufflevector <3 x double> [[SPLIT5]], <3 x double> poison, <1 x i32> <i32 2>
+; CHECK-NEXT:    [[TMP47:%.*]] = extractelement <3 x double> [[SPLIT6]], i64 2
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT31:%.*]] = insertelement <1 x double> poison, double [[TMP47]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLAT32:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT31]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP48:%.*]] = fmul <1 x double> [[BLOCK30]], [[SPLAT_SPLAT32]]
+; CHECK-NEXT:    [[TMP49:%.*]] = fadd <1 x double> [[TMP46]], [[TMP48]]
+; CHECK-NEXT:    [[TMP50:%.*]] = shufflevector <1 x double> [[TMP49]], <1 x double> poison, <3 x i32> <i32 0, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP51:%.*]] = shufflevector <3 x double> [[TMP41]], <3 x double> [[TMP50]], <3 x i32> <i32 0, i32 1, i32 3>
+; CHECK-NEXT:    [[BLOCK33:%.*]] = shufflevector <3 x double> [[SPLIT3]], <3 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP52:%.*]] = extractelement <3 x double> [[SPLIT7]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT34:%.*]] = insertelement <1 x double> poison, double [[TMP52]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLAT35:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT34]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP53:%.*]] = fmul <1 x double> [[BLOCK33]], [[SPLAT_SPLAT35]]
+; CHECK-NEXT:    [[BLOCK36:%.*]] = shufflevector <3 x double> [[SPLIT4]], <3 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP54:%.*]] = extractelement <3 x double> [[SPLIT7]], i64 1
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT37:%.*]] = insertelement <1 x double> poison, double [[TMP54]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLAT38:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT37]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP55:%.*]] = fmul <1 x double> [[BLOCK36]], [[SPLAT_SPLAT38]]
+; CHECK-NEXT:    [[TMP56:%.*]] = fadd <1 x double> [[TMP53]], [[TMP55]]
+; CHECK-NEXT:    [[BLOCK39:%.*]] = shufflevector <3 x double> [[SPLIT5]], <3 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP57:%.*]] = extractelement <3 x double> [[SPLIT7]], i64 2
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT40:%.*]] = insertelement <1 x double> poison, double [[TMP57]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLAT41:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT40]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP58:%.*]] = fmul <1 x double> [[BLOCK39]], [[SPLAT_SPLAT41]]
+; CHECK-NEXT:    [[TMP59:%.*]] = fadd <1 x double> [[TMP56]], [[TMP58]]
+; CHECK-NEXT:    [[TMP60:%.*]] = shufflevector <1 x double> [[TMP59]], <1 x double> poison, <3 x i32> <i32 0, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP61:%.*]] = shufflevector <3 x double> undef, <3 x double> [[TMP60]], <3 x i32> <i32 3, i32 1, i32 2>
+; CHECK-NEXT:    [[BLOCK42:%.*]] = shufflevector <3 x double> [[SPLIT3]], <3 x double> poison, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[TMP62:%.*]] = extractelement <3 x double> [[SPLIT7]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT43:%.*]] = insertelement <1 x double> poison, double [[TMP62]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLAT44:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT43]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP63:%.*]] = fmul <1 x double> [[BLOCK42]], [[SPLAT_SPLAT44]]
+; CHECK-NEXT:    [[BLOCK45:%.*]] = shufflevector <3 x double> [[SPLIT4]], <3 x double> poison, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[TMP64:%.*]] = extractelement <3 x double> [[SPLIT7]], i64 1
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT46:%.*]] = insertelement <1 x double> poison, double [[TMP64]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLAT47:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT46]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP65:%.*]] = fmul <1 x double> [[BLOCK45]], [[SPLAT_SPLAT47]]
+; CHECK-NEXT:    [[TMP66:%.*]] = fadd <1 x double> [[TMP63]], [[TMP65]]
+; CHECK-NEXT:    [[BLOCK48:%.*]] = shufflevector <3 x double> [[SPLIT5]], <3 x double> poison, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[TMP67:%.*]] = extractelement <3 x double> [[SPLIT7]], i64 2
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT49:%.*]] = insertelement <1 x double> poison, double [[TMP67]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLAT50:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT49]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP68:%.*]] = fmul <1 x double> [[BLOCK48]], [[SPLAT_SPLAT50]]
+; CHECK-NEXT:    [[TMP69:%.*]] = fadd <1 x double> [[TMP66]], [[TMP68]]
+; CHECK-NEXT:    [[TMP70:%.*]] = shufflevector <1 x double> [[TMP69]], <1 x double> poison, <3 x i32> <i32 0, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP71:%.*]] = shufflevector <3 x double> [[TMP61]], <3 x double> [[TMP70]], <3 x i32> <i32 0, i32 3, i32 2>
+; CHECK-NEXT:    [[BLOCK51:%.*]] = shufflevector <3 x double> [[SPLIT3]], <3 x double> poison, <1 x i32> <i32 2>
+; CHECK-NEXT:    [[TMP72:%.*]] = extractelement <3 x double> [[SPLIT7]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT52:%.*]] = insertelement <1 x double> poison, double [[TMP72]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLAT53:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT52]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP73:%.*]] = fmul <1 x double> [[BLOCK51]], [[SPLAT_SPLAT53]]
+; CHECK-NEXT:    [[BLOCK54:%.*]] = shufflevector <3 x double> [[SPLIT4]], <3 x double> poison, <1 x i32> <i32 2>
+; CHECK-NEXT:    [[TMP74:%.*]] = extractelement <3 x double> [[SPLIT7]], i64 1
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT55:%.*]] = insertelement <1 x double> poison, double [[TMP74]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLAT56:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT55]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP75:%.*]] = fmul <1 x double> [[BLOCK54]], [[SPLAT_SPLAT56]]
+; CHECK-NEXT:    [[TMP76:%.*]] = fadd <1 x double> [[TMP73]], [[TMP75]]
+; CHECK-NEXT:    [[BLOCK57:%.*]] = shufflevector <3 x double> [[SPLIT5]], <3 x double> poison, <1 x i32> <i32 2>
+; CHECK-NEXT:    [[TMP77:%.*]] = extractelement <3 x double> [[SPLIT7]], i64 2
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT58:%.*]] = insertelement <1 x double> poison, double [[TMP77]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLAT59:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT58]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP78:%.*]] = fmul <1 x double> [[BLOCK57]], [[SPLAT_SPLAT59]]
+; CHECK-NEXT:    [[TMP79:%.*]] = fadd <1 x double> [[TMP76]], [[TMP78]]
+; CHECK-NEXT:    [[TMP80:%.*]] = shufflevector <1 x double> [[TMP79]], <1 x double> poison, <3 x i32> <i32 0, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP81:%.*]] = shufflevector <3 x double> [[TMP71]], <3 x double> [[TMP80]], <3 x i32> <i32 0, i32 1, i32 3>
+; CHECK-NEXT:    [[BLOCK60:%.*]] = shufflevector <3 x double> [[SPLIT3]], <3 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP82:%.*]] = extractelement <3 x double> [[SPLIT8]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT61:%.*]] = insertelement <1 x double> poison, double [[TMP82]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLAT62:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT61]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP83:%.*]] = fmul <1 x double> [[BLOCK60]], [[SPLAT_SPLAT62]]
+; CHECK-NEXT:    [[BLOCK63:%.*]] = shufflevector <3 x double> [[SPLIT4]], <3 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP84:%.*]] = extractelement <3 x double> [[SPLIT8]], i64 1
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT64:%.*]] = insertelement <1 x double> poison, double [[TMP84]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLAT65:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT64]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP85:%.*]] = fmul <1 x double> [[BLOCK63]], [[SPLAT_SPLAT65]]
+; CHECK-NEXT:    [[TMP86:%.*]] = fadd <1 x double> [[TMP83]], [[TMP85]]
+; CHECK-NEXT:    [[BLOCK66:%.*]] = shufflevector <3 x double> [[SPLIT5]], <3 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP87:%.*]] = extractelement <3 x double> [[SPLIT8]], i64 2
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT67:%.*]] = insertelement <1 x double> poison, double [[TMP87]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLAT68:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT67]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP88:%.*]] = fmul <1 x double> [[BLOCK66]], [[SPLAT_SPLAT68]]
+; CHECK-NEXT:    [[TMP89:%.*]] = fadd <1 x double> [[TMP86]], [[TMP88]]
+; CHECK-NEXT:    [[TMP90:%.*]] = shufflevector <1 x double> [[TMP89]], <1 x double> poison, <3 x i32> <i32 0, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP91:%.*]] = shufflevector <3 x double> undef, <3 x double> [[TMP90]], <3 x i32> <i32 3, i32 1, i32 2>
+; CHECK-NEXT:    [[BLOCK69:%.*]] = shufflevector <3 x double> [[SPLIT3]], <3 x double> poison, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[TMP92:%.*]] = extractelement <3 x double> [[SPLIT8]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT70:%.*]] = insertelement <1 x double> poison, double [[TMP92]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLAT71:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT70]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP93:%.*]] = fmul <1 x double> [[BLOCK69]], [[SPLAT_SPLAT71]]
+; CHECK-NEXT:    [[BLOCK72:%.*]] = shufflevector <3 x double> [[SPLIT4]], <3 x double> poison, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[TMP94:%.*]] = extractelement <3 x double> [[SPLIT8]], i64 1
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT73:%.*]] = insertelement <1 x double> poison, double [[TMP94]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLAT74:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT73]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP95:%.*]] = fmul <1 x double> [[BLOCK72]], [[SPLAT_SPLAT74]]
+; CHECK-NEXT:    [[TMP96:%.*]] = fadd <1 x double> [[TMP93]], [[TMP95]]
+; CHECK-NEXT:    [[BLOCK75:%.*]] = shufflevector <3 x double> [[SPLIT5]], <3 x double> poison, <1 x i32> <i32 1>
+; CHECK-NEXT:    [[TMP97:%.*]] = extractelement <3 x double> [[SPLIT8]], i64 2
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT76:%.*]] = insertelement <1 x double> poison, double [[TMP97]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLAT77:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT76]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP98:%.*]] = fmul <1 x double> [[BLOCK75]], [[SPLAT_SPLAT77]]
+; CHECK-NEXT:    [[TMP99:%.*]] = fadd <1 x double> [[TMP96]], [[TMP98]]
+; CHECK-NEXT:    [[TMP100:%.*]] = shufflevector <1 x double> [[TMP99]], <1 x double> poison, <3 x i32> <i32 0, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP101:%.*]] = shufflevector <3 x double> [[TMP91]], <3 x double> [[TMP100]], <3 x i32> <i32 0, i32 3, i32 2>
+; CHECK-NEXT:    [[BLOCK78:%.*]] = shufflevector <3 x double> [[SPLIT3]], <3 x double> poison, <1 x i32> <i32 2>
+; CHECK-NEXT:    [[TMP102:%.*]] = extractelement <3 x double> [[SPLIT8]], i64 0
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT79:%.*]] = insertelement <1 x double> poison, double [[TMP102]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLAT80:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT79]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP103:%.*]] = fmul <1 x double> [[BLOCK78]], [[SPLAT_SPLAT80]]
+; CHECK-NEXT:    [[BLOCK81:%.*]] = shufflevector <3 x double> [[SPLIT4]], <3 x double> poison, <1 x i32> <i32 2>
+; CHECK-NEXT:    [[TMP104:%.*]] = extractelement <3 x double> [[SPLIT8]], i64 1
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT82:%.*]] = insertelement <1 x double> poison, double [[TMP104]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLAT83:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT82]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP105:%.*]] = fmul <1 x double> [[BLOCK81]], [[SPLAT_SPLAT83]]
+; CHECK-NEXT:    [[TMP106:%.*]] = fadd <1 x double> [[TMP103]], [[TMP105]]
+; CHECK-NEXT:    [[BLOCK84:%.*]] = shufflevector <3 x double> [[SPLIT5]], <3 x double> poison, <1 x i32> <i32 2>
+; CHECK-NEXT:    [[TMP107:%.*]] = extractelement <3 x double> [[SPLIT8]], i64 2
+; CHECK-NEXT:    [[SPLAT_SPLATINSERT85:%.*]] = insertelement <1 x double> poison, double [[TMP107]], i32 0
+; CHECK-NEXT:    [[SPLAT_SPLAT86:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT85]], <1 x double> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP108:%.*]] = fmul <1 x double> [[BLOCK84]], [[SPLAT_SPLAT86]]
+; CHECK-NEXT:    [[TMP109:%.*]] = fadd <1 x double> [[TMP106]], [[TMP108]]
+; CHECK-NEXT:    [[TMP110:%.*]] = shufflevector <1 x double> [[TMP109]], <1 x double> poison, <3 x i32> <i32 0, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP111:%.*]] = shufflevector <3 x double> [[TMP101]], <3 x double> [[TMP110]], <3 x i32> <i32 0, i32 1, i32 3>
+; CHECK-NEXT:    [[TMP112:%.*]] = shufflevector <3 x double> [[TMP51]], <3 x double> [[TMP81]], <6 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5>
+; CHECK-NEXT:    [[TMP113:%.*]] = shufflevector <3 x double> [[TMP111]], <3 x double> poison, <6 x i32> <i32 0, i32 1, i32 2, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP114:%.*]] = shufflevector <6 x double> [[TMP112]], <6 x double> [[TMP113]], <9 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
+; CHECK-NEXT:    ret <9 x double> [[TMP114]]
 ;
   %A.trans = tail call <9 x double> @llvm.matrix.transpose.v9f64(<9 x double> %A, i32 3, i32 3)
   %A.foo = call <9 x double> @foo(<9 x double> %A.trans)

diff  --git a/llvm/test/Transforms/LowerMatrixIntrinsics/propagate-forward.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/propagate-forward.ll
index b8f74e9793c9..51796f3d1b60 100644
--- a/llvm/test/Transforms/LowerMatrixIntrinsics/propagate-forward.ll
+++ b/llvm/test/Transforms/LowerMatrixIntrinsics/propagate-forward.ll
@@ -7,10 +7,10 @@
 define void @transpose_store(<8 x double> %a, <8 x double>* %Ptr) {
 ; CHECK-LABEL: @transpose_store(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[SPLIT:%.*]] = shufflevector <8 x double> [[A:%.*]], <8 x double> undef, <2 x i32> <i32 0, i32 1>
-; CHECK-NEXT:    [[SPLIT1:%.*]] = shufflevector <8 x double> [[A]], <8 x double> undef, <2 x i32> <i32 2, i32 3>
-; CHECK-NEXT:    [[SPLIT2:%.*]] = shufflevector <8 x double> [[A]], <8 x double> undef, <2 x i32> <i32 4, i32 5>
-; CHECK-NEXT:    [[SPLIT3:%.*]] = shufflevector <8 x double> [[A]], <8 x double> undef, <2 x i32> <i32 6, i32 7>
+; CHECK-NEXT:    [[SPLIT:%.*]] = shufflevector <8 x double> [[A:%.*]], <8 x double> poison, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT:    [[SPLIT1:%.*]] = shufflevector <8 x double> [[A]], <8 x double> poison, <2 x i32> <i32 2, i32 3>
+; CHECK-NEXT:    [[SPLIT2:%.*]] = shufflevector <8 x double> [[A]], <8 x double> poison, <2 x i32> <i32 4, i32 5>
+; CHECK-NEXT:    [[SPLIT3:%.*]] = shufflevector <8 x double> [[A]], <8 x double> poison, <2 x i32> <i32 6, i32 7>
 ; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x double> [[SPLIT]], i64 0
 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x double> undef, double [[TMP0]], i64 0
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x double> [[SPLIT1]], i64 0
@@ -46,10 +46,10 @@ declare <8 x double> @llvm.matrix.transpose(<8 x double>, i32, i32)
 define <8 x double> @transpose_fadd(<8 x double> %a) {
 ; CHECK-LABEL: @transpose_fadd(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[SPLIT:%.*]] = shufflevector <8 x double> [[A:%.*]], <8 x double> undef, <2 x i32> <i32 0, i32 1>
-; CHECK-NEXT:    [[SPLIT1:%.*]] = shufflevector <8 x double> [[A]], <8 x double> undef, <2 x i32> <i32 2, i32 3>
-; CHECK-NEXT:    [[SPLIT2:%.*]] = shufflevector <8 x double> [[A]], <8 x double> undef, <2 x i32> <i32 4, i32 5>
-; CHECK-NEXT:    [[SPLIT3:%.*]] = shufflevector <8 x double> [[A]], <8 x double> undef, <2 x i32> <i32 6, i32 7>
+; CHECK-NEXT:    [[SPLIT:%.*]] = shufflevector <8 x double> [[A:%.*]], <8 x double> poison, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT:    [[SPLIT1:%.*]] = shufflevector <8 x double> [[A]], <8 x double> poison, <2 x i32> <i32 2, i32 3>
+; CHECK-NEXT:    [[SPLIT2:%.*]] = shufflevector <8 x double> [[A]], <8 x double> poison, <2 x i32> <i32 4, i32 5>
+; CHECK-NEXT:    [[SPLIT3:%.*]] = shufflevector <8 x double> [[A]], <8 x double> poison, <2 x i32> <i32 6, i32 7>
 ; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x double> [[SPLIT]], i64 0
 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x double> undef, double [[TMP0]], i64 0
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x double> [[SPLIT1]], i64 0
@@ -66,8 +66,8 @@ define <8 x double> @transpose_fadd(<8 x double> %a) {
 ; CHECK-NEXT:    [[TMP13:%.*]] = insertelement <4 x double> [[TMP11]], double [[TMP12]], i64 2
 ; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <2 x double> [[SPLIT3]], i64 1
 ; CHECK-NEXT:    [[TMP15:%.*]] = insertelement <4 x double> [[TMP13]], double [[TMP14]], i64 3
-; CHECK-NEXT:    [[SPLIT4:%.*]] = shufflevector <8 x double> [[A]], <8 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT:    [[SPLIT5:%.*]] = shufflevector <8 x double> [[A]], <8 x double> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT:    [[SPLIT4:%.*]] = shufflevector <8 x double> [[A]], <8 x double> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT:    [[SPLIT5:%.*]] = shufflevector <8 x double> [[A]], <8 x double> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
 ; CHECK-NEXT:    [[TMP16:%.*]] = fadd <4 x double> [[TMP7]], [[SPLIT4]]
 ; CHECK-NEXT:    [[TMP17:%.*]] = fadd <4 x double> [[TMP15]], [[SPLIT5]]
 ; CHECK-NEXT:    [[TMP18:%.*]] = shufflevector <4 x double> [[TMP16]], <4 x double> [[TMP17]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -82,10 +82,10 @@ entry:
 define <8 x double> @transpose_fmul(<8 x double> %a) {
 ; CHECK-LABEL: @transpose_fmul(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[SPLIT:%.*]] = shufflevector <8 x double> [[A:%.*]], <8 x double> undef, <2 x i32> <i32 0, i32 1>
-; CHECK-NEXT:    [[SPLIT1:%.*]] = shufflevector <8 x double> [[A]], <8 x double> undef, <2 x i32> <i32 2, i32 3>
-; CHECK-NEXT:    [[SPLIT2:%.*]] = shufflevector <8 x double> [[A]], <8 x double> undef, <2 x i32> <i32 4, i32 5>
-; CHECK-NEXT:    [[SPLIT3:%.*]] = shufflevector <8 x double> [[A]], <8 x double> undef, <2 x i32> <i32 6, i32 7>
+; CHECK-NEXT:    [[SPLIT:%.*]] = shufflevector <8 x double> [[A:%.*]], <8 x double> poison, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT:    [[SPLIT1:%.*]] = shufflevector <8 x double> [[A]], <8 x double> poison, <2 x i32> <i32 2, i32 3>
+; CHECK-NEXT:    [[SPLIT2:%.*]] = shufflevector <8 x double> [[A]], <8 x double> poison, <2 x i32> <i32 4, i32 5>
+; CHECK-NEXT:    [[SPLIT3:%.*]] = shufflevector <8 x double> [[A]], <8 x double> poison, <2 x i32> <i32 6, i32 7>
 ; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x double> [[SPLIT]], i64 0
 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x double> undef, double [[TMP0]], i64 0
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x double> [[SPLIT1]], i64 0
@@ -102,8 +102,8 @@ define <8 x double> @transpose_fmul(<8 x double> %a) {
 ; CHECK-NEXT:    [[TMP13:%.*]] = insertelement <4 x double> [[TMP11]], double [[TMP12]], i64 2
 ; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <2 x double> [[SPLIT3]], i64 1
 ; CHECK-NEXT:    [[TMP15:%.*]] = insertelement <4 x double> [[TMP13]], double [[TMP14]], i64 3
-; CHECK-NEXT:    [[SPLIT4:%.*]] = shufflevector <8 x double> [[A]], <8 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT:    [[SPLIT5:%.*]] = shufflevector <8 x double> [[A]], <8 x double> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT:    [[SPLIT4:%.*]] = shufflevector <8 x double> [[A]], <8 x double> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT:    [[SPLIT5:%.*]] = shufflevector <8 x double> [[A]], <8 x double> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
 ; CHECK-NEXT:    [[TMP16:%.*]] = fmul <4 x double> [[TMP7]], [[SPLIT4]]
 ; CHECK-NEXT:    [[TMP17:%.*]] = fmul <4 x double> [[TMP15]], [[SPLIT5]]
 ; CHECK-NEXT:    [[TMP18:%.*]] = shufflevector <4 x double> [[TMP16]], <4 x double> [[TMP17]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>

diff  --git a/llvm/test/Transforms/LowerMatrixIntrinsics/propagate-mixed-users.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/propagate-mixed-users.ll
index 614fdd860a16..603db2330e3d 100644
--- a/llvm/test/Transforms/LowerMatrixIntrinsics/propagate-mixed-users.ll
+++ b/llvm/test/Transforms/LowerMatrixIntrinsics/propagate-mixed-users.ll
@@ -6,8 +6,8 @@
 ; matrix in a flat vector for function calls and returns.
 define <8 x double> @strided_load_4x4(<8 x double> %in, <8 x double>* %Ptr) {
 ; CHECK-LABEL: @strided_load_4x4(
-; CHECK-NEXT:    [[SPLIT:%.*]] = shufflevector <8 x double> [[IN:%.*]], <8 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT:    [[SPLIT1:%.*]] = shufflevector <8 x double> [[IN]], <8 x double> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT:    [[SPLIT:%.*]] = shufflevector <8 x double> [[IN:%.*]], <8 x double> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT:    [[SPLIT1:%.*]] = shufflevector <8 x double> [[IN]], <8 x double> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x double> [[SPLIT]], i64 0
 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> undef, double [[TMP1]], i64 0
 ; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x double> [[SPLIT1]], i64 0

diff  --git a/llvm/test/Transforms/LowerMatrixIntrinsics/store-align-volatile.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/store-align-volatile.ll
index 6688dadbac0a..2ed077ab9ce4 100644
--- a/llvm/test/Transforms/LowerMatrixIntrinsics/store-align-volatile.ll
+++ b/llvm/test/Transforms/LowerMatrixIntrinsics/store-align-volatile.ll
@@ -3,8 +3,8 @@
 
 define void @strided_store_volatile(<6 x i32> %in, i32* %out) {
 ; CHECK-LABEL: @strided_store_volatile(
-; CHECK-NEXT:    [[SPLIT:%.*]] = shufflevector <6 x i32> [[IN:%.*]], <6 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
-; CHECK-NEXT:    [[SPLIT1:%.*]] = shufflevector <6 x i32> [[IN]], <6 x i32> undef, <3 x i32> <i32 3, i32 4, i32 5>
+; CHECK-NEXT:    [[SPLIT:%.*]] = shufflevector <6 x i32> [[IN:%.*]], <6 x i32> poison, <3 x i32> <i32 0, i32 1, i32 2>
+; CHECK-NEXT:    [[SPLIT1:%.*]] = shufflevector <6 x i32> [[IN]], <6 x i32> poison, <3 x i32> <i32 3, i32 4, i32 5>
 ; CHECK-NEXT:    [[VEC_CAST:%.*]] = bitcast i32* [[OUT:%.*]] to <3 x i32>*
 ; CHECK-NEXT:    store volatile <3 x i32> [[SPLIT]], <3 x i32>* [[VEC_CAST]], align 4
 ; CHECK-NEXT:    [[VEC_GEP:%.*]] = getelementptr i32, i32* [[OUT]], i64 5
@@ -38,8 +38,8 @@ declare <4 x i32> @llvm.matrix.multiply(<4 x i32>, <4 x i32>, i32, i32, i32)
 
 define void @strided_store_align32(<6 x i32> %in, i64 %stride, i32* %out) {
 ; CHECK-LABEL: @strided_store_align32(
-; CHECK-NEXT:    [[SPLIT:%.*]] = shufflevector <6 x i32> [[IN:%.*]], <6 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
-; CHECK-NEXT:    [[SPLIT1:%.*]] = shufflevector <6 x i32> [[IN]], <6 x i32> undef, <3 x i32> <i32 3, i32 4, i32 5>
+; CHECK-NEXT:    [[SPLIT:%.*]] = shufflevector <6 x i32> [[IN:%.*]], <6 x i32> poison, <3 x i32> <i32 0, i32 1, i32 2>
+; CHECK-NEXT:    [[SPLIT1:%.*]] = shufflevector <6 x i32> [[IN]], <6 x i32> poison, <3 x i32> <i32 3, i32 4, i32 5>
 ; CHECK-NEXT:    [[VEC_START:%.*]] = mul i64 0, [[STRIDE:%.*]]
 ; CHECK-NEXT:    [[VEC_GEP:%.*]] = getelementptr i32, i32* [[OUT:%.*]], i64 [[VEC_START]]
 ; CHECK-NEXT:    [[VEC_CAST:%.*]] = bitcast i32* [[VEC_GEP]] to <3 x i32>*
@@ -56,8 +56,8 @@ define void @strided_store_align32(<6 x i32> %in, i64 %stride, i32* %out) {
 
 define void @strided_store_align2(<6 x i32> %in, i64 %stride, i32* %out) {
 ; CHECK-LABEL: @strided_store_align2(
-; CHECK-NEXT:    [[SPLIT:%.*]] = shufflevector <6 x i32> [[IN:%.*]], <6 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
-; CHECK-NEXT:    [[SPLIT1:%.*]] = shufflevector <6 x i32> [[IN]], <6 x i32> undef, <3 x i32> <i32 3, i32 4, i32 5>
+; CHECK-NEXT:    [[SPLIT:%.*]] = shufflevector <6 x i32> [[IN:%.*]], <6 x i32> poison, <3 x i32> <i32 0, i32 1, i32 2>
+; CHECK-NEXT:    [[SPLIT1:%.*]] = shufflevector <6 x i32> [[IN]], <6 x i32> poison, <3 x i32> <i32 3, i32 4, i32 5>
 ; CHECK-NEXT:    [[VEC_START:%.*]] = mul i64 0, [[STRIDE:%.*]]
 ; CHECK-NEXT:    [[VEC_GEP:%.*]] = getelementptr i32, i32* [[OUT:%.*]], i64 [[VEC_START]]
 ; CHECK-NEXT:    [[VEC_CAST:%.*]] = bitcast i32* [[VEC_GEP]] to <3 x i32>*
@@ -89,9 +89,9 @@ define void @multiply_store_align16_stride8(<4 x i32> %in, <4 x i32>* %out) {
 
 define void @strided_store_align8_stride12(<6 x i32> %in, i32* %out) {
 ; CHECK-LABEL: @strided_store_align8_stride12(
-; CHECK-NEXT:    [[SPLIT:%.*]] = shufflevector <6 x i32> [[IN:%.*]], <6 x i32> undef, <2 x i32> <i32 0, i32 1>
-; CHECK-NEXT:    [[SPLIT1:%.*]] = shufflevector <6 x i32> [[IN]], <6 x i32> undef, <2 x i32> <i32 2, i32 3>
-; CHECK-NEXT:    [[SPLIT2:%.*]] = shufflevector <6 x i32> [[IN]], <6 x i32> undef, <2 x i32> <i32 4, i32 5>
+; CHECK-NEXT:    [[SPLIT:%.*]] = shufflevector <6 x i32> [[IN:%.*]], <6 x i32> poison, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT:    [[SPLIT1:%.*]] = shufflevector <6 x i32> [[IN]], <6 x i32> poison, <2 x i32> <i32 2, i32 3>
+; CHECK-NEXT:    [[SPLIT2:%.*]] = shufflevector <6 x i32> [[IN]], <6 x i32> poison, <2 x i32> <i32 4, i32 5>
 ; CHECK-NEXT:    [[VEC_CAST:%.*]] = bitcast i32* [[OUT:%.*]] to <2 x i32>*
 ; CHECK-NEXT:    store <2 x i32> [[SPLIT]], <2 x i32>* [[VEC_CAST]], align 8
 ; CHECK-NEXT:    [[VEC_GEP:%.*]] = getelementptr i32, i32* [[OUT]], i64 3

diff  --git a/llvm/test/Transforms/LowerMatrixIntrinsics/strided-load-double.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/strided-load-double.ll
index 74820d342d75..aaf1b114cc30 100644
--- a/llvm/test/Transforms/LowerMatrixIntrinsics/strided-load-double.ll
+++ b/llvm/test/Transforms/LowerMatrixIntrinsics/strided-load-double.ll
@@ -6,21 +6,21 @@ define <9 x double> @strided_load_3x3(double* %in, i64 %stride) {
 ; CHECK-LABEL: @strided_load_3x3(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[VEC_START:%.*]] = mul i64 0, [[STRIDE:%.*]]
-; CHECK-NEXT:    [[VEC_GEP:%.*]] = getelementptr double, double* %in, i64 [[VEC_START]]
+; CHECK-NEXT:    [[VEC_GEP:%.*]] = getelementptr double, double* [[IN:%.*]], i64 [[VEC_START]]
 ; CHECK-NEXT:    [[VEC_CAST:%.*]] = bitcast double* [[VEC_GEP]] to <3 x double>*
 ; CHECK-NEXT:    [[COL_LOAD:%.*]] = load <3 x double>, <3 x double>* [[VEC_CAST]], align 8
 ; CHECK-NEXT:    [[VEC_START1:%.*]] = mul i64 1, [[STRIDE]]
-; CHECK-NEXT:    [[VEC_GEP2:%.*]] = getelementptr double, double* %in, i64 [[VEC_START1]]
+; CHECK-NEXT:    [[VEC_GEP2:%.*]] = getelementptr double, double* [[IN]], i64 [[VEC_START1]]
 ; CHECK-NEXT:    [[VEC_CAST3:%.*]] = bitcast double* [[VEC_GEP2]] to <3 x double>*
 ; CHECK-NEXT:    [[COL_LOAD4:%.*]] = load <3 x double>, <3 x double>* [[VEC_CAST3]], align 8
 ; CHECK-NEXT:    [[VEC_START5:%.*]] = mul i64 2, [[STRIDE]]
-; CHECK-NEXT:    [[VEC_GEP6:%.*]] = getelementptr double, double* %in, i64 [[VEC_START5]]
+; CHECK-NEXT:    [[VEC_GEP6:%.*]] = getelementptr double, double* [[IN]], i64 [[VEC_START5]]
 ; CHECK-NEXT:    [[VEC_CAST7:%.*]] = bitcast double* [[VEC_GEP6]] to <3 x double>*
 ; CHECK-NEXT:    [[COL_LOAD8:%.*]] = load <3 x double>, <3 x double>* [[VEC_CAST7]], align 8
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <3 x double> [[COL_LOAD]], <3 x double> [[COL_LOAD4]], <6 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5>
-; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <3 x double> [[COL_LOAD8]], <3 x double> undef, <6 x i32> <i32 0, i32 1, i32 2, i32 undef, i32 undef, i32 undef>
-; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <6 x double> [[TMP1]], <6 x double> [[TMP2]], <9 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
-; CHECK-NEXT:    ret <9 x double> [[TMP3]]
+; CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <3 x double> [[COL_LOAD]], <3 x double> [[COL_LOAD4]], <6 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5>
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <3 x double> [[COL_LOAD8]], <3 x double> poison, <6 x i32> <i32 0, i32 1, i32 2, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <6 x double> [[TMP0]], <6 x double> [[TMP1]], <9 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
+; CHECK-NEXT:    ret <9 x double> [[TMP2]]
 ;
 entry:
   %load = call <9 x double> @llvm.matrix.column.major.load(double* %in, i64 %stride, i1 false, i32 3, i32 3)
@@ -33,7 +33,7 @@ define <9 x double> @strided_load_9x1(double* %in, i64 %stride) {
 ; CHECK-LABEL: @strided_load_9x1(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[VEC_START:%.*]] = mul i64 0, [[STRIDE:%.*]]
-; CHECK-NEXT:    [[VEC_GEP:%.*]] = getelementptr double, double* %in, i64 [[VEC_START]]
+; CHECK-NEXT:    [[VEC_GEP:%.*]] = getelementptr double, double* [[IN:%.*]], i64 [[VEC_START]]
 ; CHECK-NEXT:    [[VEC_CAST:%.*]] = bitcast double* [[VEC_GEP]] to <9 x double>*
 ; CHECK-NEXT:    [[COL_LOAD:%.*]] = load <9 x double>, <9 x double>* [[VEC_CAST]], align 8
 ; CHECK-NEXT:    ret <9 x double> [[COL_LOAD]]
@@ -50,15 +50,15 @@ define <8 x double> @strided_load_4x2(double* %in, i64 %stride) {
 ; CHECK-LABEL: @strided_load_4x2(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[VEC_START:%.*]] = mul i64 0, [[STRIDE:%.*]]
-; CHECK-NEXT:    [[VEC_GEP:%.*]] = getelementptr double, double* %in, i64 [[VEC_START]]
+; CHECK-NEXT:    [[VEC_GEP:%.*]] = getelementptr double, double* [[IN:%.*]], i64 [[VEC_START]]
 ; CHECK-NEXT:    [[VEC_CAST:%.*]] = bitcast double* [[VEC_GEP]] to <4 x double>*
 ; CHECK-NEXT:    [[COL_LOAD:%.*]] = load <4 x double>, <4 x double>* [[VEC_CAST]], align 8
 ; CHECK-NEXT:    [[VEC_START1:%.*]] = mul i64 1, [[STRIDE]]
-; CHECK-NEXT:    [[VEC_GEP2:%.*]] = getelementptr double, double* %in, i64 [[VEC_START1]]
+; CHECK-NEXT:    [[VEC_GEP2:%.*]] = getelementptr double, double* [[IN]], i64 [[VEC_START1]]
 ; CHECK-NEXT:    [[VEC_CAST3:%.*]] = bitcast double* [[VEC_GEP2]] to <4 x double>*
 ; CHECK-NEXT:    [[COL_LOAD4:%.*]] = load <4 x double>, <4 x double>* [[VEC_CAST3]], align 8
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> [[COL_LOAD]], <4 x double> [[COL_LOAD4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT:    ret <8 x double> [[TMP1]]
+; CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <4 x double> [[COL_LOAD]], <4 x double> [[COL_LOAD4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT:    ret <8 x double> [[TMP0]]
 ;
 entry:
   %load = call <8 x double> @llvm.matrix.column.major.load.v8f64(double* %in, i64 %stride, i1 false, i32 4, i32 2)

diff  --git a/llvm/test/Transforms/LowerMatrixIntrinsics/strided-load-float.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/strided-load-float.ll
index 6b48a1709bde..a632f7c3bac1 100644
--- a/llvm/test/Transforms/LowerMatrixIntrinsics/strided-load-float.ll
+++ b/llvm/test/Transforms/LowerMatrixIntrinsics/strided-load-float.ll
@@ -6,21 +6,21 @@ define <9 x float> @strided_load_3x3(float* %in, i64 %stride) {
 ; CHECK-LABEL: @strided_load_3x3(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[VEC_START:%.*]] = mul i64 0, [[STRIDE:%.*]]
-; CHECK-NEXT:    [[VEC_GEP:%.*]] = getelementptr float, float* %in, i64 [[VEC_START]]
+; CHECK-NEXT:    [[VEC_GEP:%.*]] = getelementptr float, float* [[IN:%.*]], i64 [[VEC_START]]
 ; CHECK-NEXT:    [[VEC_CAST:%.*]] = bitcast float* [[VEC_GEP]] to <3 x float>*
 ; CHECK-NEXT:    [[COL_LOAD:%.*]] = load <3 x float>, <3 x float>* [[VEC_CAST]], align 4
 ; CHECK-NEXT:    [[VEC_START1:%.*]] = mul i64 1, [[STRIDE]]
-; CHECK-NEXT:    [[VEC_GEP2:%.*]] = getelementptr float, float* %in, i64 [[VEC_START1]]
+; CHECK-NEXT:    [[VEC_GEP2:%.*]] = getelementptr float, float* [[IN]], i64 [[VEC_START1]]
 ; CHECK-NEXT:    [[VEC_CAST3:%.*]] = bitcast float* [[VEC_GEP2]] to <3 x float>*
 ; CHECK-NEXT:    [[COL_LOAD4:%.*]] = load <3 x float>, <3 x float>* [[VEC_CAST3]], align 4
 ; CHECK-NEXT:    [[VEC_START5:%.*]] = mul i64 2, [[STRIDE]]
-; CHECK-NEXT:    [[VEC_GEP6:%.*]] = getelementptr float, float* %in, i64 [[VEC_START5]]
+; CHECK-NEXT:    [[VEC_GEP6:%.*]] = getelementptr float, float* [[IN]], i64 [[VEC_START5]]
 ; CHECK-NEXT:    [[VEC_CAST7:%.*]] = bitcast float* [[VEC_GEP6]] to <3 x float>*
 ; CHECK-NEXT:    [[COL_LOAD8:%.*]] = load <3 x float>, <3 x float>* [[VEC_CAST7]], align 4
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <3 x float> [[COL_LOAD]], <3 x float> [[COL_LOAD4]], <6 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5>
-; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <3 x float> [[COL_LOAD8]], <3 x float> undef, <6 x i32> <i32 0, i32 1, i32 2, i32 undef, i32 undef, i32 undef>
-; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <6 x float> [[TMP1]], <6 x float> [[TMP2]], <9 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
-; CHECK-NEXT:    ret <9 x float> [[TMP3]]
+; CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <3 x float> [[COL_LOAD]], <3 x float> [[COL_LOAD4]], <6 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5>
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <3 x float> [[COL_LOAD8]], <3 x float> poison, <6 x i32> <i32 0, i32 1, i32 2, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <6 x float> [[TMP0]], <6 x float> [[TMP1]], <9 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
+; CHECK-NEXT:    ret <9 x float> [[TMP2]]
 ;
 entry:
   %load = call <9 x float> @llvm.matrix.column.major.load(float* %in, i64 %stride, i1 false, i32 3, i32 3)
@@ -33,7 +33,7 @@ define <9 x float> @strided_load_9x1(float* %in, i64 %stride) {
 ; CHECK-LABEL: @strided_load_9x1(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[VEC_START:%.*]] = mul i64 0, [[STRIDE:%.*]]
-; CHECK-NEXT:    [[VEC_GEP:%.*]] = getelementptr float, float* %in, i64 [[VEC_START]]
+; CHECK-NEXT:    [[VEC_GEP:%.*]] = getelementptr float, float* [[IN:%.*]], i64 [[VEC_START]]
 ; CHECK-NEXT:    [[VEC_CAST:%.*]] = bitcast float* [[VEC_GEP]] to <9 x float>*
 ; CHECK-NEXT:    [[COL_LOAD:%.*]] = load <9 x float>, <9 x float>* [[VEC_CAST]], align 4
 ; CHECK-NEXT:    ret <9 x float> [[COL_LOAD]]
@@ -49,15 +49,15 @@ define <8 x float> @strided_load_4x2(float* %in, i64 %stride) {
 ; CHECK-LABEL: @strided_load_4x2(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[VEC_START:%.*]] = mul i64 0, [[STRIDE:%.*]]
-; CHECK-NEXT:    [[VEC_GEP:%.*]] = getelementptr float, float* %in, i64 [[VEC_START]]
+; CHECK-NEXT:    [[VEC_GEP:%.*]] = getelementptr float, float* [[IN:%.*]], i64 [[VEC_START]]
 ; CHECK-NEXT:    [[VEC_CAST:%.*]] = bitcast float* [[VEC_GEP]] to <4 x float>*
 ; CHECK-NEXT:    [[COL_LOAD:%.*]] = load <4 x float>, <4 x float>* [[VEC_CAST]], align 4
 ; CHECK-NEXT:    [[VEC_START1:%.*]] = mul i64 1, [[STRIDE]]
-; CHECK-NEXT:    [[VEC_GEP2:%.*]] = getelementptr float, float* %in, i64 [[VEC_START1]]
+; CHECK-NEXT:    [[VEC_GEP2:%.*]] = getelementptr float, float* [[IN]], i64 [[VEC_START1]]
 ; CHECK-NEXT:    [[VEC_CAST3:%.*]] = bitcast float* [[VEC_GEP2]] to <4 x float>*
 ; CHECK-NEXT:    [[COL_LOAD4:%.*]] = load <4 x float>, <4 x float>* [[VEC_CAST3]], align 4
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> [[COL_LOAD]], <4 x float> [[COL_LOAD4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT:    ret <8 x float> [[TMP1]]
+; CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <4 x float> [[COL_LOAD]], <4 x float> [[COL_LOAD4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT:    ret <8 x float> [[TMP0]]
 ;
 entry:
   %load = call <8 x float> @llvm.matrix.column.major.load.v8f32(float* %in, i64 %stride, i1 false, i32 4, i32 2)

diff  --git a/llvm/test/Transforms/LowerMatrixIntrinsics/strided-load-i32.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/strided-load-i32.ll
index 4f815af6d11c..3bf47afa4812 100644
--- a/llvm/test/Transforms/LowerMatrixIntrinsics/strided-load-i32.ll
+++ b/llvm/test/Transforms/LowerMatrixIntrinsics/strided-load-i32.ll
@@ -6,21 +6,21 @@ define <9 x i32> @strided_load_3x3(i32* %in, i64 %stride) {
 ; CHECK-LABEL: @strided_load_3x3(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[VEC_START:%.*]] = mul i64 0, [[STRIDE:%.*]]
-; CHECK-NEXT:    [[VEC_GEP:%.*]] = getelementptr i32, i32* %in, i64 [[VEC_START]]
+; CHECK-NEXT:    [[VEC_GEP:%.*]] = getelementptr i32, i32* [[IN:%.*]], i64 [[VEC_START]]
 ; CHECK-NEXT:    [[VEC_CAST:%.*]] = bitcast i32* [[VEC_GEP]] to <3 x i32>*
 ; CHECK-NEXT:    [[COL_LOAD:%.*]] = load <3 x i32>, <3 x i32>* [[VEC_CAST]], align 4
 ; CHECK-NEXT:    [[VEC_START1:%.*]] = mul i64 1, [[STRIDE]]
-; CHECK-NEXT:    [[VEC_GEP2:%.*]] = getelementptr i32, i32* %in, i64 [[VEC_START1]]
+; CHECK-NEXT:    [[VEC_GEP2:%.*]] = getelementptr i32, i32* [[IN]], i64 [[VEC_START1]]
 ; CHECK-NEXT:    [[VEC_CAST3:%.*]] = bitcast i32* [[VEC_GEP2]] to <3 x i32>*
 ; CHECK-NEXT:    [[COL_LOAD4:%.*]] = load <3 x i32>, <3 x i32>* [[VEC_CAST3]], align 4
 ; CHECK-NEXT:    [[VEC_START5:%.*]] = mul i64 2, [[STRIDE]]
-; CHECK-NEXT:    [[VEC_GEP6:%.*]] = getelementptr i32, i32* %in, i64 [[VEC_START5]]
+; CHECK-NEXT:    [[VEC_GEP6:%.*]] = getelementptr i32, i32* [[IN]], i64 [[VEC_START5]]
 ; CHECK-NEXT:    [[VEC_CAST7:%.*]] = bitcast i32* [[VEC_GEP6]] to <3 x i32>*
 ; CHECK-NEXT:    [[COL_LOAD8:%.*]] = load <3 x i32>, <3 x i32>* [[VEC_CAST7]], align 4
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <3 x i32> [[COL_LOAD]], <3 x i32> [[COL_LOAD4]], <6 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5>
-; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <3 x i32> [[COL_LOAD8]], <3 x i32> undef, <6 x i32> <i32 0, i32 1, i32 2, i32 undef, i32 undef, i32 undef>
-; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <6 x i32> [[TMP1]], <6 x i32> [[TMP2]], <9 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
-; CHECK-NEXT:    ret <9 x i32> [[TMP3]]
+; CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <3 x i32> [[COL_LOAD]], <3 x i32> [[COL_LOAD4]], <6 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5>
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <3 x i32> [[COL_LOAD8]], <3 x i32> poison, <6 x i32> <i32 0, i32 1, i32 2, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <6 x i32> [[TMP0]], <6 x i32> [[TMP1]], <9 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
+; CHECK-NEXT:    ret <9 x i32> [[TMP2]]
 ;
 entry:
   %load = call <9 x i32> @llvm.matrix.column.major.load(i32* %in, i64 %stride, i1 false, i32 3, i32 3)
@@ -33,7 +33,7 @@ define <9 x i32> @strided_load_9x1(i32* %in, i64 %stride) {
 ; CHECK-LABEL: @strided_load_9x1(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[VEC_START:%.*]] = mul i64 0, [[STRIDE:%.*]]
-; CHECK-NEXT:    [[VEC_GEP:%.*]] = getelementptr i32, i32* %in, i64 [[VEC_START]]
+; CHECK-NEXT:    [[VEC_GEP:%.*]] = getelementptr i32, i32* [[IN:%.*]], i64 [[VEC_START]]
 ; CHECK-NEXT:    [[VEC_CAST:%.*]] = bitcast i32* [[VEC_GEP]] to <9 x i32>*
 ; CHECK-NEXT:    [[COL_LOAD:%.*]] = load <9 x i32>, <9 x i32>* [[VEC_CAST]], align 4
 ; CHECK-NEXT:    ret <9 x i32> [[COL_LOAD]]
@@ -49,15 +49,15 @@ define <8 x i32> @strided_load_4x2(i32* %in, i64 %stride) {
 ; CHECK-LABEL: @strided_load_4x2(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[VEC_START:%.*]] = mul i64 0, [[STRIDE:%.*]]
-; CHECK-NEXT:    [[VEC_GEP:%.*]] = getelementptr i32, i32* %in, i64 [[VEC_START]]
+; CHECK-NEXT:    [[VEC_GEP:%.*]] = getelementptr i32, i32* [[IN:%.*]], i64 [[VEC_START]]
 ; CHECK-NEXT:    [[VEC_CAST:%.*]] = bitcast i32* [[VEC_GEP]] to <4 x i32>*
 ; CHECK-NEXT:    [[COL_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[VEC_CAST]], align 4
 ; CHECK-NEXT:    [[VEC_START1:%.*]] = mul i64 1, [[STRIDE]]
-; CHECK-NEXT:    [[VEC_GEP2:%.*]] = getelementptr i32, i32* %in, i64 [[VEC_START1]]
+; CHECK-NEXT:    [[VEC_GEP2:%.*]] = getelementptr i32, i32* [[IN]], i64 [[VEC_START1]]
 ; CHECK-NEXT:    [[VEC_CAST3:%.*]] = bitcast i32* [[VEC_GEP2]] to <4 x i32>*
 ; CHECK-NEXT:    [[COL_LOAD4:%.*]] = load <4 x i32>, <4 x i32>* [[VEC_CAST3]], align 4
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[COL_LOAD]], <4 x i32> [[COL_LOAD4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
+; CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <4 x i32> [[COL_LOAD]], <4 x i32> [[COL_LOAD4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT:    ret <8 x i32> [[TMP0]]
 ;
 entry:
   %load = call <8 x i32> @llvm.matrix.column.major.load.v8i32(i32* %in, i64 %stride, i1 false, i32 4, i32 2)

diff  --git a/llvm/test/Transforms/LowerMatrixIntrinsics/strided-store-double.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/strided-store-double.ll
index 3b0afec92dcc..817f989ba550 100644
--- a/llvm/test/Transforms/LowerMatrixIntrinsics/strided-store-double.ll
+++ b/llvm/test/Transforms/LowerMatrixIntrinsics/strided-store-double.ll
@@ -4,8 +4,8 @@
 
 define void @strided_store_3x2(<6 x double> %in, double* %out) {
 ; CHECK-LABEL: @strided_store_3x2(
-; CHECK-NEXT:    [[SPLIT:%.*]] = shufflevector <6 x double> [[IN:%.*]], <6 x double> undef, <3 x i32> <i32 0, i32 1, i32 2>
-; CHECK-NEXT:    [[SPLIT1:%.*]] = shufflevector <6 x double> [[IN]], <6 x double> undef, <3 x i32> <i32 3, i32 4, i32 5>
+; CHECK-NEXT:    [[SPLIT:%.*]] = shufflevector <6 x double> [[IN:%.*]], <6 x double> poison, <3 x i32> <i32 0, i32 1, i32 2>
+; CHECK-NEXT:    [[SPLIT1:%.*]] = shufflevector <6 x double> [[IN]], <6 x double> poison, <3 x i32> <i32 3, i32 4, i32 5>
 ; CHECK-NEXT:    [[VEC_CAST:%.*]] = bitcast double* [[OUT:%.*]] to <3 x double>*
 ; CHECK-NEXT:    store <3 x double> [[SPLIT]], <3 x double>* [[VEC_CAST]], align 8
 ; CHECK-NEXT:    [[VEC_GEP:%.*]] = getelementptr double, double* [[OUT]], i64 5
@@ -19,8 +19,8 @@ define void @strided_store_3x2(<6 x double> %in, double* %out) {
 
 define void @strided_store_3x2_nonconst_stride(<6 x double> %in, i64 %stride, double* %out) {
 ; CHECK-LABEL: @strided_store_3x2_nonconst_stride(
-; CHECK-NEXT:    [[SPLIT:%.*]] = shufflevector <6 x double> [[IN:%.*]], <6 x double> undef, <3 x i32> <i32 0, i32 1, i32 2>
-; CHECK-NEXT:    [[SPLIT1:%.*]] = shufflevector <6 x double> [[IN]], <6 x double> undef, <3 x i32> <i32 3, i32 4, i32 5>
+; CHECK-NEXT:    [[SPLIT:%.*]] = shufflevector <6 x double> [[IN:%.*]], <6 x double> poison, <3 x i32> <i32 0, i32 1, i32 2>
+; CHECK-NEXT:    [[SPLIT1:%.*]] = shufflevector <6 x double> [[IN]], <6 x double> poison, <3 x i32> <i32 3, i32 4, i32 5>
 ; CHECK-NEXT:    [[VEC_START:%.*]] = mul i64 0, [[STRIDE:%.*]]
 ; CHECK-NEXT:    [[VEC_GEP:%.*]] = getelementptr double, double* [[OUT:%.*]], i64 [[VEC_START]]
 ; CHECK-NEXT:    [[VEC_CAST:%.*]] = bitcast double* [[VEC_GEP]] to <3 x double>*
@@ -37,11 +37,11 @@ define void @strided_store_3x2_nonconst_stride(<6 x double> %in, i64 %stride, do
 
 define void @strided_store_2x3(<10 x double> %in, double* %out) {
 ; CHECK-LABEL: @strided_store_2x3(
-; CHECK-NEXT:    [[SPLIT:%.*]] = shufflevector <10 x double> [[IN:%.*]], <10 x double> undef, <2 x i32> <i32 0, i32 1>
-; CHECK-NEXT:    [[SPLIT1:%.*]] = shufflevector <10 x double> [[IN]], <10 x double> undef, <2 x i32> <i32 2, i32 3>
-; CHECK-NEXT:    [[SPLIT2:%.*]] = shufflevector <10 x double> [[IN]], <10 x double> undef, <2 x i32> <i32 4, i32 5>
-; CHECK-NEXT:    [[SPLIT3:%.*]] = shufflevector <10 x double> [[IN]], <10 x double> undef, <2 x i32> <i32 6, i32 7>
-; CHECK-NEXT:    [[SPLIT4:%.*]] = shufflevector <10 x double> [[IN]], <10 x double> undef, <2 x i32> <i32 8, i32 9>
+; CHECK-NEXT:    [[SPLIT:%.*]] = shufflevector <10 x double> [[IN:%.*]], <10 x double> poison, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT:    [[SPLIT1:%.*]] = shufflevector <10 x double> [[IN]], <10 x double> poison, <2 x i32> <i32 2, i32 3>
+; CHECK-NEXT:    [[SPLIT2:%.*]] = shufflevector <10 x double> [[IN]], <10 x double> poison, <2 x i32> <i32 4, i32 5>
+; CHECK-NEXT:    [[SPLIT3:%.*]] = shufflevector <10 x double> [[IN]], <10 x double> poison, <2 x i32> <i32 6, i32 7>
+; CHECK-NEXT:    [[SPLIT4:%.*]] = shufflevector <10 x double> [[IN]], <10 x double> poison, <2 x i32> <i32 8, i32 9>
 ; CHECK-NEXT:    [[VEC_CAST:%.*]] = bitcast double* [[OUT:%.*]] to <2 x double>*
 ; CHECK-NEXT:    store <2 x double> [[SPLIT]], <2 x double>* [[VEC_CAST]], align 8
 ; CHECK-NEXT:    [[VEC_GEP:%.*]] = getelementptr double, double* [[OUT]], i64 4

diff  --git a/llvm/test/Transforms/LowerMatrixIntrinsics/strided-store-float.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/strided-store-float.ll
index 080fa3f5b2e7..95ac5b7f83b1 100644
--- a/llvm/test/Transforms/LowerMatrixIntrinsics/strided-store-float.ll
+++ b/llvm/test/Transforms/LowerMatrixIntrinsics/strided-store-float.ll
@@ -4,8 +4,8 @@
 
 define void @strided_store_3x2(<6 x float> %in, float* %out) {
 ; CHECK-LABEL: @strided_store_3x2(
-; CHECK-NEXT:    [[SPLIT:%.*]] = shufflevector <6 x float> [[IN:%.*]], <6 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
-; CHECK-NEXT:    [[SPLIT1:%.*]] = shufflevector <6 x float> [[IN]], <6 x float> undef, <3 x i32> <i32 3, i32 4, i32 5>
+; CHECK-NEXT:    [[SPLIT:%.*]] = shufflevector <6 x float> [[IN:%.*]], <6 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
+; CHECK-NEXT:    [[SPLIT1:%.*]] = shufflevector <6 x float> [[IN]], <6 x float> poison, <3 x i32> <i32 3, i32 4, i32 5>
 ; CHECK-NEXT:    [[VEC_CAST:%.*]] = bitcast float* [[OUT:%.*]] to <3 x float>*
 ; CHECK-NEXT:    store <3 x float> [[SPLIT]], <3 x float>* [[VEC_CAST]], align 4
 ; CHECK-NEXT:    [[VEC_GEP:%.*]] = getelementptr float, float* [[OUT]], i64 5
@@ -19,8 +19,8 @@ define void @strided_store_3x2(<6 x float> %in, float* %out) {
 
 define void @strided_store_3x2_nonconst_stride(<6 x float> %in, i64 %stride, float* %out) {
 ; CHECK-LABEL: @strided_store_3x2_nonconst_stride(
-; CHECK-NEXT:    [[SPLIT:%.*]] = shufflevector <6 x float> [[IN:%.*]], <6 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
-; CHECK-NEXT:    [[SPLIT1:%.*]] = shufflevector <6 x float> [[IN]], <6 x float> undef, <3 x i32> <i32 3, i32 4, i32 5>
+; CHECK-NEXT:    [[SPLIT:%.*]] = shufflevector <6 x float> [[IN:%.*]], <6 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
+; CHECK-NEXT:    [[SPLIT1:%.*]] = shufflevector <6 x float> [[IN]], <6 x float> poison, <3 x i32> <i32 3, i32 4, i32 5>
 ; CHECK-NEXT:    [[VEC_START:%.*]] = mul i64 0, [[STRIDE:%.*]]
 ; CHECK-NEXT:    [[VEC_GEP:%.*]] = getelementptr float, float* [[OUT:%.*]], i64 [[VEC_START]]
 ; CHECK-NEXT:    [[VEC_CAST:%.*]] = bitcast float* [[VEC_GEP]] to <3 x float>*
@@ -40,11 +40,11 @@ declare void @llvm.matrix.column.major.store(<6 x float>, float*, i64, i1, i32,
 
 define void @strided_store_2x3(<10 x float> %in, float* %out) {
 ; CHECK-LABEL: @strided_store_2x3(
-; CHECK-NEXT:    [[SPLIT:%.*]] = shufflevector <10 x float> [[IN:%.*]], <10 x float> undef, <2 x i32> <i32 0, i32 1>
-; CHECK-NEXT:    [[SPLIT1:%.*]] = shufflevector <10 x float> [[IN]], <10 x float> undef, <2 x i32> <i32 2, i32 3>
-; CHECK-NEXT:    [[SPLIT2:%.*]] = shufflevector <10 x float> [[IN]], <10 x float> undef, <2 x i32> <i32 4, i32 5>
-; CHECK-NEXT:    [[SPLIT3:%.*]] = shufflevector <10 x float> [[IN]], <10 x float> undef, <2 x i32> <i32 6, i32 7>
-; CHECK-NEXT:    [[SPLIT4:%.*]] = shufflevector <10 x float> [[IN]], <10 x float> undef, <2 x i32> <i32 8, i32 9>
+; CHECK-NEXT:    [[SPLIT:%.*]] = shufflevector <10 x float> [[IN:%.*]], <10 x float> poison, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT:    [[SPLIT1:%.*]] = shufflevector <10 x float> [[IN]], <10 x float> poison, <2 x i32> <i32 2, i32 3>
+; CHECK-NEXT:    [[SPLIT2:%.*]] = shufflevector <10 x float> [[IN]], <10 x float> poison, <2 x i32> <i32 4, i32 5>
+; CHECK-NEXT:    [[SPLIT3:%.*]] = shufflevector <10 x float> [[IN]], <10 x float> poison, <2 x i32> <i32 6, i32 7>
+; CHECK-NEXT:    [[SPLIT4:%.*]] = shufflevector <10 x float> [[IN]], <10 x float> poison, <2 x i32> <i32 8, i32 9>
 ; CHECK-NEXT:    [[VEC_CAST:%.*]] = bitcast float* [[OUT:%.*]] to <2 x float>*
 ; CHECK-NEXT:    store <2 x float> [[SPLIT]], <2 x float>* [[VEC_CAST]], align 4
 ; CHECK-NEXT:    [[VEC_GEP:%.*]] = getelementptr float, float* [[OUT]], i64 4

diff  --git a/llvm/test/Transforms/LowerMatrixIntrinsics/strided-store-i32.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/strided-store-i32.ll
index 6287a8215acb..be6a18dc5916 100644
--- a/llvm/test/Transforms/LowerMatrixIntrinsics/strided-store-i32.ll
+++ b/llvm/test/Transforms/LowerMatrixIntrinsics/strided-store-i32.ll
@@ -4,8 +4,8 @@
 
 define void @strided_store_3x2(<6 x i32> %in, i32* %out) {
 ; CHECK-LABEL: @strided_store_3x2(
-; CHECK-NEXT:    [[SPLIT:%.*]] = shufflevector <6 x i32> [[IN:%.*]], <6 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
-; CHECK-NEXT:    [[SPLIT1:%.*]] = shufflevector <6 x i32> [[IN]], <6 x i32> undef, <3 x i32> <i32 3, i32 4, i32 5>
+; CHECK-NEXT:    [[SPLIT:%.*]] = shufflevector <6 x i32> [[IN:%.*]], <6 x i32> poison, <3 x i32> <i32 0, i32 1, i32 2>
+; CHECK-NEXT:    [[SPLIT1:%.*]] = shufflevector <6 x i32> [[IN]], <6 x i32> poison, <3 x i32> <i32 3, i32 4, i32 5>
 ; CHECK-NEXT:    [[VEC_CAST:%.*]] = bitcast i32* [[OUT:%.*]] to <3 x i32>*
 ; CHECK-NEXT:    store <3 x i32> [[SPLIT]], <3 x i32>* [[VEC_CAST]], align 4
 ; CHECK-NEXT:    [[VEC_GEP:%.*]] = getelementptr i32, i32* [[OUT]], i64 5
@@ -19,8 +19,8 @@ define void @strided_store_3x2(<6 x i32> %in, i32* %out) {
 
 define void @strided_store_3x2_nonconst_stride(<6 x i32> %in, i64 %stride, i32* %out) {
 ; CHECK-LABEL: @strided_store_3x2_nonconst_stride(
-; CHECK-NEXT:    [[SPLIT:%.*]] = shufflevector <6 x i32> [[IN:%.*]], <6 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
-; CHECK-NEXT:    [[SPLIT1:%.*]] = shufflevector <6 x i32> [[IN]], <6 x i32> undef, <3 x i32> <i32 3, i32 4, i32 5>
+; CHECK-NEXT:    [[SPLIT:%.*]] = shufflevector <6 x i32> [[IN:%.*]], <6 x i32> poison, <3 x i32> <i32 0, i32 1, i32 2>
+; CHECK-NEXT:    [[SPLIT1:%.*]] = shufflevector <6 x i32> [[IN]], <6 x i32> poison, <3 x i32> <i32 3, i32 4, i32 5>
 ; CHECK-NEXT:    [[VEC_START:%.*]] = mul i64 0, [[STRIDE:%.*]]
 ; CHECK-NEXT:    [[VEC_GEP:%.*]] = getelementptr i32, i32* [[OUT:%.*]], i64 [[VEC_START]]
 ; CHECK-NEXT:    [[VEC_CAST:%.*]] = bitcast i32* [[VEC_GEP]] to <3 x i32>*
@@ -40,11 +40,11 @@ declare void @llvm.matrix.column.major.store(<6 x i32>, i32*, i64, i1, i32, i32)
 
 define void @strided_store_2x3(<10 x i32> %in, i32* %out) {
 ; CHECK-LABEL: @strided_store_2x3(
-; CHECK-NEXT:    [[SPLIT:%.*]] = shufflevector <10 x i32> [[IN:%.*]], <10 x i32> undef, <2 x i32> <i32 0, i32 1>
-; CHECK-NEXT:    [[SPLIT1:%.*]] = shufflevector <10 x i32> [[IN]], <10 x i32> undef, <2 x i32> <i32 2, i32 3>
-; CHECK-NEXT:    [[SPLIT2:%.*]] = shufflevector <10 x i32> [[IN]], <10 x i32> undef, <2 x i32> <i32 4, i32 5>
-; CHECK-NEXT:    [[SPLIT3:%.*]] = shufflevector <10 x i32> [[IN]], <10 x i32> undef, <2 x i32> <i32 6, i32 7>
-; CHECK-NEXT:    [[SPLIT4:%.*]] = shufflevector <10 x i32> [[IN]], <10 x i32> undef, <2 x i32> <i32 8, i32 9>
+; CHECK-NEXT:    [[SPLIT:%.*]] = shufflevector <10 x i32> [[IN:%.*]], <10 x i32> poison, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT:    [[SPLIT1:%.*]] = shufflevector <10 x i32> [[IN]], <10 x i32> poison, <2 x i32> <i32 2, i32 3>
+; CHECK-NEXT:    [[SPLIT2:%.*]] = shufflevector <10 x i32> [[IN]], <10 x i32> poison, <2 x i32> <i32 4, i32 5>
+; CHECK-NEXT:    [[SPLIT3:%.*]] = shufflevector <10 x i32> [[IN]], <10 x i32> poison, <2 x i32> <i32 6, i32 7>
+; CHECK-NEXT:    [[SPLIT4:%.*]] = shufflevector <10 x i32> [[IN]], <10 x i32> poison, <2 x i32> <i32 8, i32 9>
 ; CHECK-NEXT:    [[VEC_CAST:%.*]] = bitcast i32* [[OUT:%.*]] to <2 x i32>*
 ; CHECK-NEXT:    store <2 x i32> [[SPLIT]], <2 x i32>* [[VEC_CAST]], align 4
 ; CHECK-NEXT:    [[VEC_GEP:%.*]] = getelementptr i32, i32* [[OUT]], i64 4

diff  --git a/llvm/test/Transforms/LowerMatrixIntrinsics/transpose-double-row-major.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/transpose-double-row-major.ll
index 5d607f4b57f1..3fa95e3e1303 100644
--- a/llvm/test/Transforms/LowerMatrixIntrinsics/transpose-double-row-major.ll
+++ b/llvm/test/Transforms/LowerMatrixIntrinsics/transpose-double-row-major.ll
@@ -4,8 +4,8 @@
 define <8 x double> @transpose(<8 x double> %a) {
 ; RM-LABEL: @transpose(
 ; RM-NEXT:  entry:
-; RM-NEXT:    [[SPLIT:%.*]] = shufflevector <8 x double> [[A:%.*]], <8 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; RM-NEXT:    [[SPLIT1:%.*]] = shufflevector <8 x double> [[A]], <8 x double> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+; RM-NEXT:    [[SPLIT:%.*]] = shufflevector <8 x double> [[A:%.*]], <8 x double> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; RM-NEXT:    [[SPLIT1:%.*]] = shufflevector <8 x double> [[A]], <8 x double> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
 ; RM-NEXT:    [[TMP0:%.*]] = extractelement <4 x double> [[SPLIT]], i64 0
 ; RM-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> undef, double [[TMP0]], i64 0
 ; RM-NEXT:    [[TMP2:%.*]] = extractelement <4 x double> [[SPLIT1]], i64 0
@@ -37,14 +37,14 @@ declare <8 x double> @llvm.matrix.transpose(<8 x double>, i32, i32)
 define <8 x double> @transpose_single_column(<8 x double> %a) {
 ; RM-LABEL: @transpose_single_column(
 ; RM-NEXT:  entry:
-; RM-NEXT:    [[SPLIT:%.*]] = shufflevector <8 x double> [[A:%.*]], <8 x double> undef, <1 x i32> zeroinitializer
-; RM-NEXT:    [[SPLIT1:%.*]] = shufflevector <8 x double> [[A]], <8 x double> undef, <1 x i32> <i32 1>
-; RM-NEXT:    [[SPLIT2:%.*]] = shufflevector <8 x double> [[A]], <8 x double> undef, <1 x i32> <i32 2>
-; RM-NEXT:    [[SPLIT3:%.*]] = shufflevector <8 x double> [[A]], <8 x double> undef, <1 x i32> <i32 3>
-; RM-NEXT:    [[SPLIT4:%.*]] = shufflevector <8 x double> [[A]], <8 x double> undef, <1 x i32> <i32 4>
-; RM-NEXT:    [[SPLIT5:%.*]] = shufflevector <8 x double> [[A]], <8 x double> undef, <1 x i32> <i32 5>
-; RM-NEXT:    [[SPLIT6:%.*]] = shufflevector <8 x double> [[A]], <8 x double> undef, <1 x i32> <i32 6>
-; RM-NEXT:    [[SPLIT7:%.*]] = shufflevector <8 x double> [[A]], <8 x double> undef, <1 x i32> <i32 7>
+; RM-NEXT:    [[SPLIT:%.*]] = shufflevector <8 x double> [[A:%.*]], <8 x double> poison, <1 x i32> zeroinitializer
+; RM-NEXT:    [[SPLIT1:%.*]] = shufflevector <8 x double> [[A]], <8 x double> poison, <1 x i32> <i32 1>
+; RM-NEXT:    [[SPLIT2:%.*]] = shufflevector <8 x double> [[A]], <8 x double> poison, <1 x i32> <i32 2>
+; RM-NEXT:    [[SPLIT3:%.*]] = shufflevector <8 x double> [[A]], <8 x double> poison, <1 x i32> <i32 3>
+; RM-NEXT:    [[SPLIT4:%.*]] = shufflevector <8 x double> [[A]], <8 x double> poison, <1 x i32> <i32 4>
+; RM-NEXT:    [[SPLIT5:%.*]] = shufflevector <8 x double> [[A]], <8 x double> poison, <1 x i32> <i32 5>
+; RM-NEXT:    [[SPLIT6:%.*]] = shufflevector <8 x double> [[A]], <8 x double> poison, <1 x i32> <i32 6>
+; RM-NEXT:    [[SPLIT7:%.*]] = shufflevector <8 x double> [[A]], <8 x double> poison, <1 x i32> <i32 7>
 ; RM-NEXT:    [[TMP0:%.*]] = extractelement <1 x double> [[SPLIT]], i64 0
 ; RM-NEXT:    [[TMP1:%.*]] = insertelement <8 x double> undef, double [[TMP0]], i64 0
 ; RM-NEXT:    [[TMP2:%.*]] = extractelement <1 x double> [[SPLIT1]], i64 0
@@ -73,9 +73,9 @@ declare <12 x double> @llvm.matrix.transpose.v12f64(<12 x double>, i32, i32)
 define <12 x double> @transpose_double_3x4(<12 x double> %a) {
 ; RM-LABEL: @transpose_double_3x4(
 ; RM-NEXT:  entry:
-; RM-NEXT:    [[SPLIT:%.*]] = shufflevector <12 x double> [[A:%.*]], <12 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; RM-NEXT:    [[SPLIT1:%.*]] = shufflevector <12 x double> [[A]], <12 x double> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-; RM-NEXT:    [[SPLIT2:%.*]] = shufflevector <12 x double> [[A]], <12 x double> undef, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
+; RM-NEXT:    [[SPLIT:%.*]] = shufflevector <12 x double> [[A:%.*]], <12 x double> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; RM-NEXT:    [[SPLIT1:%.*]] = shufflevector <12 x double> [[A]], <12 x double> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+; RM-NEXT:    [[SPLIT2:%.*]] = shufflevector <12 x double> [[A]], <12 x double> poison, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
 ; RM-NEXT:    [[TMP0:%.*]] = extractelement <4 x double> [[SPLIT]], i64 0
 ; RM-NEXT:    [[TMP1:%.*]] = insertelement <3 x double> undef, double [[TMP0]], i64 0
 ; RM-NEXT:    [[TMP2:%.*]] = extractelement <4 x double> [[SPLIT1]], i64 0

diff  --git a/llvm/test/Transforms/LowerMatrixIntrinsics/transpose-double.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/transpose-double.ll
index 6bea523bb5e7..47dfa72aa02a 100644
--- a/llvm/test/Transforms/LowerMatrixIntrinsics/transpose-double.ll
+++ b/llvm/test/Transforms/LowerMatrixIntrinsics/transpose-double.ll
@@ -6,10 +6,10 @@
 define <8 x double> @transpose(<8 x double> %a) {
 ; CHECK-LABEL: @transpose(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[SPLIT:%.*]] = shufflevector <8 x double> [[A:%.*]], <8 x double> undef, <2 x i32> <i32 0, i32 1>
-; CHECK-NEXT:    [[SPLIT1:%.*]] = shufflevector <8 x double> [[A]], <8 x double> undef, <2 x i32> <i32 2, i32 3>
-; CHECK-NEXT:    [[SPLIT2:%.*]] = shufflevector <8 x double> [[A]], <8 x double> undef, <2 x i32> <i32 4, i32 5>
-; CHECK-NEXT:    [[SPLIT3:%.*]] = shufflevector <8 x double> [[A]], <8 x double> undef, <2 x i32> <i32 6, i32 7>
+; CHECK-NEXT:    [[SPLIT:%.*]] = shufflevector <8 x double> [[A:%.*]], <8 x double> poison, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT:    [[SPLIT1:%.*]] = shufflevector <8 x double> [[A]], <8 x double> poison, <2 x i32> <i32 2, i32 3>
+; CHECK-NEXT:    [[SPLIT2:%.*]] = shufflevector <8 x double> [[A]], <8 x double> poison, <2 x i32> <i32 4, i32 5>
+; CHECK-NEXT:    [[SPLIT3:%.*]] = shufflevector <8 x double> [[A]], <8 x double> poison, <2 x i32> <i32 6, i32 7>
 ; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x double> [[SPLIT]], i64 0
 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x double> undef, double [[TMP0]], i64 0
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x double> [[SPLIT1]], i64 0
@@ -39,7 +39,7 @@ declare <8 x double> @llvm.matrix.transpose(<8 x double>, i32, i32)
 define <8 x double> @transpose_single_column(<8 x double> %a) {
 ; CHECK-LABEL: @transpose_single_column(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[SPLIT:%.*]] = shufflevector <8 x double> [[A:%.*]], <8 x double> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT:    [[SPLIT:%.*]] = shufflevector <8 x double> [[A:%.*]], <8 x double> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
 ; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <8 x double> [[SPLIT]], i64 0
 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <1 x double> undef, double [[TMP0]], i64 0
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <8 x double> [[SPLIT]], i64 1
@@ -75,10 +75,10 @@ declare <12 x double> @llvm.matrix.transpose.v12f64(<12 x double>, i32, i32)
 define <12 x double> @transpose_double_3x4(<12 x double> %a) {
 ; CHECK-LABEL: @transpose_double_3x4(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[SPLIT:%.*]] = shufflevector <12 x double> [[A:%.*]], <12 x double> undef, <3 x i32> <i32 0, i32 1, i32 2>
-; CHECK-NEXT:    [[SPLIT1:%.*]] = shufflevector <12 x double> [[A]], <12 x double> undef, <3 x i32> <i32 3, i32 4, i32 5>
-; CHECK-NEXT:    [[SPLIT2:%.*]] = shufflevector <12 x double> [[A]], <12 x double> undef, <3 x i32> <i32 6, i32 7, i32 8>
-; CHECK-NEXT:    [[SPLIT3:%.*]] = shufflevector <12 x double> [[A]], <12 x double> undef, <3 x i32> <i32 9, i32 10, i32 11>
+; CHECK-NEXT:    [[SPLIT:%.*]] = shufflevector <12 x double> [[A:%.*]], <12 x double> poison, <3 x i32> <i32 0, i32 1, i32 2>
+; CHECK-NEXT:    [[SPLIT1:%.*]] = shufflevector <12 x double> [[A]], <12 x double> poison, <3 x i32> <i32 3, i32 4, i32 5>
+; CHECK-NEXT:    [[SPLIT2:%.*]] = shufflevector <12 x double> [[A]], <12 x double> poison, <3 x i32> <i32 6, i32 7, i32 8>
+; CHECK-NEXT:    [[SPLIT3:%.*]] = shufflevector <12 x double> [[A]], <12 x double> poison, <3 x i32> <i32 9, i32 10, i32 11>
 ; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <3 x double> [[SPLIT]], i64 0
 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x double> undef, double [[TMP0]], i64 0
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <3 x double> [[SPLIT1]], i64 0
@@ -104,7 +104,7 @@ define <12 x double> @transpose_double_3x4(<12 x double> %a) {
 ; CHECK-NEXT:    [[TMP22:%.*]] = extractelement <3 x double> [[SPLIT3]], i64 2
 ; CHECK-NEXT:    [[TMP23:%.*]] = insertelement <4 x double> [[TMP21]], double [[TMP22]], i64 3
 ; CHECK-NEXT:    [[TMP24:%.*]] = shufflevector <4 x double> [[TMP7]], <4 x double> [[TMP15]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT:    [[TMP25:%.*]] = shufflevector <4 x double> [[TMP23]], <4 x double> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP25:%.*]] = shufflevector <4 x double> [[TMP23]], <4 x double> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[TMP26:%.*]] = shufflevector <8 x double> [[TMP24]], <8 x double> [[TMP25]], <12 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
 ; CHECK-NEXT:    ret <12 x double> [[TMP26]]
 ;

diff  --git a/llvm/test/Transforms/LowerMatrixIntrinsics/transpose-float-row-major.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/transpose-float-row-major.ll
index 8a9bf5ae3be4..1d2cdf3b7b1a 100644
--- a/llvm/test/Transforms/LowerMatrixIntrinsics/transpose-float-row-major.ll
+++ b/llvm/test/Transforms/LowerMatrixIntrinsics/transpose-float-row-major.ll
@@ -4,8 +4,8 @@
 define <8 x float> @transpose(<8 x float> %a) {
 ; RM-LABEL: @transpose(
 ; RM-NEXT:  entry:
-; RM-NEXT:    [[SPLIT:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; RM-NEXT:    [[SPLIT1:%.*]] = shufflevector <8 x float> [[A]], <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+; RM-NEXT:    [[SPLIT:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; RM-NEXT:    [[SPLIT1:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
 ; RM-NEXT:    [[TMP0:%.*]] = extractelement <4 x float> [[SPLIT]], i64 0
 ; RM-NEXT:    [[TMP1:%.*]] = insertelement <2 x float> undef, float [[TMP0]], i64 0
 ; RM-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[SPLIT1]], i64 0
@@ -37,14 +37,14 @@ declare <8 x float> @llvm.matrix.transpose(<8 x float>, i32, i32)
 define <8 x float> @transpose_single_column(<8 x float> %a) {
 ; RM-LABEL: @transpose_single_column(
 ; RM-NEXT:  entry:
-; RM-NEXT:    [[SPLIT:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> undef, <1 x i32> zeroinitializer
-; RM-NEXT:    [[SPLIT1:%.*]] = shufflevector <8 x float> [[A]], <8 x float> undef, <1 x i32> <i32 1>
-; RM-NEXT:    [[SPLIT2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> undef, <1 x i32> <i32 2>
-; RM-NEXT:    [[SPLIT3:%.*]] = shufflevector <8 x float> [[A]], <8 x float> undef, <1 x i32> <i32 3>
-; RM-NEXT:    [[SPLIT4:%.*]] = shufflevector <8 x float> [[A]], <8 x float> undef, <1 x i32> <i32 4>
-; RM-NEXT:    [[SPLIT5:%.*]] = shufflevector <8 x float> [[A]], <8 x float> undef, <1 x i32> <i32 5>
-; RM-NEXT:    [[SPLIT6:%.*]] = shufflevector <8 x float> [[A]], <8 x float> undef, <1 x i32> <i32 6>
-; RM-NEXT:    [[SPLIT7:%.*]] = shufflevector <8 x float> [[A]], <8 x float> undef, <1 x i32> <i32 7>
+; RM-NEXT:    [[SPLIT:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> poison, <1 x i32> zeroinitializer
+; RM-NEXT:    [[SPLIT1:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <1 x i32> <i32 1>
+; RM-NEXT:    [[SPLIT2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <1 x i32> <i32 2>
+; RM-NEXT:    [[SPLIT3:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <1 x i32> <i32 3>
+; RM-NEXT:    [[SPLIT4:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <1 x i32> <i32 4>
+; RM-NEXT:    [[SPLIT5:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <1 x i32> <i32 5>
+; RM-NEXT:    [[SPLIT6:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <1 x i32> <i32 6>
+; RM-NEXT:    [[SPLIT7:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <1 x i32> <i32 7>
 ; RM-NEXT:    [[TMP0:%.*]] = extractelement <1 x float> [[SPLIT]], i64 0
 ; RM-NEXT:    [[TMP1:%.*]] = insertelement <8 x float> undef, float [[TMP0]], i64 0
 ; RM-NEXT:    [[TMP2:%.*]] = extractelement <1 x float> [[SPLIT1]], i64 0
@@ -73,9 +73,9 @@ declare <12 x float> @llvm.matrix.transpose.v12f32(<12 x float>, i32, i32)
 define <12 x float> @transpose_float_3x4(<12 x float> %a) {
 ; RM-LABEL: @transpose_float_3x4(
 ; RM-NEXT:  entry:
-; RM-NEXT:    [[SPLIT:%.*]] = shufflevector <12 x float> [[A:%.*]], <12 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; RM-NEXT:    [[SPLIT1:%.*]] = shufflevector <12 x float> [[A]], <12 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-; RM-NEXT:    [[SPLIT2:%.*]] = shufflevector <12 x float> [[A]], <12 x float> undef, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
+; RM-NEXT:    [[SPLIT:%.*]] = shufflevector <12 x float> [[A:%.*]], <12 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; RM-NEXT:    [[SPLIT1:%.*]] = shufflevector <12 x float> [[A]], <12 x float> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+; RM-NEXT:    [[SPLIT2:%.*]] = shufflevector <12 x float> [[A]], <12 x float> poison, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
 ; RM-NEXT:    [[TMP0:%.*]] = extractelement <4 x float> [[SPLIT]], i64 0
 ; RM-NEXT:    [[TMP1:%.*]] = insertelement <3 x float> undef, float [[TMP0]], i64 0
 ; RM-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[SPLIT1]], i64 0

diff  --git a/llvm/test/Transforms/LowerMatrixIntrinsics/transpose-float.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/transpose-float.ll
index f54f5746b3da..5f47129d198d 100644
--- a/llvm/test/Transforms/LowerMatrixIntrinsics/transpose-float.ll
+++ b/llvm/test/Transforms/LowerMatrixIntrinsics/transpose-float.ll
@@ -6,10 +6,10 @@
 define <8 x float> @transpose(<8 x float> %a) {
 ; CHECK-LABEL: @transpose(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[SPLIT:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> undef, <2 x i32> <i32 0, i32 1>
-; CHECK-NEXT:    [[SPLIT1:%.*]] = shufflevector <8 x float> [[A]], <8 x float> undef, <2 x i32> <i32 2, i32 3>
-; CHECK-NEXT:    [[SPLIT2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> undef, <2 x i32> <i32 4, i32 5>
-; CHECK-NEXT:    [[SPLIT3:%.*]] = shufflevector <8 x float> [[A]], <8 x float> undef, <2 x i32> <i32 6, i32 7>
+; CHECK-NEXT:    [[SPLIT:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> poison, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT:    [[SPLIT1:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> <i32 2, i32 3>
+; CHECK-NEXT:    [[SPLIT2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> <i32 4, i32 5>
+; CHECK-NEXT:    [[SPLIT3:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> <i32 6, i32 7>
 ; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x float> [[SPLIT]], i64 0
 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float [[TMP0]], i64 0
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x float> [[SPLIT1]], i64 0
@@ -39,7 +39,7 @@ declare <8 x float> @llvm.matrix.transpose(<8 x float>, i32, i32)
 define <8 x float> @transpose_single_column(<8 x float> %a) {
 ; CHECK-LABEL: @transpose_single_column(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[SPLIT:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT:    [[SPLIT:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
 ; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <8 x float> [[SPLIT]], i64 0
 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <1 x float> undef, float [[TMP0]], i64 0
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <8 x float> [[SPLIT]], i64 1
@@ -75,10 +75,10 @@ declare <12 x float> @llvm.matrix.transpose.v12f32(<12 x float>, i32, i32)
 define <12 x float> @transpose_float_3x4(<12 x float> %a) {
 ; CHECK-LABEL: @transpose_float_3x4(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[SPLIT:%.*]] = shufflevector <12 x float> [[A:%.*]], <12 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
-; CHECK-NEXT:    [[SPLIT1:%.*]] = shufflevector <12 x float> [[A]], <12 x float> undef, <3 x i32> <i32 3, i32 4, i32 5>
-; CHECK-NEXT:    [[SPLIT2:%.*]] = shufflevector <12 x float> [[A]], <12 x float> undef, <3 x i32> <i32 6, i32 7, i32 8>
-; CHECK-NEXT:    [[SPLIT3:%.*]] = shufflevector <12 x float> [[A]], <12 x float> undef, <3 x i32> <i32 9, i32 10, i32 11>
+; CHECK-NEXT:    [[SPLIT:%.*]] = shufflevector <12 x float> [[A:%.*]], <12 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
+; CHECK-NEXT:    [[SPLIT1:%.*]] = shufflevector <12 x float> [[A]], <12 x float> poison, <3 x i32> <i32 3, i32 4, i32 5>
+; CHECK-NEXT:    [[SPLIT2:%.*]] = shufflevector <12 x float> [[A]], <12 x float> poison, <3 x i32> <i32 6, i32 7, i32 8>
+; CHECK-NEXT:    [[SPLIT3:%.*]] = shufflevector <12 x float> [[A]], <12 x float> poison, <3 x i32> <i32 9, i32 10, i32 11>
 ; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <3 x float> [[SPLIT]], i64 0
 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float [[TMP0]], i64 0
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <3 x float> [[SPLIT1]], i64 0
@@ -104,7 +104,7 @@ define <12 x float> @transpose_float_3x4(<12 x float> %a) {
 ; CHECK-NEXT:    [[TMP22:%.*]] = extractelement <3 x float> [[SPLIT3]], i64 2
 ; CHECK-NEXT:    [[TMP23:%.*]] = insertelement <4 x float> [[TMP21]], float [[TMP22]], i64 3
 ; CHECK-NEXT:    [[TMP24:%.*]] = shufflevector <4 x float> [[TMP7]], <4 x float> [[TMP15]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT:    [[TMP25:%.*]] = shufflevector <4 x float> [[TMP23]], <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP25:%.*]] = shufflevector <4 x float> [[TMP23]], <4 x float> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[TMP26:%.*]] = shufflevector <8 x float> [[TMP24]], <8 x float> [[TMP25]], <12 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
 ; CHECK-NEXT:    ret <12 x float> [[TMP26]]
 ;

diff  --git a/llvm/test/Transforms/LowerMatrixIntrinsics/transpose-i32-row-major.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/transpose-i32-row-major.ll
index 2f23d5fd8fec..7c608951faca 100644
--- a/llvm/test/Transforms/LowerMatrixIntrinsics/transpose-i32-row-major.ll
+++ b/llvm/test/Transforms/LowerMatrixIntrinsics/transpose-i32-row-major.ll
@@ -4,8 +4,8 @@
 define <8 x i32> @transpose(<8 x i32> %a) {
 ; RM-LABEL: @transpose(
 ; RM-NEXT:  entry:
-; RM-NEXT:    [[SPLIT:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; RM-NEXT:    [[SPLIT1:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+; RM-NEXT:    [[SPLIT:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; RM-NEXT:    [[SPLIT1:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
 ; RM-NEXT:    [[TMP0:%.*]] = extractelement <4 x i32> [[SPLIT]], i64 0
 ; RM-NEXT:    [[TMP1:%.*]] = insertelement <2 x i32> undef, i32 [[TMP0]], i64 0
 ; RM-NEXT:    [[TMP2:%.*]] = extractelement <4 x i32> [[SPLIT1]], i64 0
@@ -37,14 +37,14 @@ declare <8 x i32> @llvm.matrix.transpose(<8 x i32>, i32, i32)
 define <8 x i32> @transpose_single_column(<8 x i32> %a) {
 ; RM-LABEL: @transpose_single_column(
 ; RM-NEXT:  entry:
-; RM-NEXT:    [[SPLIT:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> undef, <1 x i32> zeroinitializer
-; RM-NEXT:    [[SPLIT1:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> undef, <1 x i32> <i32 1>
-; RM-NEXT:    [[SPLIT2:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> undef, <1 x i32> <i32 2>
-; RM-NEXT:    [[SPLIT3:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> undef, <1 x i32> <i32 3>
-; RM-NEXT:    [[SPLIT4:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> undef, <1 x i32> <i32 4>
-; RM-NEXT:    [[SPLIT5:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> undef, <1 x i32> <i32 5>
-; RM-NEXT:    [[SPLIT6:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> undef, <1 x i32> <i32 6>
-; RM-NEXT:    [[SPLIT7:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> undef, <1 x i32> <i32 7>
+; RM-NEXT:    [[SPLIT:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> poison, <1 x i32> zeroinitializer
+; RM-NEXT:    [[SPLIT1:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <1 x i32> <i32 1>
+; RM-NEXT:    [[SPLIT2:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <1 x i32> <i32 2>
+; RM-NEXT:    [[SPLIT3:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <1 x i32> <i32 3>
+; RM-NEXT:    [[SPLIT4:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <1 x i32> <i32 4>
+; RM-NEXT:    [[SPLIT5:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <1 x i32> <i32 5>
+; RM-NEXT:    [[SPLIT6:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <1 x i32> <i32 6>
+; RM-NEXT:    [[SPLIT7:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <1 x i32> <i32 7>
 ; RM-NEXT:    [[TMP0:%.*]] = extractelement <1 x i32> [[SPLIT]], i64 0
 ; RM-NEXT:    [[TMP1:%.*]] = insertelement <8 x i32> undef, i32 [[TMP0]], i64 0
 ; RM-NEXT:    [[TMP2:%.*]] = extractelement <1 x i32> [[SPLIT1]], i64 0
@@ -73,9 +73,9 @@ declare <12 x i32> @llvm.matrix.transpose.v12i32(<12 x i32>, i32, i32)
 define <12 x i32> @transpose_i32_3x4(<12 x i32> %a) {
 ; RM-LABEL: @transpose_i32_3x4(
 ; RM-NEXT:  entry:
-; RM-NEXT:    [[SPLIT:%.*]] = shufflevector <12 x i32> [[A:%.*]], <12 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; RM-NEXT:    [[SPLIT1:%.*]] = shufflevector <12 x i32> [[A]], <12 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-; RM-NEXT:    [[SPLIT2:%.*]] = shufflevector <12 x i32> [[A]], <12 x i32> undef, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
+; RM-NEXT:    [[SPLIT:%.*]] = shufflevector <12 x i32> [[A:%.*]], <12 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; RM-NEXT:    [[SPLIT1:%.*]] = shufflevector <12 x i32> [[A]], <12 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+; RM-NEXT:    [[SPLIT2:%.*]] = shufflevector <12 x i32> [[A]], <12 x i32> poison, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
 ; RM-NEXT:    [[TMP0:%.*]] = extractelement <4 x i32> [[SPLIT]], i64 0
 ; RM-NEXT:    [[TMP1:%.*]] = insertelement <3 x i32> undef, i32 [[TMP0]], i64 0
 ; RM-NEXT:    [[TMP2:%.*]] = extractelement <4 x i32> [[SPLIT1]], i64 0

diff  --git a/llvm/test/Transforms/LowerMatrixIntrinsics/transpose-i32.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/transpose-i32.ll
index f5ea365d7554..228dc4315bfc 100644
--- a/llvm/test/Transforms/LowerMatrixIntrinsics/transpose-i32.ll
+++ b/llvm/test/Transforms/LowerMatrixIntrinsics/transpose-i32.ll
@@ -6,10 +6,10 @@
 define <8 x i32> @transpose(<8 x i32> %a) {
 ; CHECK-LABEL: @transpose(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[SPLIT:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> undef, <2 x i32> <i32 0, i32 1>
-; CHECK-NEXT:    [[SPLIT1:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> undef, <2 x i32> <i32 2, i32 3>
-; CHECK-NEXT:    [[SPLIT2:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> undef, <2 x i32> <i32 4, i32 5>
-; CHECK-NEXT:    [[SPLIT3:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> undef, <2 x i32> <i32 6, i32 7>
+; CHECK-NEXT:    [[SPLIT:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> poison, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT:    [[SPLIT1:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <2 x i32> <i32 2, i32 3>
+; CHECK-NEXT:    [[SPLIT2:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <2 x i32> <i32 4, i32 5>
+; CHECK-NEXT:    [[SPLIT3:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <2 x i32> <i32 6, i32 7>
 ; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x i32> [[SPLIT]], i64 0
 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x i32> undef, i32 [[TMP0]], i64 0
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x i32> [[SPLIT1]], i64 0
@@ -39,7 +39,7 @@ declare <8 x i32> @llvm.matrix.transpose(<8 x i32>, i32, i32)
 define <8 x i32> @transpose_single_column(<8 x i32> %a) {
 ; CHECK-LABEL: @transpose_single_column(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[SPLIT:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT:    [[SPLIT:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
 ; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <8 x i32> [[SPLIT]], i64 0
 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <1 x i32> undef, i32 [[TMP0]], i64 0
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <8 x i32> [[SPLIT]], i64 1
@@ -75,10 +75,10 @@ declare <12 x i32> @llvm.matrix.transpose.v12i32(<12 x i32>, i32, i32)
 define <12 x i32> @transpose_i32_3x4(<12 x i32> %a) {
 ; CHECK-LABEL: @transpose_i32_3x4(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[SPLIT:%.*]] = shufflevector <12 x i32> [[A:%.*]], <12 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
-; CHECK-NEXT:    [[SPLIT1:%.*]] = shufflevector <12 x i32> [[A]], <12 x i32> undef, <3 x i32> <i32 3, i32 4, i32 5>
-; CHECK-NEXT:    [[SPLIT2:%.*]] = shufflevector <12 x i32> [[A]], <12 x i32> undef, <3 x i32> <i32 6, i32 7, i32 8>
-; CHECK-NEXT:    [[SPLIT3:%.*]] = shufflevector <12 x i32> [[A]], <12 x i32> undef, <3 x i32> <i32 9, i32 10, i32 11>
+; CHECK-NEXT:    [[SPLIT:%.*]] = shufflevector <12 x i32> [[A:%.*]], <12 x i32> poison, <3 x i32> <i32 0, i32 1, i32 2>
+; CHECK-NEXT:    [[SPLIT1:%.*]] = shufflevector <12 x i32> [[A]], <12 x i32> poison, <3 x i32> <i32 3, i32 4, i32 5>
+; CHECK-NEXT:    [[SPLIT2:%.*]] = shufflevector <12 x i32> [[A]], <12 x i32> poison, <3 x i32> <i32 6, i32 7, i32 8>
+; CHECK-NEXT:    [[SPLIT3:%.*]] = shufflevector <12 x i32> [[A]], <12 x i32> poison, <3 x i32> <i32 9, i32 10, i32 11>
 ; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <3 x i32> [[SPLIT]], i64 0
 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x i32> undef, i32 [[TMP0]], i64 0
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <3 x i32> [[SPLIT1]], i64 0
@@ -104,7 +104,7 @@ define <12 x i32> @transpose_i32_3x4(<12 x i32> %a) {
 ; CHECK-NEXT:    [[TMP22:%.*]] = extractelement <3 x i32> [[SPLIT3]], i64 2
 ; CHECK-NEXT:    [[TMP23:%.*]] = insertelement <4 x i32> [[TMP21]], i32 [[TMP22]], i64 3
 ; CHECK-NEXT:    [[TMP24:%.*]] = shufflevector <4 x i32> [[TMP7]], <4 x i32> [[TMP15]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT:    [[TMP25:%.*]] = shufflevector <4 x i32> [[TMP23]], <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP25:%.*]] = shufflevector <4 x i32> [[TMP23]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[TMP26:%.*]] = shufflevector <8 x i32> [[TMP24]], <8 x i32> [[TMP25]], <12 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
 ; CHECK-NEXT:    ret <12 x i32> [[TMP26]]
 ;

diff  --git a/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions-expanded.ll b/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions-expanded.ll
index d391687a3980..2c3efaa91567 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions-expanded.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions-expanded.ll
@@ -13,9 +13,9 @@ define i32 @add_v4i32(i32* %p) #0 {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[P:%.*]] to <4 x i32>*
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4, [[TBAA0:!tbaa !.*]]
-; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[BIN_RDX:%.*]] = add <4 x i32> [[TMP1]], [[RDX_SHUF]]
-; CHECK-NEXT:    [[RDX_SHUF3:%.*]] = shufflevector <4 x i32> [[BIN_RDX]], <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT:    [[RDX_SHUF3:%.*]] = shufflevector <4 x i32> [[BIN_RDX]], <4 x i32> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[BIN_RDX4:%.*]] = add <4 x i32> [[BIN_RDX]], [[RDX_SHUF3]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x i32> [[BIN_RDX4]], i32 0
 ; CHECK-NEXT:    ret i32 [[TMP2]]
@@ -52,11 +52,11 @@ define signext i16 @mul_v8i16(i16* %p) #0 {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i16* [[P:%.*]] to <8 x i16>*
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], align 2, [[TBAA4:!tbaa !.*]]
-; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[BIN_RDX:%.*]] = mul <8 x i16> [[TMP1]], [[RDX_SHUF]]
-; CHECK-NEXT:    [[RDX_SHUF3:%.*]] = shufflevector <8 x i16> [[BIN_RDX]], <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT:    [[RDX_SHUF3:%.*]] = shufflevector <8 x i16> [[BIN_RDX]], <8 x i16> poison, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[BIN_RDX4:%.*]] = mul <8 x i16> [[BIN_RDX]], [[RDX_SHUF3]]
-; CHECK-NEXT:    [[RDX_SHUF5:%.*]] = shufflevector <8 x i16> [[BIN_RDX4]], <8 x i16> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT:    [[RDX_SHUF5:%.*]] = shufflevector <8 x i16> [[BIN_RDX4]], <8 x i16> poison, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[BIN_RDX6:%.*]] = mul <8 x i16> [[BIN_RDX4]], [[RDX_SHUF5]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <8 x i16> [[BIN_RDX6]], i32 0
 ; CHECK-NEXT:    ret i16 [[TMP2]]
@@ -96,13 +96,13 @@ define signext i8 @or_v16i8(i8* %p) #0 {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[P:%.*]] to <16 x i8>*
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[TMP0]], align 1, [[TBAA6:!tbaa !.*]]
-; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> poison, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[BIN_RDX:%.*]] = or <16 x i8> [[TMP1]], [[RDX_SHUF]]
-; CHECK-NEXT:    [[RDX_SHUF4:%.*]] = shufflevector <16 x i8> [[BIN_RDX]], <16 x i8> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT:    [[RDX_SHUF4:%.*]] = shufflevector <16 x i8> [[BIN_RDX]], <16 x i8> poison, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[BIN_RDX5:%.*]] = or <16 x i8> [[BIN_RDX]], [[RDX_SHUF4]]
-; CHECK-NEXT:    [[RDX_SHUF6:%.*]] = shufflevector <16 x i8> [[BIN_RDX5]], <16 x i8> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT:    [[RDX_SHUF6:%.*]] = shufflevector <16 x i8> [[BIN_RDX5]], <16 x i8> poison, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[BIN_RDX7:%.*]] = or <16 x i8> [[BIN_RDX5]], [[RDX_SHUF6]]
-; CHECK-NEXT:    [[RDX_SHUF8:%.*]] = shufflevector <16 x i8> [[BIN_RDX7]], <16 x i8> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT:    [[RDX_SHUF8:%.*]] = shufflevector <16 x i8> [[BIN_RDX7]], <16 x i8> poison, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[BIN_RDX9:%.*]] = or <16 x i8> [[BIN_RDX7]], [[RDX_SHUF8]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <16 x i8> [[BIN_RDX9]], i32 0
 ; CHECK-NEXT:    ret i8 [[TMP2]]
@@ -142,10 +142,10 @@ define i32 @smin_v4i32(i32* %p) #0 {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[P:%.*]] to <4 x i32>*
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4, [[TBAA0]]
-; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[RDX_MINMAX_CMP:%.*]] = icmp slt <4 x i32> [[TMP1]], [[RDX_SHUF]]
 ; CHECK-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x i32> [[TMP1]], <4 x i32> [[RDX_SHUF]]
-; CHECK-NEXT:    [[RDX_SHUF3:%.*]] = shufflevector <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT:    [[RDX_SHUF3:%.*]] = shufflevector <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[RDX_MINMAX_CMP4:%.*]] = icmp slt <4 x i32> [[RDX_MINMAX_SELECT]], [[RDX_SHUF3]]
 ; CHECK-NEXT:    [[RDX_MINMAX_SELECT5:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP4]], <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> [[RDX_SHUF3]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x i32> [[RDX_MINMAX_SELECT5]], i32 0
@@ -196,10 +196,10 @@ define i32 @umax_v4i32(i32* %p) #0 {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[P:%.*]] to <4 x i32>*
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4, [[TBAA0]]
-; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[RDX_MINMAX_CMP:%.*]] = icmp ugt <4 x i32> [[TMP1]], [[RDX_SHUF]]
 ; CHECK-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x i32> [[TMP1]], <4 x i32> [[RDX_SHUF]]
-; CHECK-NEXT:    [[RDX_SHUF3:%.*]] = shufflevector <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT:    [[RDX_SHUF3:%.*]] = shufflevector <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[RDX_MINMAX_CMP4:%.*]] = icmp ugt <4 x i32> [[RDX_MINMAX_SELECT]], [[RDX_SHUF3]]
 ; CHECK-NEXT:    [[RDX_MINMAX_SELECT5:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP4]], <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> [[RDX_SHUF3]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x i32> [[RDX_MINMAX_SELECT5]], i32 0
@@ -250,9 +250,9 @@ define float @fadd_v4i32(float* %p) #0 {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast float* [[P:%.*]] to <4 x float>*
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4, [[TBAA7:!tbaa !.*]]
-; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP1]], [[RDX_SHUF]]
-; CHECK-NEXT:    [[RDX_SHUF3:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT:    [[RDX_SHUF3:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[BIN_RDX4:%.*]] = fadd fast <4 x float> [[BIN_RDX]], [[RDX_SHUF3]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[BIN_RDX4]], i32 0
 ; CHECK-NEXT:    [[BIN_RDX5:%.*]] = fadd fast float -0.000000e+00, [[TMP2]]
@@ -291,9 +291,9 @@ define float @fmul_v4i32(float* %p) #0 {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast float* [[P:%.*]] to <4 x float>*
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4, [[TBAA7]]
-; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[BIN_RDX:%.*]] = fmul fast <4 x float> [[TMP1]], [[RDX_SHUF]]
-; CHECK-NEXT:    [[RDX_SHUF3:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT:    [[RDX_SHUF3:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[BIN_RDX4:%.*]] = fmul fast <4 x float> [[BIN_RDX]], [[RDX_SHUF3]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[BIN_RDX4]], i32 0
 ; CHECK-NEXT:    [[BIN_RDX5:%.*]] = fmul fast float 1.000000e+00, [[TMP2]]

diff  --git a/llvm/test/Transforms/SLPVectorizer/AMDGPU/horizontal-store.ll b/llvm/test/Transforms/SLPVectorizer/AMDGPU/horizontal-store.ll
index 397e98eb881d..2324952888f1 100644
--- a/llvm/test/Transforms/SLPVectorizer/AMDGPU/horizontal-store.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AMDGPU/horizontal-store.ll
@@ -21,18 +21,18 @@ define i32 @smaxv6() {
 ; GFX9-NEXT:    [[CMP1:%.*]] = icmp sgt i32 [[TMP2]], [[TMP3]]
 ; GFX9-NEXT:    [[SELECT1:%.*]] = select i1 [[CMP1]], i32 [[TMP2]], i32 [[TMP3]]
 ; GFX9-NEXT:    [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2) to <4 x i32>*), align 8
-; GFX9-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+; GFX9-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> poison, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
 ; GFX9-NEXT:    [[RDX_MINMAX_CMP:%.*]] = icmp sgt <4 x i32> [[TMP4]], [[RDX_SHUF]]
 ; GFX9-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x i32> [[TMP4]], <4 x i32> [[RDX_SHUF]]
-; GFX9-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+; GFX9-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
 ; GFX9-NEXT:    [[RDX_MINMAX_CMP2:%.*]] = icmp sgt <4 x i32> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]]
 ; GFX9-NEXT:    [[RDX_MINMAX_SELECT3:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP2]], <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> [[RDX_SHUF1]]
 ; GFX9-NEXT:    [[TMP5:%.*]] = extractelement <4 x i32> [[RDX_MINMAX_SELECT3]], i32 0
-; GFX9-NEXT:    [[TMP6:%.*]] = icmp sgt i32 [[TMP5]], [[SELECT1]]
-; GFX9-NEXT:    [[OP_EXTRA:%.*]] = select i1 [[TMP6]], i32 [[TMP5]], i32 [[SELECT1]]
+; GFX9-NEXT:    [[OP_EXTRA:%.*]] = icmp sgt i32 [[TMP5]], [[SELECT1]]
+; GFX9-NEXT:    [[OP_EXTRA4:%.*]] = select i1 [[OP_EXTRA]], i32 [[TMP5]], i32 [[SELECT1]]
 ; GFX9-NEXT:    [[STORE_SELECT:%.*]] = select i1 [[CMP1]], i32 3, i32 4
 ; GFX9-NEXT:    store i32 [[STORE_SELECT]], i32* @var, align 8
-; GFX9-NEXT:    ret i32 [[OP_EXTRA]]
+; GFX9-NEXT:    ret i32 [[OP_EXTRA4]]
 ;
   %load1 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16
   %load2 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 1), align 4
@@ -68,18 +68,18 @@ define i64 @sminv6() {
 ; GFX9-NEXT:    [[CMP1:%.*]] = icmp slt i64 [[TMP2]], [[TMP3]]
 ; GFX9-NEXT:    [[SELECT1:%.*]] = select i1 [[CMP1]], i64 [[TMP2]], i64 [[TMP3]]
 ; GFX9-NEXT:    [[TMP4:%.*]] = load <4 x i64>, <4 x i64>* bitcast (i64* getelementptr inbounds ([32 x i64], [32 x i64]* @arr64, i64 0, i64 2) to <4 x i64>*), align 16
-; GFX9-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x i64> [[TMP4]], <4 x i64> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+; GFX9-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x i64> [[TMP4]], <4 x i64> poison, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
 ; GFX9-NEXT:    [[RDX_MINMAX_CMP:%.*]] = icmp slt <4 x i64> [[TMP4]], [[RDX_SHUF]]
 ; GFX9-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x i64> [[TMP4]], <4 x i64> [[RDX_SHUF]]
-; GFX9-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x i64> [[RDX_MINMAX_SELECT]], <4 x i64> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+; GFX9-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x i64> [[RDX_MINMAX_SELECT]], <4 x i64> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
 ; GFX9-NEXT:    [[RDX_MINMAX_CMP2:%.*]] = icmp slt <4 x i64> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]]
 ; GFX9-NEXT:    [[RDX_MINMAX_SELECT3:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP2]], <4 x i64> [[RDX_MINMAX_SELECT]], <4 x i64> [[RDX_SHUF1]]
 ; GFX9-NEXT:    [[TMP5:%.*]] = extractelement <4 x i64> [[RDX_MINMAX_SELECT3]], i32 0
-; GFX9-NEXT:    [[TMP6:%.*]] = icmp slt i64 [[TMP5]], [[SELECT1]]
-; GFX9-NEXT:    [[OP_EXTRA:%.*]] = select i1 [[TMP6]], i64 [[TMP5]], i64 [[SELECT1]]
+; GFX9-NEXT:    [[OP_EXTRA:%.*]] = icmp slt i64 [[TMP5]], [[SELECT1]]
+; GFX9-NEXT:    [[OP_EXTRA4:%.*]] = select i1 [[OP_EXTRA]], i64 [[TMP5]], i64 [[SELECT1]]
 ; GFX9-NEXT:    [[STORE_SELECT:%.*]] = select i1 [[CMP1]], i64 3, i64 4
 ; GFX9-NEXT:    store i64 [[STORE_SELECT]], i64* @var64, align 8
-; GFX9-NEXT:    ret i64 [[OP_EXTRA]]
+; GFX9-NEXT:    ret i64 [[OP_EXTRA4]]
 ;
   %load1 = load i64, i64* getelementptr inbounds ([32 x i64], [32 x i64]* @arr64, i64 0, i64 0), align 16
   %load2 = load i64, i64* getelementptr inbounds ([32 x i64], [32 x i64]* @arr64, i64 0, i64 1), align 8
@@ -217,18 +217,18 @@ define i32 @smax_wdiff_valuenum(i32, i32 %v1) {
 ; GFX9-NEXT:    [[EX0:%.*]] = extractelement <2 x i32> [[VLOAD]], i32 0
 ; GFX9-NEXT:    [[SELECT1:%.*]] = select i1 [[CMP1]], i32 [[EX0]], i32 [[V1]]
 ; GFX9-NEXT:    [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2) to <4 x i32>*), align 8
-; GFX9-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+; GFX9-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
 ; GFX9-NEXT:    [[RDX_MINMAX_CMP:%.*]] = icmp sgt <4 x i32> [[TMP2]], [[RDX_SHUF]]
 ; GFX9-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x i32> [[TMP2]], <4 x i32> [[RDX_SHUF]]
-; GFX9-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+; GFX9-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
 ; GFX9-NEXT:    [[RDX_MINMAX_CMP2:%.*]] = icmp sgt <4 x i32> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]]
 ; GFX9-NEXT:    [[RDX_MINMAX_SELECT3:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP2]], <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> [[RDX_SHUF1]]
 ; GFX9-NEXT:    [[TMP3:%.*]] = extractelement <4 x i32> [[RDX_MINMAX_SELECT3]], i32 0
-; GFX9-NEXT:    [[TMP4:%.*]] = icmp sgt i32 [[TMP3]], [[SELECT1]]
-; GFX9-NEXT:    [[OP_EXTRA:%.*]] = select i1 [[TMP4]], i32 [[TMP3]], i32 [[SELECT1]]
+; GFX9-NEXT:    [[OP_EXTRA:%.*]] = icmp sgt i32 [[TMP3]], [[SELECT1]]
+; GFX9-NEXT:    [[OP_EXTRA4:%.*]] = select i1 [[OP_EXTRA]], i32 [[TMP3]], i32 [[SELECT1]]
 ; GFX9-NEXT:    [[STOREVAL:%.*]] = select i1 [[CMP1]], i32 3, i32 4
 ; GFX9-NEXT:    store i32 [[STOREVAL]], i32* @var, align 8
-; GFX9-NEXT:    ret i32 [[OP_EXTRA]]
+; GFX9-NEXT:    ret i32 [[OP_EXTRA4]]
 ;
   %vload = load <2 x i32>, <2 x i32>* bitcast ([32 x i32]* @arr to <2 x i32>*), align 16
   %elt1 = extractelement <2 x i32> %vload, i32 0

diff  --git a/llvm/test/Transforms/SLPVectorizer/AMDGPU/reduction.ll b/llvm/test/Transforms/SLPVectorizer/AMDGPU/reduction.ll
index f97b1243f954..c4bf51b0a6c5 100644
--- a/llvm/test/Transforms/SLPVectorizer/AMDGPU/reduction.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AMDGPU/reduction.ll
@@ -5,9 +5,9 @@
 define half @reduction_half4(<4 x half> %a) {
 ; GFX9-LABEL: @reduction_half4(
 ; GFX9-NEXT:  entry:
-; GFX9-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x half> [[A:%.*]], <4 x half> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+; GFX9-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x half> [[A:%.*]], <4 x half> poison, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
 ; GFX9-NEXT:    [[BIN_RDX:%.*]] = fadd fast <4 x half> [[A]], [[RDX_SHUF]]
-; GFX9-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x half> [[BIN_RDX]], <4 x half> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+; GFX9-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x half> [[BIN_RDX]], <4 x half> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
 ; GFX9-NEXT:    [[BIN_RDX2:%.*]] = fadd fast <4 x half> [[BIN_RDX]], [[RDX_SHUF1]]
 ; GFX9-NEXT:    [[TMP0:%.*]] = extractelement <4 x half> [[BIN_RDX2]], i32 0
 ; GFX9-NEXT:    ret half [[TMP0]]
@@ -39,11 +39,11 @@ entry:
 define half @reduction_half8(<8 x half> %vec8) {
 ; GFX9-LABEL: @reduction_half8(
 ; GFX9-NEXT:  entry:
-; GFX9-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x half> [[VEC8:%.*]], <8 x half> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
+; GFX9-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x half> [[VEC8:%.*]], <8 x half> poison, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
 ; GFX9-NEXT:    [[BIN_RDX:%.*]] = fadd fast <8 x half> [[VEC8]], [[RDX_SHUF]]
-; GFX9-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <8 x half> [[BIN_RDX]], <8 x half> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; GFX9-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <8 x half> [[BIN_RDX]], <8 x half> poison, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
 ; GFX9-NEXT:    [[BIN_RDX2:%.*]] = fadd fast <8 x half> [[BIN_RDX]], [[RDX_SHUF1]]
-; GFX9-NEXT:    [[RDX_SHUF3:%.*]] = shufflevector <8 x half> [[BIN_RDX2]], <8 x half> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; GFX9-NEXT:    [[RDX_SHUF3:%.*]] = shufflevector <8 x half> [[BIN_RDX2]], <8 x half> poison, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
 ; GFX9-NEXT:    [[BIN_RDX4:%.*]] = fadd fast <8 x half> [[BIN_RDX2]], [[RDX_SHUF3]]
 ; GFX9-NEXT:    [[TMP0:%.*]] = extractelement <8 x half> [[BIN_RDX4]], i32 0
 ; GFX9-NEXT:    ret half [[TMP0]]
@@ -91,13 +91,13 @@ entry:
 define half @reduction_half16(<16 x half> %vec16) {
 ; GFX9-LABEL: @reduction_half16(
 ; GFX9-NEXT:  entry:
-; GFX9-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <16 x half> [[VEC16:%.*]], <16 x half> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; GFX9-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <16 x half> [[VEC16:%.*]], <16 x half> poison, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
 ; GFX9-NEXT:    [[BIN_RDX:%.*]] = fadd fast <16 x half> [[VEC16]], [[RDX_SHUF]]
-; GFX9-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <16 x half> [[BIN_RDX]], <16 x half> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; GFX9-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <16 x half> [[BIN_RDX]], <16 x half> poison, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
 ; GFX9-NEXT:    [[BIN_RDX2:%.*]] = fadd fast <16 x half> [[BIN_RDX]], [[RDX_SHUF1]]
-; GFX9-NEXT:    [[RDX_SHUF3:%.*]] = shufflevector <16 x half> [[BIN_RDX2]], <16 x half> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; GFX9-NEXT:    [[RDX_SHUF3:%.*]] = shufflevector <16 x half> [[BIN_RDX2]], <16 x half> poison, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
 ; GFX9-NEXT:    [[BIN_RDX4:%.*]] = fadd fast <16 x half> [[BIN_RDX2]], [[RDX_SHUF3]]
-; GFX9-NEXT:    [[RDX_SHUF5:%.*]] = shufflevector <16 x half> [[BIN_RDX4]], <16 x half> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; GFX9-NEXT:    [[RDX_SHUF5:%.*]] = shufflevector <16 x half> [[BIN_RDX4]], <16 x half> poison, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
 ; GFX9-NEXT:    [[BIN_RDX6:%.*]] = fadd fast <16 x half> [[BIN_RDX4]], [[RDX_SHUF5]]
 ; GFX9-NEXT:    [[TMP0:%.*]] = extractelement <16 x half> [[BIN_RDX6]], i32 0
 ; GFX9-NEXT:    ret half [[TMP0]]
@@ -203,9 +203,9 @@ entry:
 define i16 @reduction_v4i16(<4 x i16> %a) {
 ; GFX9-LABEL: @reduction_v4i16(
 ; GFX9-NEXT:  entry:
-; GFX9-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x i16> [[A:%.*]], <4 x i16> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+; GFX9-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x i16> [[A:%.*]], <4 x i16> poison, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
 ; GFX9-NEXT:    [[BIN_RDX:%.*]] = add <4 x i16> [[A]], [[RDX_SHUF]]
-; GFX9-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x i16> [[BIN_RDX]], <4 x i16> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+; GFX9-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x i16> [[BIN_RDX]], <4 x i16> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
 ; GFX9-NEXT:    [[BIN_RDX2:%.*]] = add <4 x i16> [[BIN_RDX]], [[RDX_SHUF1]]
 ; GFX9-NEXT:    [[TMP0:%.*]] = extractelement <4 x i16> [[BIN_RDX2]], i32 0
 ; GFX9-NEXT:    ret i16 [[TMP0]]
@@ -237,11 +237,11 @@ entry:
 define i16 @reduction_v8i16(<8 x i16> %vec8) {
 ; GFX9-LABEL: @reduction_v8i16(
 ; GFX9-NEXT:  entry:
-; GFX9-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x i16> [[VEC8:%.*]], <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
+; GFX9-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x i16> [[VEC8:%.*]], <8 x i16> poison, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
 ; GFX9-NEXT:    [[BIN_RDX:%.*]] = add <8 x i16> [[VEC8]], [[RDX_SHUF]]
-; GFX9-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <8 x i16> [[BIN_RDX]], <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; GFX9-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <8 x i16> [[BIN_RDX]], <8 x i16> poison, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
 ; GFX9-NEXT:    [[BIN_RDX2:%.*]] = add <8 x i16> [[BIN_RDX]], [[RDX_SHUF1]]
-; GFX9-NEXT:    [[RDX_SHUF3:%.*]] = shufflevector <8 x i16> [[BIN_RDX2]], <8 x i16> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; GFX9-NEXT:    [[RDX_SHUF3:%.*]] = shufflevector <8 x i16> [[BIN_RDX2]], <8 x i16> poison, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
 ; GFX9-NEXT:    [[BIN_RDX4:%.*]] = add <8 x i16> [[BIN_RDX2]], [[RDX_SHUF3]]
 ; GFX9-NEXT:    [[TMP0:%.*]] = extractelement <8 x i16> [[BIN_RDX4]], i32 0
 ; GFX9-NEXT:    ret i16 [[TMP0]]
@@ -289,10 +289,10 @@ entry:
 define i16 @reduction_umin_v4i16(<4 x i16> %vec4) {
 ; GFX9-LABEL: @reduction_umin_v4i16(
 ; GFX9-NEXT:  entry:
-; GFX9-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x i16> [[VEC4:%.*]], <4 x i16> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+; GFX9-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x i16> [[VEC4:%.*]], <4 x i16> poison, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
 ; GFX9-NEXT:    [[RDX_MINMAX_CMP:%.*]] = icmp ult <4 x i16> [[VEC4]], [[RDX_SHUF]]
 ; GFX9-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x i16> [[VEC4]], <4 x i16> [[RDX_SHUF]]
-; GFX9-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x i16> [[RDX_MINMAX_SELECT]], <4 x i16> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+; GFX9-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x i16> [[RDX_MINMAX_SELECT]], <4 x i16> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
 ; GFX9-NEXT:    [[RDX_MINMAX_CMP2:%.*]] = icmp ult <4 x i16> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]]
 ; GFX9-NEXT:    [[RDX_MINMAX_SELECT3:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP2]], <4 x i16> [[RDX_MINMAX_SELECT]], <4 x i16> [[RDX_SHUF1]]
 ; GFX9-NEXT:    [[TMP0:%.*]] = extractelement <4 x i16> [[RDX_MINMAX_SELECT3]], i32 0
@@ -331,13 +331,13 @@ entry:
 define i16 @reduction_icmp_v8i16(<8 x i16> %vec8) {
 ; GFX9-LABEL: @reduction_icmp_v8i16(
 ; GFX9-NEXT:  entry:
-; GFX9-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x i16> [[VEC8:%.*]], <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
+; GFX9-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x i16> [[VEC8:%.*]], <8 x i16> poison, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
 ; GFX9-NEXT:    [[RDX_MINMAX_CMP:%.*]] = icmp ult <8 x i16> [[VEC8]], [[RDX_SHUF]]
 ; GFX9-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP]], <8 x i16> [[VEC8]], <8 x i16> [[RDX_SHUF]]
-; GFX9-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <8 x i16> [[RDX_MINMAX_SELECT]], <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; GFX9-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <8 x i16> [[RDX_MINMAX_SELECT]], <8 x i16> poison, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
 ; GFX9-NEXT:    [[RDX_MINMAX_CMP2:%.*]] = icmp ult <8 x i16> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]]
 ; GFX9-NEXT:    [[RDX_MINMAX_SELECT3:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP2]], <8 x i16> [[RDX_MINMAX_SELECT]], <8 x i16> [[RDX_SHUF1]]
-; GFX9-NEXT:    [[RDX_SHUF4:%.*]] = shufflevector <8 x i16> [[RDX_MINMAX_SELECT3]], <8 x i16> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; GFX9-NEXT:    [[RDX_SHUF4:%.*]] = shufflevector <8 x i16> [[RDX_MINMAX_SELECT3]], <8 x i16> poison, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
 ; GFX9-NEXT:    [[RDX_MINMAX_CMP5:%.*]] = icmp ult <8 x i16> [[RDX_MINMAX_SELECT3]], [[RDX_SHUF4]]
 ; GFX9-NEXT:    [[RDX_MINMAX_SELECT6:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP5]], <8 x i16> [[RDX_MINMAX_SELECT3]], <8 x i16> [[RDX_SHUF4]]
 ; GFX9-NEXT:    [[TMP0:%.*]] = extractelement <8 x i16> [[RDX_MINMAX_SELECT6]], i32 0
@@ -402,16 +402,16 @@ entry:
 define i16 @reduction_smin_v16i16(<16 x i16> %vec16) {
 ; GFX9-LABEL: @reduction_smin_v16i16(
 ; GFX9-NEXT:  entry:
-; GFX9-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <16 x i16> [[VEC16:%.*]], <16 x i16> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; GFX9-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <16 x i16> [[VEC16:%.*]], <16 x i16> poison, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
 ; GFX9-NEXT:    [[RDX_MINMAX_CMP:%.*]] = icmp slt <16 x i16> [[VEC16]], [[RDX_SHUF]]
 ; GFX9-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select <16 x i1> [[RDX_MINMAX_CMP]], <16 x i16> [[VEC16]], <16 x i16> [[RDX_SHUF]]
-; GFX9-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <16 x i16> [[RDX_MINMAX_SELECT]], <16 x i16> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; GFX9-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <16 x i16> [[RDX_MINMAX_SELECT]], <16 x i16> poison, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
 ; GFX9-NEXT:    [[RDX_MINMAX_CMP2:%.*]] = icmp slt <16 x i16> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]]
 ; GFX9-NEXT:    [[RDX_MINMAX_SELECT3:%.*]] = select <16 x i1> [[RDX_MINMAX_CMP2]], <16 x i16> [[RDX_MINMAX_SELECT]], <16 x i16> [[RDX_SHUF1]]
-; GFX9-NEXT:    [[RDX_SHUF4:%.*]] = shufflevector <16 x i16> [[RDX_MINMAX_SELECT3]], <16 x i16> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; GFX9-NEXT:    [[RDX_SHUF4:%.*]] = shufflevector <16 x i16> [[RDX_MINMAX_SELECT3]], <16 x i16> poison, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
 ; GFX9-NEXT:    [[RDX_MINMAX_CMP5:%.*]] = icmp slt <16 x i16> [[RDX_MINMAX_SELECT3]], [[RDX_SHUF4]]
 ; GFX9-NEXT:    [[RDX_MINMAX_SELECT6:%.*]] = select <16 x i1> [[RDX_MINMAX_CMP5]], <16 x i16> [[RDX_MINMAX_SELECT3]], <16 x i16> [[RDX_SHUF4]]
-; GFX9-NEXT:    [[RDX_SHUF7:%.*]] = shufflevector <16 x i16> [[RDX_MINMAX_SELECT6]], <16 x i16> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; GFX9-NEXT:    [[RDX_SHUF7:%.*]] = shufflevector <16 x i16> [[RDX_MINMAX_SELECT6]], <16 x i16> poison, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
 ; GFX9-NEXT:    [[RDX_MINMAX_CMP8:%.*]] = icmp slt <16 x i16> [[RDX_MINMAX_SELECT6]], [[RDX_SHUF7]]
 ; GFX9-NEXT:    [[RDX_MINMAX_SELECT9:%.*]] = select <16 x i1> [[RDX_MINMAX_CMP8]], <16 x i16> [[RDX_MINMAX_SELECT6]], <16 x i16> [[RDX_SHUF7]]
 ; GFX9-NEXT:    [[TMP0:%.*]] = extractelement <16 x i16> [[RDX_MINMAX_SELECT9]], i32 0
@@ -530,10 +530,10 @@ entry:
 define i16 @reduction_umax_v4i16(<4 x i16> %vec4) {
 ; GFX9-LABEL: @reduction_umax_v4i16(
 ; GFX9-NEXT:  entry:
-; GFX9-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x i16> [[VEC4:%.*]], <4 x i16> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+; GFX9-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x i16> [[VEC4:%.*]], <4 x i16> poison, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
 ; GFX9-NEXT:    [[RDX_MINMAX_CMP:%.*]] = icmp ugt <4 x i16> [[VEC4]], [[RDX_SHUF]]
 ; GFX9-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x i16> [[VEC4]], <4 x i16> [[RDX_SHUF]]
-; GFX9-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x i16> [[RDX_MINMAX_SELECT]], <4 x i16> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+; GFX9-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x i16> [[RDX_MINMAX_SELECT]], <4 x i16> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
 ; GFX9-NEXT:    [[RDX_MINMAX_CMP2:%.*]] = icmp ugt <4 x i16> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]]
 ; GFX9-NEXT:    [[RDX_MINMAX_SELECT3:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP2]], <4 x i16> [[RDX_MINMAX_SELECT]], <4 x i16> [[RDX_SHUF1]]
 ; GFX9-NEXT:    [[TMP0:%.*]] = extractelement <4 x i16> [[RDX_MINMAX_SELECT3]], i32 0
@@ -572,10 +572,10 @@ entry:
 define i16 @reduction_smax_v4i16(<4 x i16> %vec4) {
 ; GFX9-LABEL: @reduction_smax_v4i16(
 ; GFX9-NEXT:  entry:
-; GFX9-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x i16> [[VEC4:%.*]], <4 x i16> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+; GFX9-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x i16> [[VEC4:%.*]], <4 x i16> poison, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
 ; GFX9-NEXT:    [[RDX_MINMAX_CMP:%.*]] = icmp sgt <4 x i16> [[VEC4]], [[RDX_SHUF]]
 ; GFX9-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x i16> [[VEC4]], <4 x i16> [[RDX_SHUF]]
-; GFX9-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x i16> [[RDX_MINMAX_SELECT]], <4 x i16> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+; GFX9-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x i16> [[RDX_MINMAX_SELECT]], <4 x i16> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
 ; GFX9-NEXT:    [[RDX_MINMAX_CMP2:%.*]] = icmp sgt <4 x i16> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]]
 ; GFX9-NEXT:    [[RDX_MINMAX_SELECT3:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP2]], <4 x i16> [[RDX_MINMAX_SELECT]], <4 x i16> [[RDX_SHUF1]]
 ; GFX9-NEXT:    [[TMP0:%.*]] = extractelement <4 x i16> [[RDX_MINMAX_SELECT3]], i32 0

diff  --git a/llvm/test/Transforms/SROA/vector-promotion.ll b/llvm/test/Transforms/SROA/vector-promotion.ll
index 0bf2d23a591b..3d76e76d1518 100644
--- a/llvm/test/Transforms/SROA/vector-promotion.ll
+++ b/llvm/test/Transforms/SROA/vector-promotion.ll
@@ -57,7 +57,7 @@ entry:
 ; CHECK-NOT: load
 ; CHECK:      %[[extract1:.*]] = extractelement <4 x i32> %x, i32 2
 ; CHECK-NEXT: %[[extract2:.*]] = extractelement <4 x i32> %y, i32 3
-; CHECK-NEXT: %[[extract3:.*]] = shufflevector <4 x i32> %y, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT: %[[extract3:.*]] = shufflevector <4 x i32> %y, <4 x i32> poison, <2 x i32> <i32 0, i32 1>
 ; CHECK-NEXT: %[[extract4:.*]] = extractelement <2 x i32> %[[extract3]], i32 0
 
   %tmp4 = add i32 %tmp1, %tmp2
@@ -305,17 +305,17 @@ entry:
   %a.cast0 = bitcast i32* %a.gep0 to <2 x i32>*
   %first = load <2 x i32>, <2 x i32>* %a.cast0
 ; CHECK-NOT: load
-; CHECK:      %[[extract1:.*]] = shufflevector <4 x i32> <i32 0, i32 1, i32 2, i32 3>, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
+; CHECK:      %[[extract1:.*]] = shufflevector <4 x i32> <i32 0, i32 1, i32 2, i32 3>, <4 x i32> poison, <2 x i32> <i32 0, i32 1>
 
   %a.gep1 = getelementptr <4 x i32>, <4 x i32>* %a, i32 0, i32 1
   %a.cast1 = bitcast i32* %a.gep1 to <2 x i32>*
   %second = load <2 x i32>, <2 x i32>* %a.cast1
-; CHECK-NEXT: %[[extract2:.*]] = shufflevector <4 x i32> <i32 0, i32 1, i32 2, i32 3>, <4 x i32> undef, <2 x i32> <i32 1, i32 2>
+; CHECK-NEXT: %[[extract2:.*]] = shufflevector <4 x i32> <i32 0, i32 1, i32 2, i32 3>, <4 x i32> poison, <2 x i32> <i32 1, i32 2>
 
   %a.gep2 = getelementptr <4 x i32>, <4 x i32>* %a, i32 0, i32 2
   %a.cast2 = bitcast i32* %a.gep2 to <2 x i32>*
   %third = load <2 x i32>, <2 x i32>* %a.cast2
-; CHECK-NEXT: %[[extract3:.*]] = shufflevector <4 x i32> <i32 0, i32 1, i32 2, i32 3>, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+; CHECK-NEXT: %[[extract3:.*]] = shufflevector <4 x i32> <i32 0, i32 1, i32 2, i32 3>, <4 x i32> poison, <2 x i32> <i32 2, i32 3>
 
   %tmp = shufflevector <2 x i32> %first, <2 x i32> %second, <2 x i32> <i32 0, i32 2>
   %ret = shufflevector <2 x i32> %tmp, <2 x i32> %third, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -372,7 +372,7 @@ entry:
   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a.cast0, i8* %x, i32 8, i1 false)
 ; CHECK:      %[[xptr:.*]] = bitcast i8* %x to <2 x float>*
 ; CHECK-NEXT: %[[x:.*]] = load <2 x float>, <2 x float>* %[[xptr]]
-; CHECK-NEXT: %[[expand_x:.*]] = shufflevector <2 x float> %[[x]], <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+; CHECK-NEXT: %[[expand_x:.*]] = shufflevector <2 x float> %[[x]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
 ; CHECK-NEXT: select <4 x i1> <i1 true, i1 true, i1 false, i1 false>  
 
   %a.gep1 = getelementptr <4 x float>, <4 x float>* %a, i32 0, i32 1
@@ -380,7 +380,7 @@ entry:
   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a.cast1, i8* %y, i32 8, i1 false)
 ; CHECK-NEXT: %[[yptr:.*]] = bitcast i8* %y to <2 x float>*
 ; CHECK-NEXT: %[[y:.*]] = load <2 x float>, <2 x float>* %[[yptr]]
-; CHECK-NEXT: %[[expand_y:.*]] = shufflevector <2 x float> %[[y]], <2 x float> undef, <4 x i32> <i32 undef, i32 0, i32 1, i32 undef>
+; CHECK-NEXT: %[[expand_y:.*]] = shufflevector <2 x float> %[[y]], <2 x float> poison, <4 x i32> <i32 undef, i32 0, i32 1, i32 undef>
 ; CHECK-NEXT: select <4 x i1> <i1 false, i1 true, i1 true, i1 false>
 
   %a.gep2 = getelementptr <4 x float>, <4 x float>* %a, i32 0, i32 2
@@ -388,7 +388,7 @@ entry:
   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a.cast2, i8* %z, i32 8, i1 false)
 ; CHECK-NEXT: %[[zptr:.*]] = bitcast i8* %z to <2 x float>*
 ; CHECK-NEXT: %[[z:.*]] = load <2 x float>, <2 x float>* %[[zptr]]
-; CHECK-NEXT: %[[expand_z:.*]] = shufflevector <2 x float> %[[z]], <2 x float> undef, <4 x i32> <i32 undef, i32 undef, i32 0, i32 1>
+; CHECK-NEXT: %[[expand_z:.*]] = shufflevector <2 x float> %[[z]], <2 x float> poison, <4 x i32> <i32 undef, i32 undef, i32 0, i32 1>
 ; CHECK-NEXT: select <4 x i1> <i1 false, i1 false, i1 true, i1 true>
 
   %a.gep3 = getelementptr <4 x float>, <4 x float>* %a, i32 0, i32 3
@@ -400,7 +400,7 @@ entry:
 
   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %out, i8* %a.cast2, i32 8, i1 false)
 ; CHECK-NEXT: %[[outptr:.*]] = bitcast i8* %out to <2 x float>*
-; CHECK-NEXT: %[[extract_out:.*]] = shufflevector <4 x float> %[[insert_f]], <4 x float> undef, <2 x i32> <i32 2, i32 3>
+; CHECK-NEXT: %[[extract_out:.*]] = shufflevector <4 x float> %[[insert_f]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
 ; CHECK-NEXT: store <2 x float> %[[extract_out]], <2 x float>* %[[outptr]]
 
   %ret = load <4 x float>, <4 x float>* %a
@@ -594,7 +594,7 @@ entry:
   store i32 %y, i32* %a.tmp2
 ; CHECK-NOT: store
 ; CHECK:      %[[V1:.*]] = bitcast i32 %y to <2 x i16>
-; CHECK-NEXT: %[[V2:.*]] = shufflevector <2 x i16> %[[V1]], <2 x i16> undef, <4 x i32> <i32 undef, i32 undef, i32 0, i32 1>
+; CHECK-NEXT: %[[V2:.*]] = shufflevector <2 x i16> %[[V1]], <2 x i16> poison, <4 x i32> <i32 undef, i32 undef, i32 0, i32 1>
 ; CHECK-NEXT: %[[V3:.*]] = select <4 x i1> <i1 false, i1 false, i1 true, i1 true>, <4 x i16> %[[V2]], <4 x i16> %x
 ; CHECK-NEXT: %[[V4:.*]] = bitcast <4 x i16> %[[V3]] to <2 x float>
 

diff  --git a/llvm/unittests/IR/PatternMatch.cpp b/llvm/unittests/IR/PatternMatch.cpp
index 9a1a73a5c06f..57840b32e4d1 100644
--- a/llvm/unittests/IR/PatternMatch.cpp
+++ b/llvm/unittests/IR/PatternMatch.cpp
@@ -950,11 +950,14 @@ TEST_F(PatternMatchTest, VectorOps) {
   Value *EX2 = IRB.CreateExtractElement(VI4, (uint64_t)0);
   Value *EX3 = IRB.CreateExtractElement(IdxVec, (uint64_t)1);
 
-  Value *Zero = ConstantAggregateZero::get(i32VecTy);
-  Value *SI1 = IRB.CreateShuffleVector(VI1, UndefVec, Zero);
+  Constant *Zero = ConstantAggregateZero::get(i32VecTy);
+  SmallVector<int, 16> ZeroMask;
+  ShuffleVectorInst::getShuffleMask(Zero, ZeroMask);
+
+  Value *SI1 = IRB.CreateShuffleVector(VI1, ZeroMask);
   Value *SI2 = IRB.CreateShuffleVector(VI3, VI4, IdxVec);
-  Value *SI3 = IRB.CreateShuffleVector(VI3, UndefVec, Zero);
-  Value *SI4 = IRB.CreateShuffleVector(VI4, UndefVec, Zero);
+  Value *SI3 = IRB.CreateShuffleVector(VI3, ZeroMask);
+  Value *SI4 = IRB.CreateShuffleVector(VI4, ZeroMask);
 
   Value *SP1 = IRB.CreateVectorSplat(2, IRB.getInt8(2));
   Value *SP2 = IRB.CreateVectorSplat(2, Val);


        


More information about the cfe-commits mailing list