[llvm] 3830e4e - AMDGPU: Create poison values instead of undef

Wed Nov 16 14:47:30 PST 2022

Author: Matt Arsenault
Date: 2022-11-16T14:47:24-08:00
New Revision: 3830e4e58cd72566aee1d412054667ad51470b25

URL: https://github.com/llvm/llvm-project/commit/3830e4e58cd72566aee1d412054667ad51470b25
DIFF: https://github.com/llvm/llvm-project/commit/3830e4e58cd72566aee1d412054667ad51470b25.diff

LOG: AMDGPU: Create poison values instead of undef

These placeholders don't care about the finer points on
the difference between the two.

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp
    llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
    llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
    llvm/lib/Target/AMDGPU/AMDGPURewriteOutArguments.cpp
    llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp
    llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp
    llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_optimizations_mul_one.ll
    llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fdiv.ll
    llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll
    llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-mul24.ll
    llvm/test/CodeGen/AMDGPU/amdgpu.private-memory.ll
    llvm/test/CodeGen/AMDGPU/large-work-group-promote-alloca.ll
    llvm/test/CodeGen/AMDGPU/rewrite-out-arguments-address-space.ll
    llvm/test/CodeGen/AMDGPU/rewrite-out-arguments.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp
index 3ccfd9dde2695..28967bb8e5b1c 100644

--- a/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp
@@ -626,7 +626,7 @@ void AMDGPUAtomicOptimizer::optimizeAtomic(Instruction &I,
   if (NeedResult) {
     // Create a PHI node to get our new atomic result into the exit block.
     PHINode *const PHI = B.CreatePHI(Ty, 2);
-    PHI->addIncoming(UndefValue::get(Ty), EntryBB);
+    PHI->addIncoming(PoisonValue::get(Ty), EntryBB);
     PHI->addIncoming(NewI, SingleLaneTerminator->getParent());
 
     // We need to broadcast the value who was the lowest active lane (the first
@@ -643,7 +643,7 @@ void AMDGPUAtomicOptimizer::optimizeAtomic(Instruction &I,
       CallInst *const ReadFirstLaneHi =
           B.CreateIntrinsic(Intrinsic::amdgcn_readfirstlane, {}, ExtractHi);
       Value *const PartialInsert = B.CreateInsertElement(
-          UndefValue::get(VecTy), ReadFirstLaneLo, B.getInt32(0));
+          PoisonValue::get(VecTy), ReadFirstLaneLo, B.getInt32(0));
       Value *const Insert =
           B.CreateInsertElement(PartialInsert, ReadFirstLaneHi, B.getInt32(1));
       BroadcastI = B.CreateBitCast(Insert, Ty);
@@ -690,7 +690,7 @@ void AMDGPUAtomicOptimizer::optimizeAtomic(Instruction &I,
       B.SetInsertPoint(PixelExitBB->getFirstNonPHI());
 
       PHINode *const PHI = B.CreatePHI(Ty, 2);
-      PHI->addIncoming(UndefValue::get(Ty), PixelEntryBB);
+      PHI->addIncoming(PoisonValue::get(Ty), PixelEntryBB);
       PHI->addIncoming(Result, I.getParent());
       I.replaceAllUsesWith(PHI);
     } else {

diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
index 97f53258783ee..41ed2467170f0 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
@@ -473,7 +473,7 @@ static Value *insertValues(IRBuilder<> &Builder,
     return Values[0];
   }
 
-  Value *NewVal = UndefValue::get(Ty);
+  Value *NewVal = PoisonValue::get(Ty);
   for (int I = 0, E = Values.size(); I != E; ++I)
     NewVal = Builder.CreateInsertElement(NewVal, Values[I], I);
 
@@ -794,7 +794,7 @@ bool AMDGPUCodeGenPrepare::visitFDiv(BinaryOperator &FDiv) {
 
   Value *NewFDiv = nullptr;
   if (auto *VT = dyn_cast<FixedVectorType>(FDiv.getType())) {
-    NewFDiv = UndefValue::get(VT);
+    NewFDiv = PoisonValue::get(VT);
 
     // FIXME: Doesn't do the right thing for cases where the vector is partially
     // constant. This works when the scalarizer pass is run first.
@@ -1260,7 +1260,7 @@ bool AMDGPUCodeGenPrepare::visitBinaryOperator(BinaryOperator &I) {
     Builder.SetCurrentDebugLocation(I.getDebugLoc());
 
     if (auto *VT = dyn_cast<FixedVectorType>(Ty)) {
-      NewDiv = UndefValue::get(VT);
+      NewDiv = PoisonValue::get(VT);
 
       for (unsigned N = 0, E = VT->getNumElements(); N != E; ++N) {
         Value *NumEltN = Builder.CreateExtractElement(Num, N);

diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
index 5a4426ba8113c..1ba7beddbd100 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
@@ -911,12 +911,9 @@ bool AMDGPUPromoteAllocaImpl::handleAlloca(AllocaInst &I, bool SufficientLDS) {
 
   Type *GVTy = ArrayType::get(I.getAllocatedType(), WorkGroupSize);
   GlobalVariable *GV = new GlobalVariable(
-      *Mod, GVTy, false, GlobalValue::InternalLinkage,
-      UndefValue::get(GVTy),
-      Twine(F->getName()) + Twine('.') + I.getName(),
-      nullptr,
-      GlobalVariable::NotThreadLocal,
-      AMDGPUAS::LOCAL_ADDRESS);
+      *Mod, GVTy, false, GlobalValue::InternalLinkage, PoisonValue::get(GVTy),
+      Twine(F->getName()) + Twine('.') + I.getName(), nullptr,
+      GlobalVariable::NotThreadLocal, AMDGPUAS::LOCAL_ADDRESS);
   GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
   GV->setAlignment(I.getAlign());
 

diff  --git a/llvm/lib/Target/AMDGPU/AMDGPURewriteOutArguments.cpp b/llvm/lib/Target/AMDGPU/AMDGPURewriteOutArguments.cpp
index 4f8a61a770973..15736005409e9 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURewriteOutArguments.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURewriteOutArguments.cpp
@@ -340,7 +340,7 @@ bool AMDGPURewriteOutArguments::runOnFunction(Function &F) {
     B.SetCurrentDebugLocation(RI->getDebugLoc());
 
     int RetIdx = 0;
-    Value *NewRetVal = UndefValue::get(NewRetTy);
+    Value *NewRetVal = PoisonValue::get(NewRetTy);
 
     Value *RetVal = RI->getReturnValue();
     if (RetVal)
@@ -362,7 +362,7 @@ bool AMDGPURewriteOutArguments::runOnFunction(Function &F) {
     if (OutArgIndexes.count(Arg.getArgNo())) {
       // It's easier to preserve the type of the argument list. We rely on
       // DeadArgumentElimination to take care of these.
-      StubCallArgs.push_back(UndefValue::get(Arg.getType()));
+      StubCallArgs.push_back(PoisonValue::get(Arg.getType()));
     } else {
       StubCallArgs.push_back(&Arg);
     }

diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp b/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp
index 9adcb9b55013b..753e1373d71c4 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp
@@ -224,7 +224,7 @@ bool AMDGPUUnifyDivergentExitNodes::runOnFunction(Function &F) {
         DummyReturnBB = BasicBlock::Create(F.getContext(),
                                            "DummyReturnBlock", &F);
         Type *RetTy = F.getReturnType();
-        Value *RetVal = RetTy->isVoidTy() ? nullptr : UndefValue::get(RetTy);
+        Value *RetVal = RetTy->isVoidTy() ? nullptr : PoisonValue::get(RetTy);
         ReturnInst::Create(F.getContext(), RetVal, DummyReturnBB);
         ReturningBlocks.push_back(DummyReturnBB);
       }
@@ -286,7 +286,7 @@ bool AMDGPUUnifyDivergentExitNodes::runOnFunction(Function &F) {
       // structurizer/annotator can't handle the multiple exits
 
       Type *RetTy = F.getReturnType();
-      Value *RetVal = RetTy->isVoidTy() ? nullptr : UndefValue::get(RetTy);
+      Value *RetVal = RetTy->isVoidTy() ? nullptr : PoisonValue::get(RetTy);
       // Remove and delete the unreachable inst.
       UnreachableBlock->getTerminator()->eraseFromParent();
 

diff  --git a/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp b/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp
index afd2a38b11ec5..a1327f9e087b9 100644
--- a/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp
+++ b/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp
@@ -131,7 +131,7 @@ void SIAnnotateControlFlow::initialize(Module &M, const GCNSubtarget &ST) {
 
   BoolTrue = ConstantInt::getTrue(Context);
   BoolFalse = ConstantInt::getFalse(Context);
-  BoolUndef = UndefValue::get(Boolean);
+  BoolUndef = PoisonValue::get(Boolean);
   IntMaskZero = ConstantInt::get(IntMask, 0);
 
   If = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_if, { IntMask });

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_optimizations_mul_one.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_optimizations_mul_one.ll
index 70ba804081828..32f9cbdf138e8 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_optimizations_mul_one.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_optimizations_mul_one.ll
@@ -64,7 +64,7 @@ define amdgpu_cs void @atomic_add_and_format(<4 x i32> inreg %arg) {
 ; IR-NEXT:    [[TMP10:%.*]] = call i32 @llvm.amdgcn.struct.buffer.atomic.add.i32(i32 [[TMP7]], <4 x i32> [[ARG:%.*]], i32 0, i32 0, i32 0, i32 0)
 ; IR-NEXT:    br label [[TMP11]]
 ; IR:       11:
-; IR-NEXT:    [[TMP12:%.*]] = phi i32 [ undef, [[DOTENTRY:%.*]] ], [ [[TMP10]], [[TMP9]] ]
+; IR-NEXT:    [[TMP12:%.*]] = phi i32 [ poison, [[DOTENTRY:%.*]] ], [ [[TMP10]], [[TMP9]] ]
 ; IR-NEXT:    [[TMP13:%.*]] = call i32 @llvm.amdgcn.readfirstlane(i32 [[TMP12]])
 ; IR-NEXT:    [[TMP14:%.*]] = add i32 [[TMP13]], [[TMP5]]
 ; IR-NEXT:    call void @llvm.amdgcn.struct.buffer.store.format.v4i32(<4 x i32> [[ARG]], <4 x i32> [[ARG]], i32 [[TMP14]], i32 0, i32 0, i32 0)
@@ -158,7 +158,7 @@ define amdgpu_cs void @atomic_sub_and_format(<4 x i32> inreg %arg) {
 ; IR-NEXT:    [[TMP10:%.*]] = call i32 @llvm.amdgcn.struct.buffer.atomic.sub.i32(i32 [[TMP7]], <4 x i32> [[ARG:%.*]], i32 0, i32 0, i32 0, i32 0)
 ; IR-NEXT:    br label [[TMP11]]
 ; IR:       11:
-; IR-NEXT:    [[TMP12:%.*]] = phi i32 [ undef, [[DOTENTRY:%.*]] ], [ [[TMP10]], [[TMP9]] ]
+; IR-NEXT:    [[TMP12:%.*]] = phi i32 [ poison, [[DOTENTRY:%.*]] ], [ [[TMP10]], [[TMP9]] ]
 ; IR-NEXT:    [[TMP13:%.*]] = call i32 @llvm.amdgcn.readfirstlane(i32 [[TMP12]])
 ; IR-NEXT:    [[TMP14:%.*]] = sub i32 [[TMP13]], [[TMP5]]
 ; IR-NEXT:    call void @llvm.amdgcn.struct.buffer.store.format.v4i32(<4 x i32> [[ARG]], <4 x i32> [[ARG]], i32 [[TMP14]], i32 0, i32 0, i32 0)
@@ -255,7 +255,7 @@ define amdgpu_cs void @atomic_xor_and_format(<4 x i32> inreg %arg) {
 ; IR-NEXT:    [[TMP11:%.*]] = call i32 @llvm.amdgcn.struct.buffer.atomic.xor.i32(i32 [[TMP8]], <4 x i32> [[ARG:%.*]], i32 0, i32 0, i32 0, i32 0)
 ; IR-NEXT:    br label [[TMP12]]
 ; IR:       12:
-; IR-NEXT:    [[TMP13:%.*]] = phi i32 [ undef, [[DOTENTRY:%.*]] ], [ [[TMP11]], [[TMP10]] ]
+; IR-NEXT:    [[TMP13:%.*]] = phi i32 [ poison, [[DOTENTRY:%.*]] ], [ [[TMP11]], [[TMP10]] ]
 ; IR-NEXT:    [[TMP14:%.*]] = call i32 @llvm.amdgcn.readfirstlane(i32 [[TMP13]])
 ; IR-NEXT:    [[TMP15:%.*]] = and i32 [[TMP5]], 1
 ; IR-NEXT:    [[TMP16:%.*]] = xor i32 [[TMP14]], [[TMP15]]

diff  --git a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fdiv.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fdiv.ll
index 74228ce7f3444..1a9b966adec97 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fdiv.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fdiv.ll
@@ -82,7 +82,7 @@ define amdgpu_kernel void @rcp_fdiv_fpmath(float addrspace(1)* %out, float %x) #
 ; CHECK: %[[NO_A0:[0-9]+]] = extractelement <2 x float> %a, i64 0
 ; CHECK: %[[NO_B0:[0-9]+]] = extractelement <2 x float> %b, i64 0
 ; CHECK: %[[NO_FDIV0:[0-9]+]] = fdiv float %[[NO_A0]], %[[NO_B0]]
-; CHECK: %[[NO_INS0:[0-9]+]] = insertelement <2 x float> undef, float %[[NO_FDIV0]], i64 0
+; CHECK: %[[NO_INS0:[0-9]+]] = insertelement <2 x float> poison, float %[[NO_FDIV0]], i64 0
 ; CHECK: %[[NO_A1:[0-9]+]] = extractelement <2 x float> %a, i64 1
 ; CHECK: %[[NO_B1:[0-9]+]] = extractelement <2 x float> %b, i64 1
 ; CHECK: %[[NO_FDIV1:[0-9]+]] = fdiv float %[[NO_A1]], %[[NO_B1]]
@@ -92,7 +92,7 @@ define amdgpu_kernel void @rcp_fdiv_fpmath(float addrspace(1)* %out, float %x) #
 ; CHECK: %[[HALF_A0:[0-9]+]] = extractelement <2 x float> %a, i64 0
 ; CHECK: %[[HALF_B0:[0-9]+]] = extractelement <2 x float> %b, i64 0
 ; CHECK: %[[HALF_FDIV0:[0-9]+]] = fdiv float %[[HALF_A0]], %[[HALF_B0]]
-; CHECK: %[[HALF_INS0:[0-9]+]] = insertelement <2 x float> undef, float %[[HALF_FDIV0]], i64 0
+; CHECK: %[[HALF_INS0:[0-9]+]] = insertelement <2 x float> poison, float %[[HALF_FDIV0]], i64 0
 ; CHECK: %[[HALF_A1:[0-9]+]] = extractelement <2 x float> %a, i64 1
 ; CHECK: %[[HALF_B1:[0-9]+]] = extractelement <2 x float> %b, i64 1
 ; CHECK: %[[HALF_FDIV1:[0-9]+]] = fdiv float %[[HALF_A1]], %[[HALF_B1]]
@@ -102,7 +102,7 @@ define amdgpu_kernel void @rcp_fdiv_fpmath(float addrspace(1)* %out, float %x) #
 ; CHECK: %[[ONE_A0:[0-9]+]] = extractelement <2 x float> %a, i64 0
 ; CHECK: %[[ONE_B0:[0-9]+]] = extractelement <2 x float> %b, i64 0
 ; CHECK: %[[ONE_FDIV0:[0-9]+]] = fdiv float %[[ONE_A0]], %[[ONE_B0]]
-; CHECK: %[[ONE_INS0:[0-9]+]] = insertelement <2 x float> undef, float %[[ONE_FDIV0]], i64 0
+; CHECK: %[[ONE_INS0:[0-9]+]] = insertelement <2 x float> poison, float %[[ONE_FDIV0]], i64 0
 ; CHECK: %[[ONE_A1:[0-9]+]] = extractelement <2 x float> %a, i64 1
 ; CHECK: %[[ONE_B1:[0-9]+]] = extractelement <2 x float> %b, i64 1
 ; CHECK: %[[ONE_FDIV1:[0-9]+]] = fdiv float %[[ONE_A1]], %[[ONE_B1]]
@@ -112,7 +112,7 @@ define amdgpu_kernel void @rcp_fdiv_fpmath(float addrspace(1)* %out, float %x) #
 ; CHECK: %[[A0:[0-9]+]] = extractelement <2 x float> %a, i64 0
 ; CHECK: %[[B0:[0-9]+]] = extractelement <2 x float> %b, i64 0
 ; CHECK: %[[FDIV0:[0-9]+]] = call float @llvm.amdgcn.fdiv.fast(float %[[A0]], float %[[B0]])
-; CHECK: %[[INS0:[0-9]+]] = insertelement <2 x float> undef, float %[[FDIV0]], i64 0
+; CHECK: %[[INS0:[0-9]+]] = insertelement <2 x float> poison, float %[[FDIV0]], i64 0
 ; CHECK: %[[A1:[0-9]+]] = extractelement <2 x float> %a, i64 1
 ; CHECK: %[[B1:[0-9]+]] = extractelement <2 x float> %b, i64 1
 ; CHECK: %[[FDIV1:[0-9]+]] = call float @llvm.amdgcn.fdiv.fast(float %[[A1]], float %[[B1]])
@@ -136,7 +136,7 @@ define amdgpu_kernel void @fdiv_fpmath_vector(<2 x float> addrspace(1)* %out, <2
 ; CHECK-LABEL: @rcp_fdiv_fpmath_vector(
 ; CHECK: %[[NO0:[0-9]+]] =  extractelement <2 x float> %x, i64 0
 ; CHECK: %[[NO_FDIV0:[0-9]+]] = fdiv float 1.000000e+00, %[[NO0]]
-; CHECK: %[[NO_INS0:[0-9]+]] = insertelement <2 x float> undef, float %[[NO_FDIV0]], i64 0
+; CHECK: %[[NO_INS0:[0-9]+]] = insertelement <2 x float> poison, float %[[NO_FDIV0]], i64 0
 ; CHECK: %[[NO1:[0-9]+]] = extractelement <2 x float> %x, i64 1
 ; CHECK: %[[NO_FDIV1:[0-9]+]] = fdiv float 1.000000e+00, %[[NO1]]
 ; CHECK: %no.md = insertelement <2 x float> %[[NO_INS0]], float %[[NO_FDIV1]], i64 1
@@ -144,7 +144,7 @@ define amdgpu_kernel void @fdiv_fpmath_vector(<2 x float> addrspace(1)* %out, <2
 
 ; CHECK: %[[HALF0:[0-9]+]] =  extractelement <2 x float> %x, i64 0
 ; CHECK: %[[HALF_FDIV0:[0-9]+]] = fdiv float 1.000000e+00, %[[HALF0]]
-; CHECK: %[[HALF_INS0:[0-9]+]] = insertelement <2 x float> undef, float %[[HALF_FDIV0]], i64 0
+; CHECK: %[[HALF_INS0:[0-9]+]] = insertelement <2 x float> poison, float %[[HALF_FDIV0]], i64 0
 ; CHECK: %[[HALF1:[0-9]+]] = extractelement <2 x float> %x, i64 1
 ; CHECK: %[[HALF_FDIV1:[0-9]+]] =  fdiv float 1.000000e+00, %[[HALF1]]
 ; CHECK: %md.half.ulp = insertelement <2 x float> %[[HALF_INS0]], float %[[HALF_FDIV1]], i64 1
@@ -152,7 +152,7 @@ define amdgpu_kernel void @fdiv_fpmath_vector(<2 x float> addrspace(1)* %out, <2
 
 ; CHECK: %[[AFN_NO0:[0-9]+]] =  extractelement <2 x float> %x, i64 0
 ; CHECK: %[[AFN_NO_FDIV0:[0-9]+]] = call afn float @llvm.amdgcn.rcp.f32(float %[[AFN_NO0]])
-; CHECK: %[[AFN_NO_INS0:[0-9]+]] = insertelement <2 x float> undef, float %[[AFN_NO_FDIV0]], i64 0
+; CHECK: %[[AFN_NO_INS0:[0-9]+]] = insertelement <2 x float> poison, float %[[AFN_NO_FDIV0]], i64 0
 ; CHECK: %[[AFN_NO1:[0-9]+]] = extractelement <2 x float> %x, i64 1
 ; CHECK: %[[AFN_NO_FDIV1:[0-9]+]] =  call afn float @llvm.amdgcn.rcp.f32(float %[[AFN_NO1]])
 ; CHECK: %afn.no.md = insertelement <2 x float> %[[AFN_NO_INS0]], float %[[AFN_NO_FDIV1]], i64 1
@@ -160,7 +160,7 @@ define amdgpu_kernel void @fdiv_fpmath_vector(<2 x float> addrspace(1)* %out, <2
 
 ; CHECK: %[[FAST_NO0:[0-9]+]] =  extractelement <2 x float> %x, i64 0
 ; CHECK: %[[FAST_NO_RCP0:[0-9]+]] = call fast float @llvm.amdgcn.rcp.f32(float %[[FAST_NO0]])
-; CHECK: %[[FAST_NO_INS0:[0-9]+]] = insertelement <2 x float> undef, float %[[FAST_NO_RCP0]], i64 0
+; CHECK: %[[FAST_NO_INS0:[0-9]+]] = insertelement <2 x float> poison, float %[[FAST_NO_RCP0]], i64 0
 ; CHECK: %[[FAST_NO1:[0-9]+]] = extractelement <2 x float> %x, i64 1
 ; CHECK: %[[FAST_NO_RCP1:[0-9]+]] =  call fast float @llvm.amdgcn.rcp.f32(float %[[FAST_NO1]])
 ; CHECK: %fast.no.md = insertelement <2 x float> %[[FAST_NO_INS0]], float %[[FAST_NO_RCP1]], i64 1
@@ -168,7 +168,7 @@ define amdgpu_kernel void @fdiv_fpmath_vector(<2 x float> addrspace(1)* %out, <2
 
 ; CHECK: %[[AFN_250:[0-9]+]] =  extractelement <2 x float> %x, i64 0
 ; CHECK: %[[AFN_25_RCP0:[0-9]+]] = call afn float @llvm.amdgcn.rcp.f32(float %[[AFN_250]])
-; CHECK: %[[AFN_25_INS0:[0-9]+]] = insertelement <2 x float> undef, float %[[AFN_25_RCP0]], i64 0
+; CHECK: %[[AFN_25_INS0:[0-9]+]] = insertelement <2 x float> poison, float %[[AFN_25_RCP0]], i64 0
 ; CHECK: %[[AFN_251:[0-9]+]] = extractelement <2 x float> %x, i64 1
 ; CHECK: %[[AFN_25_RCP1:[0-9]+]] =  call afn float @llvm.amdgcn.rcp.f32(float %[[AFN_251]])
 ; CHECK: %afn.25ulp = insertelement <2 x float> %[[AFN_25_INS0]], float %[[AFN_25_RCP1]], i64 1
@@ -176,7 +176,7 @@ define amdgpu_kernel void @fdiv_fpmath_vector(<2 x float> addrspace(1)* %out, <2
 
 ; CHECK: %[[FAST_250:[0-9]+]] =  extractelement <2 x float> %x, i64 0
 ; CHECK: %[[FAST_25_RCP0:[0-9]+]] = call fast float @llvm.amdgcn.rcp.f32(float %[[FAST_250]])
-; CHECK: %[[FAST_25_INS0:[0-9]+]] = insertelement <2 x float> undef, float %[[FAST_25_RCP0]], i64 0
+; CHECK: %[[FAST_25_INS0:[0-9]+]] = insertelement <2 x float> poison, float %[[FAST_25_RCP0]], i64 0
 ; CHECK: %[[FAST_251:[0-9]+]] = extractelement <2 x float> %x, i64 1
 ; CHECK: %[[FAST_25_RCP1:[0-9]+]] =  call fast float @llvm.amdgcn.rcp.f32(float %[[FAST_251]])
 ; CHECK: %fast.25ulp = insertelement <2 x float> %[[FAST_25_INS0]], float %[[FAST_25_RCP1]], i64 1
@@ -206,7 +206,7 @@ define amdgpu_kernel void @rcp_fdiv_fpmath_vector(<2 x float> addrspace(1)* %out
 ; CHECK-LABEL: @rcp_fdiv_fpmath_vector_nonsplat(
 ; CHECK: %[[NO0:[0-9]+]] =  extractelement <2 x float> %x, i64 0
 ; CHECK: %[[NO_FDIV0:[0-9]+]] = fdiv float 1.000000e+00, %[[NO0]]
-; CHECK: %[[NO_INS0:[0-9]+]] = insertelement <2 x float> undef, float %[[NO_FDIV0]], i64 0
+; CHECK: %[[NO_INS0:[0-9]+]] = insertelement <2 x float> poison, float %[[NO_FDIV0]], i64 0
 ; CHECK: %[[NO1:[0-9]+]] = extractelement <2 x float> %x, i64 1
 ; CHECK: %[[NO_FDIV1:[0-9]+]] = fdiv float 2.000000e+00, %[[NO1]]
 ; CHECK: %no.md = insertelement <2 x float> %[[NO_INS0]], float %[[NO_FDIV1]], i64 1
@@ -214,7 +214,7 @@ define amdgpu_kernel void @rcp_fdiv_fpmath_vector(<2 x float> addrspace(1)* %out
 
 ; CHECK: %[[AFN_NO0:[0-9]+]] =  extractelement <2 x float> %x, i64 0
 ; CHECK: %[[AFN_NO_FDIV0:[0-9]+]] = call afn float @llvm.amdgcn.rcp.f32(float %[[AFN_NO0]])
-; CHECK: %[[AFN_NO_INS0:[0-9]+]] = insertelement <2 x float> undef, float %[[AFN_NO_FDIV0]], i64 0
+; CHECK: %[[AFN_NO_INS0:[0-9]+]] = insertelement <2 x float> poison, float %[[AFN_NO_FDIV0]], i64 0
 ; CHECK: %[[AFN_NO1:[0-9]+]] = extractelement <2 x float> %x, i64 1
 ; CHECK: %[[AFN_NO_FDIV1:[0-9]+]] =  call afn float @llvm.amdgcn.rcp.f32(float %[[AFN_NO1]])
 ; CHECK: %[[AFN_NO_MUL1:[0-9]+]] = fmul afn float 2.000000e+00, %[[AFN_NO_FDIV1]]
@@ -223,7 +223,7 @@ define amdgpu_kernel void @rcp_fdiv_fpmath_vector(<2 x float> addrspace(1)* %out
 
 ; CHECK: %[[FAST_NO0:[0-9]+]] =  extractelement <2 x float> %x, i64 0
 ; CHECK: %[[FAST_NO_RCP0:[0-9]+]] = call fast float @llvm.amdgcn.rcp.f32(float %[[FAST_NO0]])
-; CHECK: %[[FAST_NO_INS0:[0-9]+]] = insertelement <2 x float> undef, float %[[FAST_NO_RCP0]], i64 0
+; CHECK: %[[FAST_NO_INS0:[0-9]+]] = insertelement <2 x float> poison, float %[[FAST_NO_RCP0]], i64 0
 ; CHECK: %[[FAST_NO1:[0-9]+]] = extractelement <2 x float> %x, i64 1
 ; CHECK: %[[FAST_NO_RCP1:[0-9]+]] =  call fast float @llvm.amdgcn.rcp.f32(float %[[FAST_NO1]])
 ; CHECK: %[[FAST_NO_MUL1:[0-9]+]] = fmul fast float 2.000000e+00, %[[FAST_NO_RCP1]]
@@ -232,7 +232,7 @@ define amdgpu_kernel void @rcp_fdiv_fpmath_vector(<2 x float> addrspace(1)* %out
 
 ; CHECK: %[[AFN_250:[0-9]+]] =  extractelement <2 x float> %x, i64 0
 ; CHECK: %[[AFN_25_RCP0:[0-9]+]] = call afn float @llvm.amdgcn.rcp.f32(float %[[AFN_250]])
-; CHECK: %[[AFN_25_INS0:[0-9]+]] = insertelement <2 x float> undef, float %[[AFN_25_RCP0]], i64 0
+; CHECK: %[[AFN_25_INS0:[0-9]+]] = insertelement <2 x float> poison, float %[[AFN_25_RCP0]], i64 0
 ; CHECK: %[[AFN_251:[0-9]+]] = extractelement <2 x float> %x, i64 1
 ; CHECK: %[[AFN_25_RCP1:[0-9]+]] =  call afn float @llvm.amdgcn.rcp.f32(float %[[AFN_251]])
 ; CHECK: %[[AFN_25_MUL1:[0-9]+]] = fmul afn float 2.000000e+00, %[[AFN_25_RCP1]]
@@ -241,7 +241,7 @@ define amdgpu_kernel void @rcp_fdiv_fpmath_vector(<2 x float> addrspace(1)* %out
 
 ; CHECK: %[[FAST_250:[0-9]+]] =  extractelement <2 x float> %x, i64 0
 ; CHECK: %[[FAST_25_RCP0:[0-9]+]] = call fast float @llvm.amdgcn.rcp.f32(float %[[FAST_250]])
-; CHECK: %[[FAST_25_INS0:[0-9]+]] = insertelement <2 x float> undef, float %[[FAST_25_RCP0]], i64 0
+; CHECK: %[[FAST_25_INS0:[0-9]+]] = insertelement <2 x float> poison, float %[[FAST_25_RCP0]], i64 0
 ; CHECK: %[[FAST_251:[0-9]+]] = extractelement <2 x float> %x, i64 1
 ; CHECK: %[[FAST_25_RCP1:[0-9]+]] =  call fast float @llvm.amdgcn.rcp.f32(float %[[FAST_251]])
 ; CHECK: %[[FAST_25_MUL1:[0-9]+]] = fmul fast float 2.000000e+00, %[[FAST_25_RCP1]]
@@ -271,7 +271,7 @@ define amdgpu_kernel void @rcp_fdiv_fpmath_vector_nonsplat(<2 x float> addrspace
 ; CHECK: %[[AFN_B0:[0-9]+]] = extractelement <2 x float> %y, i64 0
 ; CHECK: %[[AFN_RCP0:[0-9]+]] = call afn float @llvm.amdgcn.rcp.f32(float %[[AFN_B0]])
 ; CHECK: %[[AFN_MUL0:[0-9]+]] = fmul afn float %[[AFN_A0]], %[[AFN_RCP0]]
-; CHECK: %[[AFN_INS0:[0-9]+]] = insertelement <2 x float> undef, float %[[AFN_MUL0]], i64 0
+; CHECK: %[[AFN_INS0:[0-9]+]] = insertelement <2 x float> poison, float %[[AFN_MUL0]], i64 0
 ; CHECK: %[[AFN_A1:[0-9]+]] = extractelement <2 x float> %x.insert, i64 1
 ; CHECK: %[[AFN_B1:[0-9]+]] = extractelement <2 x float> %y, i64 1
 ; CHECK: %[[AFN_RCP1:[0-9]+]] = call afn float @llvm.amdgcn.rcp.f32(float %[[AFN_B1]])
@@ -283,7 +283,7 @@ define amdgpu_kernel void @rcp_fdiv_fpmath_vector_nonsplat(<2 x float> addrspace
 ; CHECK: %[[FAST_B0:[0-9]+]] = extractelement <2 x float> %y, i64 0
 ; CHECK: %[[FAST_RCP0:[0-9]+]] = call fast float @llvm.amdgcn.rcp.f32(float %[[FAST_B0]])
 ; CHECK: %[[FAST_MUL0:[0-9]+]] = fmul fast float %[[FAST_A0]], %[[FAST_RCP0]]
-; CHECK: %[[FAST_INS0:[0-9]+]] = insertelement <2 x float> undef, float %[[FAST_MUL0]], i64 0
+; CHECK: %[[FAST_INS0:[0-9]+]] = insertelement <2 x float> poison, float %[[FAST_MUL0]], i64 0
 ; CHECK: %[[FAST_A1:[0-9]+]] = extractelement <2 x float> %x.insert, i64 1
 ; CHECK: %[[FAST_B1:[0-9]+]] = extractelement <2 x float> %y, i64 1
 ; CHECK: %[[FAST_RCP1:[0-9]+]] = call fast float @llvm.amdgcn.rcp.f32(float %[[FAST_B1]])

diff  --git a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll
index 49f016407c108..a76b69fab36f9 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll
@@ -1086,7 +1086,7 @@ define amdgpu_kernel void @udiv_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %x
 ; CHECK-NEXT:    [[TMP29:%.*]] = icmp uge i32 [[TMP28]], [[TMP2]]
 ; CHECK-NEXT:    [[TMP30:%.*]] = add i32 [[TMP26]], 1
 ; CHECK-NEXT:    [[TMP31:%.*]] = select i1 [[TMP29]], i32 [[TMP30]], i32 [[TMP26]]
-; CHECK-NEXT:    [[TMP32:%.*]] = insertelement <4 x i32> undef, i32 [[TMP31]], i64 0
+; CHECK-NEXT:    [[TMP32:%.*]] = insertelement <4 x i32> poison, i32 [[TMP31]], i64 0
 ; CHECK-NEXT:    [[TMP33:%.*]] = extractelement <4 x i32> [[X]], i64 1
 ; CHECK-NEXT:    [[TMP34:%.*]] = extractelement <4 x i32> [[Y]], i64 1
 ; CHECK-NEXT:    [[TMP35:%.*]] = uitofp i32 [[TMP34]] to float
@@ -1412,7 +1412,7 @@ define amdgpu_kernel void @urem_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %x
 ; CHECK-NEXT:    [[TMP27:%.*]] = icmp uge i32 [[TMP26]], [[TMP2]]
 ; CHECK-NEXT:    [[TMP28:%.*]] = sub i32 [[TMP26]], [[TMP2]]
 ; CHECK-NEXT:    [[TMP29:%.*]] = select i1 [[TMP27]], i32 [[TMP28]], i32 [[TMP26]]
-; CHECK-NEXT:    [[TMP30:%.*]] = insertelement <4 x i32> undef, i32 [[TMP29]], i64 0
+; CHECK-NEXT:    [[TMP30:%.*]] = insertelement <4 x i32> poison, i32 [[TMP29]], i64 0
 ; CHECK-NEXT:    [[TMP31:%.*]] = extractelement <4 x i32> [[X]], i64 1
 ; CHECK-NEXT:    [[TMP32:%.*]] = extractelement <4 x i32> [[Y]], i64 1
 ; CHECK-NEXT:    [[TMP33:%.*]] = uitofp i32 [[TMP32]] to float
@@ -1723,7 +1723,7 @@ define amdgpu_kernel void @sdiv_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %x
 ; CHECK-NEXT:    [[TMP38:%.*]] = select i1 [[TMP36]], i32 [[TMP37]], i32 [[TMP33]]
 ; CHECK-NEXT:    [[TMP39:%.*]] = xor i32 [[TMP38]], [[TMP5]]
 ; CHECK-NEXT:    [[TMP40:%.*]] = sub i32 [[TMP39]], [[TMP5]]
-; CHECK-NEXT:    [[TMP41:%.*]] = insertelement <4 x i32> undef, i32 [[TMP40]], i64 0
+; CHECK-NEXT:    [[TMP41:%.*]] = insertelement <4 x i32> poison, i32 [[TMP40]], i64 0
 ; CHECK-NEXT:    [[TMP42:%.*]] = extractelement <4 x i32> [[X]], i64 1
 ; CHECK-NEXT:    [[TMP43:%.*]] = extractelement <4 x i32> [[Y]], i64 1
 ; CHECK-NEXT:    [[TMP44:%.*]] = ashr i32 [[TMP42]], 31
@@ -2156,7 +2156,7 @@ define amdgpu_kernel void @srem_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %x
 ; CHECK-NEXT:    [[TMP35:%.*]] = select i1 [[TMP33]], i32 [[TMP34]], i32 [[TMP32]]
 ; CHECK-NEXT:    [[TMP36:%.*]] = xor i32 [[TMP35]], [[TMP3]]
 ; CHECK-NEXT:    [[TMP37:%.*]] = sub i32 [[TMP36]], [[TMP3]]
-; CHECK-NEXT:    [[TMP38:%.*]] = insertelement <4 x i32> undef, i32 [[TMP37]], i64 0
+; CHECK-NEXT:    [[TMP38:%.*]] = insertelement <4 x i32> poison, i32 [[TMP37]], i64 0
 ; CHECK-NEXT:    [[TMP39:%.*]] = extractelement <4 x i32> [[X]], i64 1
 ; CHECK-NEXT:    [[TMP40:%.*]] = extractelement <4 x i32> [[Y]], i64 1
 ; CHECK-NEXT:    [[TMP41:%.*]] = ashr i32 [[TMP39]], 31
@@ -2534,7 +2534,7 @@ define amdgpu_kernel void @udiv_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %x
 ; CHECK-NEXT:    [[TMP17:%.*]] = add i32 [[TMP12]], [[TMP16]]
 ; CHECK-NEXT:    [[TMP18:%.*]] = and i32 [[TMP17]], 65535
 ; CHECK-NEXT:    [[TMP19:%.*]] = trunc i32 [[TMP18]] to i16
-; CHECK-NEXT:    [[TMP20:%.*]] = insertelement <4 x i16> undef, i16 [[TMP19]], i64 0
+; CHECK-NEXT:    [[TMP20:%.*]] = insertelement <4 x i16> poison, i16 [[TMP19]], i64 0
 ; CHECK-NEXT:    [[TMP21:%.*]] = extractelement <4 x i16> [[X]], i64 1
 ; CHECK-NEXT:    [[TMP22:%.*]] = extractelement <4 x i16> [[Y]], i64 1
 ; CHECK-NEXT:    [[TMP23:%.*]] = zext i16 [[TMP21]] to i32
@@ -2742,7 +2742,7 @@ define amdgpu_kernel void @urem_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %x
 ; CHECK-NEXT:    [[TMP19:%.*]] = sub i32 [[TMP3]], [[TMP18]]
 ; CHECK-NEXT:    [[TMP20:%.*]] = and i32 [[TMP19]], 65535
 ; CHECK-NEXT:    [[TMP21:%.*]] = trunc i32 [[TMP20]] to i16
-; CHECK-NEXT:    [[TMP22:%.*]] = insertelement <4 x i16> undef, i16 [[TMP21]], i64 0
+; CHECK-NEXT:    [[TMP22:%.*]] = insertelement <4 x i16> poison, i16 [[TMP21]], i64 0
 ; CHECK-NEXT:    [[TMP23:%.*]] = extractelement <4 x i16> [[X]], i64 1
 ; CHECK-NEXT:    [[TMP24:%.*]] = extractelement <4 x i16> [[Y]], i64 1
 ; CHECK-NEXT:    [[TMP25:%.*]] = zext i16 [[TMP23]] to i32
@@ -2974,7 +2974,7 @@ define amdgpu_kernel void @sdiv_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %x
 ; CHECK-NEXT:    [[TMP21:%.*]] = shl i32 [[TMP20]], 16
 ; CHECK-NEXT:    [[TMP22:%.*]] = ashr i32 [[TMP21]], 16
 ; CHECK-NEXT:    [[TMP23:%.*]] = trunc i32 [[TMP22]] to i16
-; CHECK-NEXT:    [[TMP24:%.*]] = insertelement <4 x i16> undef, i16 [[TMP23]], i64 0
+; CHECK-NEXT:    [[TMP24:%.*]] = insertelement <4 x i16> poison, i16 [[TMP23]], i64 0
 ; CHECK-NEXT:    [[TMP25:%.*]] = extractelement <4 x i16> [[X]], i64 1
 ; CHECK-NEXT:    [[TMP26:%.*]] = extractelement <4 x i16> [[Y]], i64 1
 ; CHECK-NEXT:    [[TMP27:%.*]] = sext i16 [[TMP25]] to i32
@@ -3238,7 +3238,7 @@ define amdgpu_kernel void @srem_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %x
 ; CHECK-NEXT:    [[TMP23:%.*]] = shl i32 [[TMP22]], 16
 ; CHECK-NEXT:    [[TMP24:%.*]] = ashr i32 [[TMP23]], 16
 ; CHECK-NEXT:    [[TMP25:%.*]] = trunc i32 [[TMP24]] to i16
-; CHECK-NEXT:    [[TMP26:%.*]] = insertelement <4 x i16> undef, i16 [[TMP25]], i64 0
+; CHECK-NEXT:    [[TMP26:%.*]] = insertelement <4 x i16> poison, i16 [[TMP25]], i64 0
 ; CHECK-NEXT:    [[TMP27:%.*]] = extractelement <4 x i16> [[X]], i64 1
 ; CHECK-NEXT:    [[TMP28:%.*]] = extractelement <4 x i16> [[Y]], i64 1
 ; CHECK-NEXT:    [[TMP29:%.*]] = sext i16 [[TMP27]] to i32
@@ -3844,7 +3844,7 @@ define amdgpu_kernel void @udiv_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> %x
 ; CHECK-NEXT:    [[TMP17:%.*]] = add i32 [[TMP12]], [[TMP16]]
 ; CHECK-NEXT:    [[TMP18:%.*]] = and i32 [[TMP17]], 65535
 ; CHECK-NEXT:    [[TMP19:%.*]] = trunc i32 [[TMP18]] to i16
-; CHECK-NEXT:    [[TMP20:%.*]] = insertelement <3 x i16> undef, i16 [[TMP19]], i64 0
+; CHECK-NEXT:    [[TMP20:%.*]] = insertelement <3 x i16> poison, i16 [[TMP19]], i64 0
 ; CHECK-NEXT:    [[TMP21:%.*]] = extractelement <3 x i16> [[X]], i64 1
 ; CHECK-NEXT:    [[TMP22:%.*]] = extractelement <3 x i16> [[Y]], i64 1
 ; CHECK-NEXT:    [[TMP23:%.*]] = zext i16 [[TMP21]] to i32
@@ -4007,7 +4007,7 @@ define amdgpu_kernel void @urem_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> %x
 ; CHECK-NEXT:    [[TMP19:%.*]] = sub i32 [[TMP3]], [[TMP18]]
 ; CHECK-NEXT:    [[TMP20:%.*]] = and i32 [[TMP19]], 65535
 ; CHECK-NEXT:    [[TMP21:%.*]] = trunc i32 [[TMP20]] to i16
-; CHECK-NEXT:    [[TMP22:%.*]] = insertelement <3 x i16> undef, i16 [[TMP21]], i64 0
+; CHECK-NEXT:    [[TMP22:%.*]] = insertelement <3 x i16> poison, i16 [[TMP21]], i64 0
 ; CHECK-NEXT:    [[TMP23:%.*]] = extractelement <3 x i16> [[X]], i64 1
 ; CHECK-NEXT:    [[TMP24:%.*]] = extractelement <3 x i16> [[Y]], i64 1
 ; CHECK-NEXT:    [[TMP25:%.*]] = zext i16 [[TMP23]] to i32
@@ -4189,7 +4189,7 @@ define amdgpu_kernel void @sdiv_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> %x
 ; CHECK-NEXT:    [[TMP21:%.*]] = shl i32 [[TMP20]], 16
 ; CHECK-NEXT:    [[TMP22:%.*]] = ashr i32 [[TMP21]], 16
 ; CHECK-NEXT:    [[TMP23:%.*]] = trunc i32 [[TMP22]] to i16
-; CHECK-NEXT:    [[TMP24:%.*]] = insertelement <3 x i16> undef, i16 [[TMP23]], i64 0
+; CHECK-NEXT:    [[TMP24:%.*]] = insertelement <3 x i16> poison, i16 [[TMP23]], i64 0
 ; CHECK-NEXT:    [[TMP25:%.*]] = extractelement <3 x i16> [[X]], i64 1
 ; CHECK-NEXT:    [[TMP26:%.*]] = extractelement <3 x i16> [[Y]], i64 1
 ; CHECK-NEXT:    [[TMP27:%.*]] = sext i16 [[TMP25]] to i32
@@ -4394,7 +4394,7 @@ define amdgpu_kernel void @srem_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> %x
 ; CHECK-NEXT:    [[TMP23:%.*]] = shl i32 [[TMP22]], 16
 ; CHECK-NEXT:    [[TMP24:%.*]] = ashr i32 [[TMP23]], 16
 ; CHECK-NEXT:    [[TMP25:%.*]] = trunc i32 [[TMP24]] to i16
-; CHECK-NEXT:    [[TMP26:%.*]] = insertelement <3 x i16> undef, i16 [[TMP25]], i64 0
+; CHECK-NEXT:    [[TMP26:%.*]] = insertelement <3 x i16> poison, i16 [[TMP25]], i64 0
 ; CHECK-NEXT:    [[TMP27:%.*]] = extractelement <3 x i16> [[X]], i64 1
 ; CHECK-NEXT:    [[TMP28:%.*]] = extractelement <3 x i16> [[Y]], i64 1
 ; CHECK-NEXT:    [[TMP29:%.*]] = sext i16 [[TMP27]] to i32
@@ -4612,7 +4612,7 @@ define amdgpu_kernel void @udiv_v3i15(<3 x i15> addrspace(1)* %out, <3 x i15> %x
 ; CHECK-NEXT:    [[TMP17:%.*]] = add i32 [[TMP12]], [[TMP16]]
 ; CHECK-NEXT:    [[TMP18:%.*]] = and i32 [[TMP17]], 32767
 ; CHECK-NEXT:    [[TMP19:%.*]] = trunc i32 [[TMP18]] to i15
-; CHECK-NEXT:    [[TMP20:%.*]] = insertelement <3 x i15> undef, i15 [[TMP19]], i64 0
+; CHECK-NEXT:    [[TMP20:%.*]] = insertelement <3 x i15> poison, i15 [[TMP19]], i64 0
 ; CHECK-NEXT:    [[TMP21:%.*]] = extractelement <3 x i15> [[X]], i64 1
 ; CHECK-NEXT:    [[TMP22:%.*]] = extractelement <3 x i15> [[Y]], i64 1
 ; CHECK-NEXT:    [[TMP23:%.*]] = zext i15 [[TMP21]] to i32
@@ -4795,7 +4795,7 @@ define amdgpu_kernel void @urem_v3i15(<3 x i15> addrspace(1)* %out, <3 x i15> %x
 ; CHECK-NEXT:    [[TMP19:%.*]] = sub i32 [[TMP3]], [[TMP18]]
 ; CHECK-NEXT:    [[TMP20:%.*]] = and i32 [[TMP19]], 32767
 ; CHECK-NEXT:    [[TMP21:%.*]] = trunc i32 [[TMP20]] to i15
-; CHECK-NEXT:    [[TMP22:%.*]] = insertelement <3 x i15> undef, i15 [[TMP21]], i64 0
+; CHECK-NEXT:    [[TMP22:%.*]] = insertelement <3 x i15> poison, i15 [[TMP21]], i64 0
 ; CHECK-NEXT:    [[TMP23:%.*]] = extractelement <3 x i15> [[X]], i64 1
 ; CHECK-NEXT:    [[TMP24:%.*]] = extractelement <3 x i15> [[Y]], i64 1
 ; CHECK-NEXT:    [[TMP25:%.*]] = zext i15 [[TMP23]] to i32
@@ -5000,7 +5000,7 @@ define amdgpu_kernel void @sdiv_v3i15(<3 x i15> addrspace(1)* %out, <3 x i15> %x
 ; CHECK-NEXT:    [[TMP21:%.*]] = shl i32 [[TMP20]], 17
 ; CHECK-NEXT:    [[TMP22:%.*]] = ashr i32 [[TMP21]], 17
 ; CHECK-NEXT:    [[TMP23:%.*]] = trunc i32 [[TMP22]] to i15
-; CHECK-NEXT:    [[TMP24:%.*]] = insertelement <3 x i15> undef, i15 [[TMP23]], i64 0
+; CHECK-NEXT:    [[TMP24:%.*]] = insertelement <3 x i15> poison, i15 [[TMP23]], i64 0
 ; CHECK-NEXT:    [[TMP25:%.*]] = extractelement <3 x i15> [[X]], i64 1
 ; CHECK-NEXT:    [[TMP26:%.*]] = extractelement <3 x i15> [[Y]], i64 1
 ; CHECK-NEXT:    [[TMP27:%.*]] = sext i15 [[TMP25]] to i32
@@ -5223,7 +5223,7 @@ define amdgpu_kernel void @srem_v3i15(<3 x i15> addrspace(1)* %out, <3 x i15> %x
 ; CHECK-NEXT:    [[TMP23:%.*]] = shl i32 [[TMP22]], 17
 ; CHECK-NEXT:    [[TMP24:%.*]] = ashr i32 [[TMP23]], 17
 ; CHECK-NEXT:    [[TMP25:%.*]] = trunc i32 [[TMP24]] to i15
-; CHECK-NEXT:    [[TMP26:%.*]] = insertelement <3 x i15> undef, i15 [[TMP25]], i64 0
+; CHECK-NEXT:    [[TMP26:%.*]] = insertelement <3 x i15> poison, i15 [[TMP25]], i64 0
 ; CHECK-NEXT:    [[TMP27:%.*]] = extractelement <3 x i15> [[X]], i64 1
 ; CHECK-NEXT:    [[TMP28:%.*]] = extractelement <3 x i15> [[Y]], i64 1
 ; CHECK-NEXT:    [[TMP29:%.*]] = sext i15 [[TMP27]] to i32
@@ -5558,7 +5558,7 @@ define amdgpu_kernel void @udiv_v2i32_pow2k_denom(<2 x i32> addrspace(1)* %out,
 ; CHECK-LABEL: @udiv_v2i32_pow2k_denom(
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x i32> [[X:%.*]], i64 0
 ; CHECK-NEXT:    [[TMP2:%.*]] = udiv i32 [[TMP1]], 4096
-; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x i32> undef, i32 [[TMP2]], i64 0
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x i32> poison, i32 [[TMP2]], i64 0
 ; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x i32> [[X]], i64 1
 ; CHECK-NEXT:    [[TMP5:%.*]] = udiv i32 [[TMP4]], 4096
 ; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <2 x i32> [[TMP3]], i32 [[TMP5]], i64 1
@@ -5600,7 +5600,7 @@ define amdgpu_kernel void @udiv_v2i32_mixed_pow2k_denom(<2 x i32> addrspace(1)*
 ; CHECK-LABEL: @udiv_v2i32_mixed_pow2k_denom(
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x i32> [[X:%.*]], i64 0
 ; CHECK-NEXT:    [[TMP2:%.*]] = udiv i32 [[TMP1]], 4096
-; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x i32> undef, i32 [[TMP2]], i64 0
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x i32> poison, i32 [[TMP2]], i64 0
 ; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x i32> [[X]], i64 1
 ; CHECK-NEXT:    [[TMP5:%.*]] = udiv i32 [[TMP4]], 4095
 ; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <2 x i32> [[TMP3]], i32 [[TMP5]], i64 1
@@ -5680,7 +5680,7 @@ define amdgpu_kernel void @udiv_v2i32_pow2_shl_denom(<2 x i32> addrspace(1)* %ou
 ; CHECK-NEXT:    [[TMP29:%.*]] = icmp uge i32 [[TMP28]], [[TMP2]]
 ; CHECK-NEXT:    [[TMP30:%.*]] = add i32 [[TMP26]], 1
 ; CHECK-NEXT:    [[TMP31:%.*]] = select i1 [[TMP29]], i32 [[TMP30]], i32 [[TMP26]]
-; CHECK-NEXT:    [[TMP32:%.*]] = insertelement <2 x i32> undef, i32 [[TMP31]], i64 0
+; CHECK-NEXT:    [[TMP32:%.*]] = insertelement <2 x i32> poison, i32 [[TMP31]], i64 0
 ; CHECK-NEXT:    [[TMP33:%.*]] = extractelement <2 x i32> [[X]], i64 1
 ; CHECK-NEXT:    [[TMP34:%.*]] = extractelement <2 x i32> [[SHL_Y]], i64 1
 ; CHECK-NEXT:    [[TMP35:%.*]] = uitofp i32 [[TMP34]] to float
@@ -5952,7 +5952,7 @@ define amdgpu_kernel void @urem_v2i32_pow2k_denom(<2 x i32> addrspace(1)* %out,
 ; CHECK-LABEL: @urem_v2i32_pow2k_denom(
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x i32> [[X:%.*]], i64 0
 ; CHECK-NEXT:    [[TMP2:%.*]] = urem i32 [[TMP1]], 4096
-; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x i32> undef, i32 [[TMP2]], i64 0
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x i32> poison, i32 [[TMP2]], i64 0
 ; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x i32> [[X]], i64 1
 ; CHECK-NEXT:    [[TMP5:%.*]] = urem i32 [[TMP4]], 4096
 ; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <2 x i32> [[TMP3]], i32 [[TMP5]], i64 1
@@ -6022,7 +6022,7 @@ define amdgpu_kernel void @urem_v2i32_pow2_shl_denom(<2 x i32> addrspace(1)* %ou
 ; CHECK-NEXT:    [[TMP27:%.*]] = icmp uge i32 [[TMP26]], [[TMP2]]
 ; CHECK-NEXT:    [[TMP28:%.*]] = sub i32 [[TMP26]], [[TMP2]]
 ; CHECK-NEXT:    [[TMP29:%.*]] = select i1 [[TMP27]], i32 [[TMP28]], i32 [[TMP26]]
-; CHECK-NEXT:    [[TMP30:%.*]] = insertelement <2 x i32> undef, i32 [[TMP29]], i64 0
+; CHECK-NEXT:    [[TMP30:%.*]] = insertelement <2 x i32> poison, i32 [[TMP29]], i64 0
 ; CHECK-NEXT:    [[TMP31:%.*]] = extractelement <2 x i32> [[X]], i64 1
 ; CHECK-NEXT:    [[TMP32:%.*]] = extractelement <2 x i32> [[SHL_Y]], i64 1
 ; CHECK-NEXT:    [[TMP33:%.*]] = uitofp i32 [[TMP32]] to float
@@ -6338,7 +6338,7 @@ define amdgpu_kernel void @sdiv_v2i32_pow2k_denom(<2 x i32> addrspace(1)* %out,
 ; CHECK-LABEL: @sdiv_v2i32_pow2k_denom(
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x i32> [[X:%.*]], i64 0
 ; CHECK-NEXT:    [[TMP2:%.*]] = sdiv i32 [[TMP1]], 4096
-; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x i32> undef, i32 [[TMP2]], i64 0
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x i32> poison, i32 [[TMP2]], i64 0
 ; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x i32> [[X]], i64 1
 ; CHECK-NEXT:    [[TMP5:%.*]] = sdiv i32 [[TMP4]], 4096
 ; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <2 x i32> [[TMP3]], i32 [[TMP5]], i64 1
@@ -6392,7 +6392,7 @@ define amdgpu_kernel void @ssdiv_v2i32_mixed_pow2k_denom(<2 x i32> addrspace(1)*
 ; CHECK-LABEL: @ssdiv_v2i32_mixed_pow2k_denom(
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x i32> [[X:%.*]], i64 0
 ; CHECK-NEXT:    [[TMP2:%.*]] = sdiv i32 [[TMP1]], 4096
-; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x i32> undef, i32 [[TMP2]], i64 0
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x i32> poison, i32 [[TMP2]], i64 0
 ; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x i32> [[X]], i64 1
 ; CHECK-NEXT:    [[TMP5:%.*]] = sdiv i32 [[TMP4]], 4095
 ; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <2 x i32> [[TMP3]], i32 [[TMP5]], i64 1
@@ -6487,7 +6487,7 @@ define amdgpu_kernel void @sdiv_v2i32_pow2_shl_denom(<2 x i32> addrspace(1)* %ou
 ; CHECK-NEXT:    [[TMP38:%.*]] = select i1 [[TMP36]], i32 [[TMP37]], i32 [[TMP33]]
 ; CHECK-NEXT:    [[TMP39:%.*]] = xor i32 [[TMP38]], [[TMP5]]
 ; CHECK-NEXT:    [[TMP40:%.*]] = sub i32 [[TMP39]], [[TMP5]]
-; CHECK-NEXT:    [[TMP41:%.*]] = insertelement <2 x i32> undef, i32 [[TMP40]], i64 0
+; CHECK-NEXT:    [[TMP41:%.*]] = insertelement <2 x i32> poison, i32 [[TMP40]], i64 0
 ; CHECK-NEXT:    [[TMP42:%.*]] = extractelement <2 x i32> [[X]], i64 1
 ; CHECK-NEXT:    [[TMP43:%.*]] = extractelement <2 x i32> [[SHL_Y]], i64 1
 ; CHECK-NEXT:    [[TMP44:%.*]] = ashr i32 [[TMP42]], 31
@@ -6859,7 +6859,7 @@ define amdgpu_kernel void @srem_v2i32_pow2k_denom(<2 x i32> addrspace(1)* %out,
 ; CHECK-LABEL: @srem_v2i32_pow2k_denom(
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x i32> [[X:%.*]], i64 0
 ; CHECK-NEXT:    [[TMP2:%.*]] = srem i32 [[TMP1]], 4096
-; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x i32> undef, i32 [[TMP2]], i64 0
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x i32> poison, i32 [[TMP2]], i64 0
 ; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x i32> [[X]], i64 1
 ; CHECK-NEXT:    [[TMP5:%.*]] = srem i32 [[TMP4]], 4096
 ; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <2 x i32> [[TMP3]], i32 [[TMP5]], i64 1
@@ -6953,7 +6953,7 @@ define amdgpu_kernel void @srem_v2i32_pow2_shl_denom(<2 x i32> addrspace(1)* %ou
 ; CHECK-NEXT:    [[TMP35:%.*]] = select i1 [[TMP33]], i32 [[TMP34]], i32 [[TMP32]]
 ; CHECK-NEXT:    [[TMP36:%.*]] = xor i32 [[TMP35]], [[TMP3]]
 ; CHECK-NEXT:    [[TMP37:%.*]] = sub i32 [[TMP36]], [[TMP3]]
-; CHECK-NEXT:    [[TMP38:%.*]] = insertelement <2 x i32> undef, i32 [[TMP37]], i64 0
+; CHECK-NEXT:    [[TMP38:%.*]] = insertelement <2 x i32> poison, i32 [[TMP37]], i64 0
 ; CHECK-NEXT:    [[TMP39:%.*]] = extractelement <2 x i32> [[X]], i64 1
 ; CHECK-NEXT:    [[TMP40:%.*]] = extractelement <2 x i32> [[SHL_Y]], i64 1
 ; CHECK-NEXT:    [[TMP41:%.*]] = ashr i32 [[TMP39]], 31
@@ -7474,7 +7474,7 @@ define amdgpu_kernel void @udiv_v2i64_pow2k_denom(<2 x i64> addrspace(1)* %out,
 ; CHECK-LABEL: @udiv_v2i64_pow2k_denom(
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x i64> [[X:%.*]], i64 0
 ; CHECK-NEXT:    [[TMP2:%.*]] = udiv i64 [[TMP1]], 4096
-; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x i64> undef, i64 [[TMP2]], i64 0
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x i64> poison, i64 [[TMP2]], i64 0
 ; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x i64> [[X]], i64 1
 ; CHECK-NEXT:    [[TMP5:%.*]] = udiv i64 [[TMP4]], 4096
 ; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <2 x i64> [[TMP3]], i64 [[TMP5]], i64 1
@@ -7520,7 +7520,7 @@ define amdgpu_kernel void @udiv_v2i64_mixed_pow2k_denom(<2 x i64> addrspace(1)*
 ; CHECK-LABEL: @udiv_v2i64_mixed_pow2k_denom(
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x i64> [[X:%.*]], i64 0
 ; CHECK-NEXT:    [[TMP2:%.*]] = udiv i64 [[TMP1]], 4096
-; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x i64> undef, i64 [[TMP2]], i64 0
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x i64> poison, i64 [[TMP2]], i64 0
 ; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x i64> [[X]], i64 1
 ; CHECK-NEXT:    [[TMP5:%.*]] = udiv i64 [[TMP4]], 4095
 ; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <2 x i64> [[TMP3]], i64 [[TMP5]], i64 1
@@ -7768,7 +7768,7 @@ define amdgpu_kernel void @udiv_v2i64_pow2_shl_denom(<2 x i64> addrspace(1)* %ou
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x i64> [[X:%.*]], i64 0
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x i64> [[SHL_Y]], i64 0
 ; CHECK-NEXT:    [[TMP3:%.*]] = udiv i64 [[TMP1]], [[TMP2]]
-; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x i64> undef, i64 [[TMP3]], i64 0
+; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x i64> poison, i64 [[TMP3]], i64 0
 ; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <2 x i64> [[X]], i64 1
 ; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x i64> [[SHL_Y]], i64 1
 ; CHECK-NEXT:    [[TMP7:%.*]] = udiv i64 [[TMP5]], [[TMP6]]
@@ -8161,7 +8161,7 @@ define amdgpu_kernel void @urem_v2i64_pow2k_denom(<2 x i64> addrspace(1)* %out,
 ; CHECK-LABEL: @urem_v2i64_pow2k_denom(
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x i64> [[X:%.*]], i64 0
 ; CHECK-NEXT:    [[TMP2:%.*]] = urem i64 [[TMP1]], 4096
-; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x i64> undef, i64 [[TMP2]], i64 0
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x i64> poison, i64 [[TMP2]], i64 0
 ; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x i64> [[X]], i64 1
 ; CHECK-NEXT:    [[TMP5:%.*]] = urem i64 [[TMP4]], 4096
 ; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <2 x i64> [[TMP3]], i64 [[TMP5]], i64 1
@@ -8208,7 +8208,7 @@ define amdgpu_kernel void @urem_v2i64_pow2_shl_denom(<2 x i64> addrspace(1)* %ou
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x i64> [[X:%.*]], i64 0
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x i64> [[SHL_Y]], i64 0
 ; CHECK-NEXT:    [[TMP3:%.*]] = urem i64 [[TMP1]], [[TMP2]]
-; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x i64> undef, i64 [[TMP3]], i64 0
+; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x i64> poison, i64 [[TMP3]], i64 0
 ; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <2 x i64> [[X]], i64 1
 ; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x i64> [[SHL_Y]], i64 1
 ; CHECK-NEXT:    [[TMP7:%.*]] = urem i64 [[TMP5]], [[TMP6]]
@@ -8869,7 +8869,7 @@ define amdgpu_kernel void @sdiv_v2i64_pow2k_denom(<2 x i64> addrspace(1)* %out,
 ; CHECK-LABEL: @sdiv_v2i64_pow2k_denom(
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x i64> [[X:%.*]], i64 0
 ; CHECK-NEXT:    [[TMP2:%.*]] = sdiv i64 [[TMP1]], 4096
-; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x i64> undef, i64 [[TMP2]], i64 0
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x i64> poison, i64 [[TMP2]], i64 0
 ; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x i64> [[X]], i64 1
 ; CHECK-NEXT:    [[TMP5:%.*]] = sdiv i64 [[TMP4]], 4096
 ; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <2 x i64> [[TMP3]], i64 [[TMP5]], i64 1
@@ -8931,7 +8931,7 @@ define amdgpu_kernel void @ssdiv_v2i64_mixed_pow2k_denom(<2 x i64> addrspace(1)*
 ; CHECK-LABEL: @ssdiv_v2i64_mixed_pow2k_denom(
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x i64> [[X:%.*]], i64 0
 ; CHECK-NEXT:    [[TMP2:%.*]] = sdiv i64 [[TMP1]], 4096
-; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x i64> undef, i64 [[TMP2]], i64 0
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x i64> poison, i64 [[TMP2]], i64 0
 ; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x i64> [[X]], i64 1
 ; CHECK-NEXT:    [[TMP5:%.*]] = sdiv i64 [[TMP4]], 4095
 ; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <2 x i64> [[TMP3]], i64 [[TMP5]], i64 1
@@ -9208,7 +9208,7 @@ define amdgpu_kernel void @sdiv_v2i64_pow2_shl_denom(<2 x i64> addrspace(1)* %ou
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x i64> [[X:%.*]], i64 0
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x i64> [[SHL_Y]], i64 0
 ; CHECK-NEXT:    [[TMP3:%.*]] = sdiv i64 [[TMP1]], [[TMP2]]
-; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x i64> undef, i64 [[TMP3]], i64 0
+; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x i64> poison, i64 [[TMP3]], i64 0
 ; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <2 x i64> [[X]], i64 1
 ; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x i64> [[SHL_Y]], i64 1
 ; CHECK-NEXT:    [[TMP7:%.*]] = sdiv i64 [[TMP5]], [[TMP6]]
@@ -10368,7 +10368,7 @@ define amdgpu_kernel void @srem_v2i64_pow2k_denom(<2 x i64> addrspace(1)* %out,
 ; CHECK-LABEL: @srem_v2i64_pow2k_denom(
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x i64> [[X:%.*]], i64 0
 ; CHECK-NEXT:    [[TMP2:%.*]] = srem i64 [[TMP1]], 4096
-; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x i64> undef, i64 [[TMP2]], i64 0
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x i64> poison, i64 [[TMP2]], i64 0
 ; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x i64> [[X]], i64 1
 ; CHECK-NEXT:    [[TMP5:%.*]] = srem i64 [[TMP4]], 4096
 ; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <2 x i64> [[TMP3]], i64 [[TMP5]], i64 1
@@ -10440,7 +10440,7 @@ define amdgpu_kernel void @srem_v2i64_pow2_shl_denom(<2 x i64> addrspace(1)* %ou
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x i64> [[X:%.*]], i64 0
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x i64> [[SHL_Y]], i64 0
 ; CHECK-NEXT:    [[TMP3:%.*]] = srem i64 [[TMP1]], [[TMP2]]
-; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x i64> undef, i64 [[TMP3]], i64 0
+; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x i64> poison, i64 [[TMP3]], i64 0
 ; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <2 x i64> [[X]], i64 1
 ; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x i64> [[SHL_Y]], i64 1
 ; CHECK-NEXT:    [[TMP7:%.*]] = srem i64 [[TMP5]], [[TMP6]]

diff  --git a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-mul24.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-mul24.ll
index 6c72a5726abae..5f57d73f0b083 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-mul24.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-mul24.ll
@@ -63,7 +63,7 @@ define <2 x i8> @mul_v1i16(<1 x i16> %arg) {
 ; SI-NEXT:    [[TMP1:%.*]] = zext i16 [[TMP0]] to i32
 ; SI-NEXT:    [[TMP2:%.*]] = call i32 @llvm.amdgcn.mul.u24(i32 [[TMP1]], i32 42)
 ; SI-NEXT:    [[TMP3:%.*]] = trunc i32 [[TMP2]] to i16
-; SI-NEXT:    [[MUL:%.*]] = insertelement <1 x i16> undef, i16 [[TMP3]], i64 0
+; SI-NEXT:    [[MUL:%.*]] = insertelement <1 x i16> poison, i16 [[TMP3]], i64 0
 ; SI-NEXT:    [[CAST:%.*]] = bitcast <1 x i16> [[MUL]] to <2 x i8>
 ; SI-NEXT:    ret <2 x i8> [[CAST]]
 ;
@@ -92,7 +92,7 @@ define <1 x i8> @mul_v1i8(<1 x i8> %arg) {
 ; SI-NEXT:    [[TMP1:%.*]] = zext i8 [[TMP0]] to i32
 ; SI-NEXT:    [[TMP2:%.*]] = call i32 @llvm.amdgcn.mul.u24(i32 [[TMP1]], i32 42)
 ; SI-NEXT:    [[TMP3:%.*]] = trunc i32 [[TMP2]] to i8
-; SI-NEXT:    [[MUL:%.*]] = insertelement <1 x i8> undef, i8 [[TMP3]], i64 0
+; SI-NEXT:    [[MUL:%.*]] = insertelement <1 x i8> poison, i8 [[TMP3]], i64 0
 ; SI-NEXT:    ret <1 x i8> [[MUL]]
 ;
 ; VI-LABEL: @mul_v1i8(
@@ -122,7 +122,7 @@ define <2 x i32> @smul24_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
 ; SI-NEXT:    [[TMP4:%.*]] = extractelement <2 x i32> [[RHS24]], i64 1
 ; SI-NEXT:    [[TMP5:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP1]], i32 [[TMP3]])
 ; SI-NEXT:    [[TMP6:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP2]], i32 [[TMP4]])
-; SI-NEXT:    [[TMP7:%.*]] = insertelement <2 x i32> undef, i32 [[TMP5]], i64 0
+; SI-NEXT:    [[TMP7:%.*]] = insertelement <2 x i32> poison, i32 [[TMP5]], i64 0
 ; SI-NEXT:    [[MUL:%.*]] = insertelement <2 x i32> [[TMP7]], i32 [[TMP6]], i64 1
 ; SI-NEXT:    ret <2 x i32> [[MUL]]
 ;
@@ -137,7 +137,7 @@ define <2 x i32> @smul24_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
 ; VI-NEXT:    [[TMP4:%.*]] = extractelement <2 x i32> [[RHS24]], i64 1
 ; VI-NEXT:    [[TMP5:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP1]], i32 [[TMP3]])
 ; VI-NEXT:    [[TMP6:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP2]], i32 [[TMP4]])
-; VI-NEXT:    [[TMP7:%.*]] = insertelement <2 x i32> undef, i32 [[TMP5]], i64 0
+; VI-NEXT:    [[TMP7:%.*]] = insertelement <2 x i32> poison, i32 [[TMP5]], i64 0
 ; VI-NEXT:    [[MUL:%.*]] = insertelement <2 x i32> [[TMP7]], i32 [[TMP6]], i64 1
 ; VI-NEXT:    ret <2 x i32> [[MUL]]
 ;
@@ -192,7 +192,7 @@ define <2 x i32> @umul24_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
 ; SI-NEXT:    [[TMP4:%.*]] = extractelement <2 x i32> [[RHS24]], i64 1
 ; SI-NEXT:    [[TMP5:%.*]] = call i32 @llvm.amdgcn.mul.u24(i32 [[TMP1]], i32 [[TMP3]])
 ; SI-NEXT:    [[TMP6:%.*]] = call i32 @llvm.amdgcn.mul.u24(i32 [[TMP2]], i32 [[TMP4]])
-; SI-NEXT:    [[TMP7:%.*]] = insertelement <2 x i32> undef, i32 [[TMP5]], i64 0
+; SI-NEXT:    [[TMP7:%.*]] = insertelement <2 x i32> poison, i32 [[TMP5]], i64 0
 ; SI-NEXT:    [[MUL:%.*]] = insertelement <2 x i32> [[TMP7]], i32 [[TMP6]], i64 1
 ; SI-NEXT:    ret <2 x i32> [[MUL]]
 ;
@@ -205,7 +205,7 @@ define <2 x i32> @umul24_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
 ; VI-NEXT:    [[TMP4:%.*]] = extractelement <2 x i32> [[RHS24]], i64 1
 ; VI-NEXT:    [[TMP5:%.*]] = call i32 @llvm.amdgcn.mul.u24(i32 [[TMP1]], i32 [[TMP3]])
 ; VI-NEXT:    [[TMP6:%.*]] = call i32 @llvm.amdgcn.mul.u24(i32 [[TMP2]], i32 [[TMP4]])
-; VI-NEXT:    [[TMP7:%.*]] = insertelement <2 x i32> undef, i32 [[TMP5]], i64 0
+; VI-NEXT:    [[TMP7:%.*]] = insertelement <2 x i32> poison, i32 [[TMP5]], i64 0
 ; VI-NEXT:    [[MUL:%.*]] = insertelement <2 x i32> [[TMP7]], i32 [[TMP6]], i64 1
 ; VI-NEXT:    ret <2 x i32> [[MUL]]
 ;
@@ -543,7 +543,7 @@ define <2 x i31> @umul24_v2i31(<2 x i31> %lhs, <2 x i31> %rhs) {
 ; SI-NEXT:    [[TMP10:%.*]] = zext i31 [[TMP4]] to i32
 ; SI-NEXT:    [[TMP11:%.*]] = call i32 @llvm.amdgcn.mul.u24(i32 [[TMP9]], i32 [[TMP10]])
 ; SI-NEXT:    [[TMP12:%.*]] = trunc i32 [[TMP11]] to i31
-; SI-NEXT:    [[TMP13:%.*]] = insertelement <2 x i31> undef, i31 [[TMP8]], i64 0
+; SI-NEXT:    [[TMP13:%.*]] = insertelement <2 x i31> poison, i31 [[TMP8]], i64 0
 ; SI-NEXT:    [[MUL:%.*]] = insertelement <2 x i31> [[TMP13]], i31 [[TMP12]], i64 1
 ; SI-NEXT:    ret <2 x i31> [[MUL]]
 ;
@@ -562,7 +562,7 @@ define <2 x i31> @umul24_v2i31(<2 x i31> %lhs, <2 x i31> %rhs) {
 ; VI-NEXT:    [[TMP10:%.*]] = zext i31 [[TMP4]] to i32
 ; VI-NEXT:    [[TMP11:%.*]] = call i32 @llvm.amdgcn.mul.u24(i32 [[TMP9]], i32 [[TMP10]])
 ; VI-NEXT:    [[TMP12:%.*]] = trunc i32 [[TMP11]] to i31
-; VI-NEXT:    [[TMP13:%.*]] = insertelement <2 x i31> undef, i31 [[TMP8]], i64 0
+; VI-NEXT:    [[TMP13:%.*]] = insertelement <2 x i31> poison, i31 [[TMP8]], i64 0
 ; VI-NEXT:    [[MUL:%.*]] = insertelement <2 x i31> [[TMP13]], i31 [[TMP12]], i64 1
 ; VI-NEXT:    ret <2 x i31> [[MUL]]
 ;
@@ -596,7 +596,7 @@ define <2 x i31> @smul24_v2i31(<2 x i31> %lhs, <2 x i31> %rhs) {
 ; SI-NEXT:    [[TMP10:%.*]] = sext i31 [[TMP4]] to i32
 ; SI-NEXT:    [[TMP11:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP9]], i32 [[TMP10]])
 ; SI-NEXT:    [[TMP12:%.*]] = trunc i32 [[TMP11]] to i31
-; SI-NEXT:    [[TMP13:%.*]] = insertelement <2 x i31> undef, i31 [[TMP8]], i64 0
+; SI-NEXT:    [[TMP13:%.*]] = insertelement <2 x i31> poison, i31 [[TMP8]], i64 0
 ; SI-NEXT:    [[MUL:%.*]] = insertelement <2 x i31> [[TMP13]], i31 [[TMP12]], i64 1
 ; SI-NEXT:    ret <2 x i31> [[MUL]]
 ;
@@ -617,7 +617,7 @@ define <2 x i31> @smul24_v2i31(<2 x i31> %lhs, <2 x i31> %rhs) {
 ; VI-NEXT:    [[TMP10:%.*]] = sext i31 [[TMP4]] to i32
 ; VI-NEXT:    [[TMP11:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP9]], i32 [[TMP10]])
 ; VI-NEXT:    [[TMP12:%.*]] = trunc i32 [[TMP11]] to i31
-; VI-NEXT:    [[TMP13:%.*]] = insertelement <2 x i31> undef, i31 [[TMP8]], i64 0
+; VI-NEXT:    [[TMP13:%.*]] = insertelement <2 x i31> poison, i31 [[TMP8]], i64 0
 ; VI-NEXT:    [[MUL:%.*]] = insertelement <2 x i31> [[TMP13]], i31 [[TMP12]], i64 1
 ; VI-NEXT:    ret <2 x i31> [[MUL]]
 ;
@@ -813,7 +813,7 @@ define <2 x i33> @smul24_v2i33(<2 x i33> %lhs, <2 x i33> %rhs) {
 ; SI-NEXT:    [[TMP20:%.*]] = shl i64 [[TMP19]], 32
 ; SI-NEXT:    [[TMP21:%.*]] = or i64 [[TMP18]], [[TMP20]]
 ; SI-NEXT:    [[TMP22:%.*]] = trunc i64 [[TMP21]] to i33
-; SI-NEXT:    [[TMP23:%.*]] = insertelement <2 x i33> undef, i33 [[TMP13]], i64 0
+; SI-NEXT:    [[TMP23:%.*]] = insertelement <2 x i33> poison, i33 [[TMP13]], i64 0
 ; SI-NEXT:    [[MUL:%.*]] = insertelement <2 x i33> [[TMP23]], i33 [[TMP22]], i64 1
 ; SI-NEXT:    ret <2 x i33> [[MUL]]
 ;
@@ -844,7 +844,7 @@ define <2 x i33> @smul24_v2i33(<2 x i33> %lhs, <2 x i33> %rhs) {
 ; VI-NEXT:    [[TMP20:%.*]] = shl i64 [[TMP19]], 32
 ; VI-NEXT:    [[TMP21:%.*]] = or i64 [[TMP18]], [[TMP20]]
 ; VI-NEXT:    [[TMP22:%.*]] = trunc i64 [[TMP21]] to i33
-; VI-NEXT:    [[TMP23:%.*]] = insertelement <2 x i33> undef, i33 [[TMP13]], i64 0
+; VI-NEXT:    [[TMP23:%.*]] = insertelement <2 x i33> poison, i33 [[TMP13]], i64 0
 ; VI-NEXT:    [[MUL:%.*]] = insertelement <2 x i33> [[TMP23]], i33 [[TMP22]], i64 1
 ; VI-NEXT:    ret <2 x i33> [[MUL]]
 ;

diff  --git a/llvm/test/CodeGen/AMDGPU/amdgpu.private-memory.ll b/llvm/test/CodeGen/AMDGPU/amdgpu.private-memory.ll
index 1dad78ab9b78a..f252c7e6efd89 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu.private-memory.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu.private-memory.ll
@@ -12,8 +12,8 @@
 ; RUN: llc -march=r600 -mcpu=cypress -disable-promote-alloca-to-vector < %s | FileCheck %s -check-prefix=R600 -check-prefix=FUNC
 ; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck %s -check-prefix=R600-VECT -check-prefix=FUNC
 
-; HSAOPT: @mova_same_clause.stack = internal unnamed_addr addrspace(3) global [256 x [5 x i32]] undef, align 4
-; HSAOPT: @high_alignment.stack = internal unnamed_addr addrspace(3) global [256 x [8 x i32]] undef, align 16
+; HSAOPT: @mova_same_clause.stack = internal unnamed_addr addrspace(3) global [256 x [5 x i32]] poison, align 4
+; HSAOPT: @high_alignment.stack = internal unnamed_addr addrspace(3) global [256 x [8 x i32]] poison, align 16
 
 
 ; FUNC-LABEL: {{^}}mova_same_clause:

diff  --git a/llvm/test/CodeGen/AMDGPU/large-work-group-promote-alloca.ll b/llvm/test/CodeGen/AMDGPU/large-work-group-promote-alloca.ll
index 02049a3a1d9c8..73f4605cb0323 100644
--- a/llvm/test/CodeGen/AMDGPU/large-work-group-promote-alloca.ll
+++ b/llvm/test/CodeGen/AMDGPU/large-work-group-promote-alloca.ll
@@ -3,8 +3,8 @@
 ; RUN: opt -S -mcpu=gfx1010 -mtriple=amdgcn-unknown-unknown -amdgpu-promote-alloca -disable-promote-alloca-to-vector < %s | FileCheck --check-prefixes=GFX10PLUS,ALL %s
 ; RUN: opt -S -mcpu=gfx1100 -mtriple=amdgcn-unknown-unknown -amdgpu-promote-alloca -disable-promote-alloca-to-vector < %s | FileCheck --check-prefixes=GFX10PLUS,ALL %s
 
-; SI-NOT: @promote_alloca_size_63.stack = internal unnamed_addr addrspace(3) global [63 x [5 x i32]] undef, align 4
-; CI: @promote_alloca_size_63.stack = internal unnamed_addr addrspace(3) global [63 x [5 x i32]] undef, align 4
+; SI-NOT: @promote_alloca_size_63.stack = internal unnamed_addr addrspace(3) global [63 x [5 x i32]] poison, align 4
+; CI: @promote_alloca_size_63.stack = internal unnamed_addr addrspace(3) global [63 x [5 x i32]] poison, align 4
 
 define amdgpu_kernel void @promote_alloca_size_63(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) #0 {
 entry:
@@ -26,7 +26,7 @@ entry:
   ret void
 }
 
-; ALL: @promote_alloca_size_256.stack = internal unnamed_addr addrspace(3) global [256 x [5 x i32]] undef, align 4
+; ALL: @promote_alloca_size_256.stack = internal unnamed_addr addrspace(3) global [256 x [5 x i32]] poison, align 4
 
 define amdgpu_kernel void @promote_alloca_size_256(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) #1 {
 entry:
@@ -49,8 +49,8 @@ entry:
 }
 
 ; SI-NOT: @promote_alloca_size_1600.stack
-; CI: @promote_alloca_size_1600.stack = internal unnamed_addr addrspace(3) global [1024 x [5 x i32]] undef, align 4
-; GFX10PLUS: @promote_alloca_size_1600.stack = internal unnamed_addr addrspace(3) global [1024 x [5 x i32]] undef, align 4
+; CI: @promote_alloca_size_1600.stack = internal unnamed_addr addrspace(3) global [1024 x [5 x i32]] poison, align 4
+; GFX10PLUS: @promote_alloca_size_1600.stack = internal unnamed_addr addrspace(3) global [1024 x [5 x i32]] poison, align 4
 
 define amdgpu_kernel void @promote_alloca_size_1600(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) #2 {
 entry:

diff  --git a/llvm/test/CodeGen/AMDGPU/rewrite-out-arguments-address-space.ll b/llvm/test/CodeGen/AMDGPU/rewrite-out-arguments-address-space.ll
index d150e68d5b7b7..f5cb61ab5efa0 100644
--- a/llvm/test/CodeGen/AMDGPU/rewrite-out-arguments-address-space.ll
+++ b/llvm/test/CodeGen/AMDGPU/rewrite-out-arguments-address-space.ll
@@ -7,7 +7,7 @@
 ; CHECK-NEXT: ret %void_one_out_non_private_arg_i32_1_use zeroinitializer
 
 ; CHECK-LABEL: define void @void_one_out_non_private_arg_i32_1_use(i32 addrspace(1)* %0) #1 {
-; CHECK-NEXT: %2 = call %void_one_out_non_private_arg_i32_1_use @void_one_out_non_private_arg_i32_1_use.body(i32 addrspace(1)* undef)
+; CHECK-NEXT: %2 = call %void_one_out_non_private_arg_i32_1_use @void_one_out_non_private_arg_i32_1_use.body(i32 addrspace(1)* poison)
 ; CHECK-NEXT: %3 = extractvalue %void_one_out_non_private_arg_i32_1_use %2, 0
 ; CHECK-NEXT: store i32 %3, i32 addrspace(1)* %0, align 4
 ; CHECK-NEXT: ret void
@@ -17,15 +17,15 @@ define void @void_one_out_non_private_arg_i32_1_use(i32 addrspace(1)* %val) #0 {
 }
 
 ; CHECK-LABEL: define private %bitcast_pointer_as1 @bitcast_pointer_as1.body(<3 x i32> addrspace(1)* %out) #0 {
-; CHECK-NEXT: %load = load volatile <4 x i32>, <4 x i32> addrspace(1)* undef
+; CHECK-NEXT: %load = load volatile <4 x i32>, <4 x i32> addrspace(1)* poison
 ; CHECK-NEXT: %bitcast = bitcast <3 x i32> addrspace(1)* %out to <4 x i32> addrspace(1)*
-; CHECK-NEXT: %1 = insertvalue %bitcast_pointer_as1 undef, <4 x i32> %load, 0
+; CHECK-NEXT: %1 = insertvalue %bitcast_pointer_as1 poison, <4 x i32> %load, 0
 ; CHECK-NEXT: ret %bitcast_pointer_as1 %1
 
 ; CHECK-LABEL: define void @bitcast_pointer_as1(<3 x i32> addrspace(1)* %0) #1 {
-; CHECK-NEXT: %2 = call %bitcast_pointer_as1 @bitcast_pointer_as1.body(<3 x i32> addrspace(1)* undef)
+; CHECK-NEXT: %2 = call %bitcast_pointer_as1 @bitcast_pointer_as1.body(<3 x i32> addrspace(1)* poison)
 define void @bitcast_pointer_as1(<3 x i32> addrspace(1)* %out) #0 {
-  %load = load volatile <4 x i32>, <4 x i32> addrspace(1)* undef
+  %load = load volatile <4 x i32>, <4 x i32> addrspace(1)* poison
   %bitcast = bitcast <3 x i32> addrspace(1)* %out to <4 x i32> addrspace(1)*
   store <4 x i32> %load, <4 x i32> addrspace(1)* %bitcast
   ret void

diff  --git a/llvm/test/CodeGen/AMDGPU/rewrite-out-arguments.ll b/llvm/test/CodeGen/AMDGPU/rewrite-out-arguments.ll
index af4fcce2791e8..44a617fa02cb2 100644
--- a/llvm/test/CodeGen/AMDGPU/rewrite-out-arguments.ll
+++ b/llvm/test/CodeGen/AMDGPU/rewrite-out-arguments.ll
@@ -31,7 +31,7 @@ define void @skip_atomic(i32* byval(i32) %val) #0 {
 }
 
 define void @skip_store_pointer_val(i32* %val) #0 {
-  store i32* %val, i32** undef
+  store i32* %val, i32** poison
   ret void
 }
 
@@ -112,7 +112,7 @@ define void @void_one_out_arg_i32_reload(i32* %val) #0 {
 }
 
 define void @void_one_out_arg_i32_store_in_
diff erent_block(i32* %out) #0 {
-  %load = load i32, i32 addrspace(1)* undef
+  %load = load i32, i32 addrspace(1)* poison
   store i32 0, i32* %out
   br label %ret
 
@@ -184,12 +184,12 @@ define void @multiple_same_return_mayalias_order(i32* %out0, i32* %out1) #0 {
 ; it isn't in the same block as the return.
 define i32 @store_in_entry_block(i1 %arg0, i32* %out) #0 {
 entry:
-  %val0 = load i32, i32 addrspace(1)* undef
+  %val0 = load i32, i32 addrspace(1)* poison
   store i32 %val0, i32* %out
   br i1 %arg0, label %if, label %endif
 
 if:
-  %val1 = load i32, i32 addrspace(1)* undef
+  %val1 = load i32, i32 addrspace(1)* poison
   br label %endif
 
 endif:
@@ -230,13 +230,13 @@ define void @void_one_out_non_private_arg_i32_1_use(i32 addrspace(1)* %val) #0 {
 }
 
 define void @func_ptr_type(void()** %out) #0 {
-  %func = load void()*, void()** undef
+  %func = load void()*, void()** poison
   store void()* %func, void()** %out
   ret void
 }
 
 define void @bitcast_func_ptr_type(void()** %out) #0 {
-  %func = load i32()*, i32()** undef
+  %func = load i32()*, i32()** poison
   %cast = bitcast void()** %out to i32()**
   store i32()* %func, i32()** %cast
   ret void
@@ -254,21 +254,21 @@ define void @out_arg_large_array([17 x i32]* %val) #0 {
 }
 
 define <16 x i32> @num_regs_return_limit(i32* %out, i32 %val) #0 {
-  %load = load volatile <16 x i32>, <16 x i32> addrspace(1)* undef
+  %load = load volatile <16 x i32>, <16 x i32> addrspace(1)* poison
   store i32 %val, i32* %out
   ret <16 x i32> %load
 }
 
 define [15 x i32] @num_regs_reach_limit(i32* %out, i32 %val) #0 {
-  %load = load volatile [15 x i32], [15 x i32] addrspace(1)* undef
+  %load = load volatile [15 x i32], [15 x i32] addrspace(1)* poison
   store i32 %val, i32* %out
   ret [15 x i32] %load
 }
 
 
 define [15 x i32] @num_regs_reach_limit_leftover(i32* %out0, i32* %out1, i32 %val0) #0 {
-  %load0 = load volatile [15 x i32], [15 x i32] addrspace(1)* undef
-  %load1 = load volatile i32, i32 addrspace(1)* undef
+  %load0 = load volatile [15 x i32], [15 x i32] addrspace(1)* poison
+  %load1 = load volatile i32, i32 addrspace(1)* poison
   store i32 %val0, i32* %out0
   store i32 %load1, i32* %out1
   ret [15 x i32] %load0
@@ -290,14 +290,14 @@ define void @preserve_metadata(i32 %arg0, i32* %val) #0 !kernel_arg_access_qual
 ; Clang emits this pattern for 3-vectors for some reason.
 
 define void @bitcast_pointer_v4i32_v3i32(<3 x i32>* %out) #0 {
-  %load = load volatile <4 x i32>, <4 x i32> addrspace(1)* undef
+  %load = load volatile <4 x i32>, <4 x i32> addrspace(1)* poison
   %bitcast = bitcast <3 x i32>* %out to <4 x i32>*
   store <4 x i32> %load, <4 x i32>* %bitcast
   ret void
 }
 
 define void @bitcast_pointer_v4i32_v3f32(<3 x float>* %out) #0 {
-  %load = load volatile <4 x i32>, <4 x i32> addrspace(1)* undef
+  %load = load volatile <4 x i32>, <4 x i32> addrspace(1)* poison
   %bitcast = bitcast <3 x float>* %out to <4 x i32>*
   store <4 x i32> %load, <4 x i32>* %bitcast
   ret void
@@ -309,21 +309,21 @@ define void @bitcast_pointer_v4i32_v3f32(<3 x float>* %out) #0 {
 
 
 define void @bitcast_pointer_i32_f32(float* %out) #0 {
-  %load = load volatile i32, i32 addrspace(1)* undef
+  %load = load volatile i32, i32 addrspace(1)* poison
   %bitcast = bitcast float* %out to i32*
   store i32 %load, i32* %bitcast
   ret void
 }
 
 define void @bitcast_pointer_i32_f16(half* %out) #0 {
-  %load = load volatile i32, i32 addrspace(1)* undef
+  %load = load volatile i32, i32 addrspace(1)* poison
   %bitcast = bitcast half* %out to i32*
   store i32 %load, i32* %bitcast
   ret void
 }
 
 define void @bitcast_pointer_f16_i32(i32* %out) #0 {
-  %load = load volatile half, half addrspace(1)* undef
+  %load = load volatile half, half addrspace(1)* poison
   %bitcast = bitcast i32* %out to half*
   store half %load, half* %bitcast
   ret void
@@ -337,7 +337,7 @@ define void @bitcast_pointer_f16_i32(i32* %out) #0 {
 
 
 define void @bitcast_struct_v3f32_v3f32(%struct.v3f32* %out, <3 x float> %value) #0 {
-  %extractVec = shufflevector <3 x float> %value, <3 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
+  %extractVec = shufflevector <3 x float> %value, <3 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
   %cast = bitcast %struct.v3f32* %out to <4 x float>*
   store <4 x float> %extractVec, <4 x float>* %cast, align 16
   ret void
@@ -345,7 +345,7 @@ define void @bitcast_struct_v3f32_v3f32(%struct.v3f32* %out, <3 x float> %value)
 
 
 define void @bitcast_struct_v3f32_v3i32(%struct.v3f32* %out, <3 x i32> %value) #0 {
-  %extractVec = shufflevector <3 x i32> %value, <3 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
+  %extractVec = shufflevector <3 x i32> %value, <3 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
   %cast = bitcast %struct.v3f32* %out to <4 x i32>*
   store <4 x i32> %extractVec, <4 x i32>* %cast, align 16
   ret void
@@ -365,7 +365,7 @@ define void @bitcast_struct_v3f32_v4i32(%struct.v3f32* %out, <4 x i32> %value) #
 }
 
 define void @bitcast_struct_v4f32_v3f32(%struct.v4f32* %out, <3 x float> %value) #0 {
-  %extractVec = shufflevector <3 x float> %value, <3 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
+  %extractVec = shufflevector <3 x float> %value, <3 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
   %cast = bitcast %struct.v4f32* %out to <4 x float>*
   store <4 x float> %extractVec, <4 x float>* %cast, align 16
   ret void
@@ -378,7 +378,7 @@ define void @bitcast_struct_v3f32_v2f32(%struct.v3f32* %out, <2 x float> %value)
 }
 
 define void @bitcast_struct_v3f32_f32_v3f32(%struct.v3f32.f32* %out, <3 x float> %value) #0 {
-  %extractVec = shufflevector <3 x float> %value, <3 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
+  %extractVec = shufflevector <3 x float> %value, <3 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
   %cast = bitcast %struct.v3f32.f32* %out to <4 x float>*
   store <4 x float> %extractVec, <4 x float>* %cast, align 16
   ret void
@@ -408,14 +408,14 @@ entry:
   br i1 %cond, label %ret0, label %ret1
 
 ret0:
-  %extractVec = shufflevector <3 x float> %value, <3 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
+  %extractVec = shufflevector <3 x float> %value, <3 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
   %cast0 = bitcast %struct.v3f32* %out to <4 x float>*
   store <4 x float> %extractVec, <4 x float>* %cast0, align 16
   ret void
 
 ret1:
   %cast1 = bitcast %struct.v3f32* %out to <4 x float>*
-  %load = load <4 x float>, <4 x float> addrspace(1)* undef
+  %load = load <4 x float>, <4 x float> addrspace(1)* poison
   store <4 x float> %load, <4 x float>* %cast1, align 16
   ret void
 }
@@ -484,7 +484,7 @@ attributes #2 = { alwaysinline nounwind }
 ;
 ; CHECK-LABEL: define {{[^@]+}}@skip_store_pointer_val
 ; CHECK-SAME: (i32* [[VAL:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    store i32* [[VAL]], i32** undef, align 8
+; CHECK-NEXT:    store i32* [[VAL]], i32** poison, align 8
 ; CHECK-NEXT:    ret void
 ;
 ;
@@ -509,7 +509,7 @@ attributes #2 = { alwaysinline nounwind }
 ;
 ; CHECK-LABEL: define {{[^@]+}}@void_one_out_arg_i32_1_use
 ; CHECK-SAME: (i32* [[TMP0:%.*]]) #[[ATTR2:[0-9]+]] {
-; CHECK-NEXT:    [[TMP2:%.*]] = call [[VOID_ONE_OUT_ARG_I32_1_USE:%.*]] @void_one_out_arg_i32_1_use.body(i32* undef)
+; CHECK-NEXT:    [[TMP2:%.*]] = call [[VOID_ONE_OUT_ARG_I32_1_USE:%.*]] @void_one_out_arg_i32_1_use.body(i32* poison)
 ; CHECK-NEXT:    [[TMP3:%.*]] = extractvalue [[VOID_ONE_OUT_ARG_I32_1_USE]] [[TMP2]], 0
 ; CHECK-NEXT:    store i32 [[TMP3]], i32* [[TMP0]], align 4
 ; CHECK-NEXT:    ret void
@@ -522,7 +522,7 @@ attributes #2 = { alwaysinline nounwind }
 ;
 ; CHECK-LABEL: define {{[^@]+}}@void_one_out_arg_i32_1_use_align
 ; CHECK-SAME: (i32* align 8 [[TMP0:%.*]]) #[[ATTR2]] {
-; CHECK-NEXT:    [[TMP2:%.*]] = call [[VOID_ONE_OUT_ARG_I32_1_USE_ALIGN:%.*]] @void_one_out_arg_i32_1_use_align.body(i32* undef)
+; CHECK-NEXT:    [[TMP2:%.*]] = call [[VOID_ONE_OUT_ARG_I32_1_USE_ALIGN:%.*]] @void_one_out_arg_i32_1_use_align.body(i32* poison)
 ; CHECK-NEXT:    [[TMP3:%.*]] = extractvalue [[VOID_ONE_OUT_ARG_I32_1_USE_ALIGN]] [[TMP2]], 0
 ; CHECK-NEXT:    store i32 [[TMP3]], i32* [[TMP0]], align 8
 ; CHECK-NEXT:    ret void
@@ -539,7 +539,7 @@ attributes #2 = { alwaysinline nounwind }
 ;
 ; CHECK-LABEL: define {{[^@]+}}@void_one_out_arg_i32_2_use
 ; CHECK-SAME: (i1 [[TMP0:%.*]], i32* [[TMP1:%.*]]) #[[ATTR2]] {
-; CHECK-NEXT:    [[TMP3:%.*]] = call [[VOID_ONE_OUT_ARG_I32_2_USE:%.*]] @void_one_out_arg_i32_2_use.body(i1 [[TMP0]], i32* undef)
+; CHECK-NEXT:    [[TMP3:%.*]] = call [[VOID_ONE_OUT_ARG_I32_2_USE:%.*]] @void_one_out_arg_i32_2_use.body(i1 [[TMP0]], i32* poison)
 ; CHECK-NEXT:    [[TMP4:%.*]] = extractvalue [[VOID_ONE_OUT_ARG_I32_2_USE]] [[TMP3]], 0
 ; CHECK-NEXT:    store i32 [[TMP4]], i32* [[TMP1]], align 4
 ; CHECK-NEXT:    ret void
@@ -553,7 +553,7 @@ attributes #2 = { alwaysinline nounwind }
 ;
 ; CHECK-LABEL: define {{[^@]+}}@void_one_out_arg_i32_2_stores
 ; CHECK-SAME: (i32* [[TMP0:%.*]]) #[[ATTR2]] {
-; CHECK-NEXT:    [[TMP2:%.*]] = call [[VOID_ONE_OUT_ARG_I32_2_STORES:%.*]] @void_one_out_arg_i32_2_stores.body(i32* undef)
+; CHECK-NEXT:    [[TMP2:%.*]] = call [[VOID_ONE_OUT_ARG_I32_2_STORES:%.*]] @void_one_out_arg_i32_2_stores.body(i32* poison)
 ; CHECK-NEXT:    [[TMP3:%.*]] = extractvalue [[VOID_ONE_OUT_ARG_I32_2_STORES]] [[TMP2]], 0
 ; CHECK-NEXT:    store i32 [[TMP3]], i32* [[TMP0]], align 4
 ; CHECK-NEXT:    ret void
@@ -568,7 +568,7 @@ attributes #2 = { alwaysinline nounwind }
 ;
 ; CHECK-LABEL: define {{[^@]+}}@void_one_out_arg_i32_2_stores_clobber
 ; CHECK-SAME: (i32* [[TMP0:%.*]]) #[[ATTR2]] {
-; CHECK-NEXT:    [[TMP2:%.*]] = call [[VOID_ONE_OUT_ARG_I32_2_STORES_CLOBBER:%.*]] @void_one_out_arg_i32_2_stores_clobber.body(i32* undef)
+; CHECK-NEXT:    [[TMP2:%.*]] = call [[VOID_ONE_OUT_ARG_I32_2_STORES_CLOBBER:%.*]] @void_one_out_arg_i32_2_stores_clobber.body(i32* poison)
 ; CHECK-NEXT:    [[TMP3:%.*]] = extractvalue [[VOID_ONE_OUT_ARG_I32_2_STORES_CLOBBER]] [[TMP2]], 0
 ; CHECK-NEXT:    store i32 [[TMP3]], i32* [[TMP0]], align 4
 ; CHECK-NEXT:    ret void
@@ -589,7 +589,7 @@ attributes #2 = { alwaysinline nounwind }
 ;
 ; CHECK-LABEL: define {{[^@]+}}@void_one_out_arg_i32_pre_call_may_clobber
 ; CHECK-SAME: (i32* [[TMP0:%.*]]) #[[ATTR2]] {
-; CHECK-NEXT:    [[TMP2:%.*]] = call [[VOID_ONE_OUT_ARG_I32_PRE_CALL_MAY_CLOBBER:%.*]] @void_one_out_arg_i32_pre_call_may_clobber.body(i32* undef)
+; CHECK-NEXT:    [[TMP2:%.*]] = call [[VOID_ONE_OUT_ARG_I32_PRE_CALL_MAY_CLOBBER:%.*]] @void_one_out_arg_i32_pre_call_may_clobber.body(i32* poison)
 ; CHECK-NEXT:    [[TMP3:%.*]] = extractvalue [[VOID_ONE_OUT_ARG_I32_PRE_CALL_MAY_CLOBBER]] [[TMP2]], 0
 ; CHECK-NEXT:    store i32 [[TMP3]], i32* [[TMP0]], align 4
 ; CHECK-NEXT:    ret void
@@ -604,7 +604,7 @@ attributes #2 = { alwaysinline nounwind }
 ;
 ; CHECK-LABEL: define {{[^@]+}}@void_one_out_arg_i32_store_in_
diff erent_block
 ; CHECK-SAME: (i32* [[OUT:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[LOAD:%.*]] = load i32, i32 addrspace(1)* undef, align 4
+; CHECK-NEXT:    [[LOAD:%.*]] = load i32, i32 addrspace(1)* poison, align 4
 ; CHECK-NEXT:    store i32 0, i32* [[OUT]], align 4
 ; CHECK-NEXT:    br label [[RET:%.*]]
 ; CHECK:       ret:
@@ -628,7 +628,7 @@ attributes #2 = { alwaysinline nounwind }
 ;
 ; CHECK-LABEL: define {{[^@]+}}@void_one_out_arg_v2i32_1_use
 ; CHECK-SAME: (<2 x i32>* [[TMP0:%.*]]) #[[ATTR2]] {
-; CHECK-NEXT:    [[TMP2:%.*]] = call [[VOID_ONE_OUT_ARG_V2I32_1_USE:%.*]] @void_one_out_arg_v2i32_1_use.body(<2 x i32>* undef)
+; CHECK-NEXT:    [[TMP2:%.*]] = call [[VOID_ONE_OUT_ARG_V2I32_1_USE:%.*]] @void_one_out_arg_v2i32_1_use.body(<2 x i32>* poison)
 ; CHECK-NEXT:    [[TMP3:%.*]] = extractvalue [[VOID_ONE_OUT_ARG_V2I32_1_USE]] [[TMP2]], 0
 ; CHECK-NEXT:    store <2 x i32> [[TMP3]], <2 x i32>* [[TMP0]], align 8
 ; CHECK-NEXT:    ret void
@@ -641,7 +641,7 @@ attributes #2 = { alwaysinline nounwind }
 ;
 ; CHECK-LABEL: define {{[^@]+}}@void_one_out_arg_struct_1_use
 ; CHECK-SAME: (%struct* [[TMP0:%.*]]) #[[ATTR2]] {
-; CHECK-NEXT:    [[TMP2:%.*]] = call [[VOID_ONE_OUT_ARG_STRUCT_1_USE:%.*]] @void_one_out_arg_struct_1_use.body(%struct* undef)
+; CHECK-NEXT:    [[TMP2:%.*]] = call [[VOID_ONE_OUT_ARG_STRUCT_1_USE:%.*]] @void_one_out_arg_struct_1_use.body(%struct* poison)
 ; CHECK-NEXT:    [[TMP3:%.*]] = extractvalue [[VOID_ONE_OUT_ARG_STRUCT_1_USE]] [[TMP2]], 0
 ; CHECK-NEXT:    store [[STRUCT:%.*]] [[TMP3]], %struct* [[TMP0]], align 4
 ; CHECK-NEXT:    ret void
@@ -654,7 +654,7 @@ attributes #2 = { alwaysinline nounwind }
 ;
 ; CHECK-LABEL: define {{[^@]+}}@i32_one_out_arg_i32_1_use
 ; CHECK-SAME: (i32* [[TMP0:%.*]]) #[[ATTR2]] {
-; CHECK-NEXT:    [[TMP2:%.*]] = call [[I32_ONE_OUT_ARG_I32_1_USE:%.*]] @i32_one_out_arg_i32_1_use.body(i32* undef)
+; CHECK-NEXT:    [[TMP2:%.*]] = call [[I32_ONE_OUT_ARG_I32_1_USE:%.*]] @i32_one_out_arg_i32_1_use.body(i32* poison)
 ; CHECK-NEXT:    [[TMP3:%.*]] = extractvalue [[I32_ONE_OUT_ARG_I32_1_USE]] [[TMP2]], 1
 ; CHECK-NEXT:    store i32 [[TMP3]], i32* [[TMP0]], align 4
 ; CHECK-NEXT:    [[TMP4:%.*]] = extractvalue [[I32_ONE_OUT_ARG_I32_1_USE]] [[TMP2]], 0
@@ -668,7 +668,7 @@ attributes #2 = { alwaysinline nounwind }
 ;
 ; CHECK-LABEL: define {{[^@]+}}@unused_
diff erent_type
 ; CHECK-SAME: (i32* [[TMP0:%.*]], float* nocapture [[TMP1:%.*]]) #[[ATTR2]] {
-; CHECK-NEXT:    [[TMP3:%.*]] = call [[UNUSED_DIFFERENT_TYPE:%.*]] @unused_
diff erent_type.body(i32* [[TMP0]], float* undef)
+; CHECK-NEXT:    [[TMP3:%.*]] = call [[UNUSED_DIFFERENT_TYPE:%.*]] @unused_
diff erent_type.body(i32* [[TMP0]], float* poison)
 ; CHECK-NEXT:    [[TMP4:%.*]] = extractvalue [[UNUSED_DIFFERENT_TYPE]] [[TMP3]], 0
 ; CHECK-NEXT:    store float [[TMP4]], float* [[TMP1]], align 4
 ; CHECK-NEXT:    ret void
@@ -681,7 +681,7 @@ attributes #2 = { alwaysinline nounwind }
 ;
 ; CHECK-LABEL: define {{[^@]+}}@multiple_same_return_noalias
 ; CHECK-SAME: (i32* noalias [[TMP0:%.*]], i32* noalias [[TMP1:%.*]]) #[[ATTR2]] {
-; CHECK-NEXT:    [[TMP3:%.*]] = call [[MULTIPLE_SAME_RETURN_NOALIAS:%.*]] @multiple_same_return_noalias.body(i32* undef, i32* undef)
+; CHECK-NEXT:    [[TMP3:%.*]] = call [[MULTIPLE_SAME_RETURN_NOALIAS:%.*]] @multiple_same_return_noalias.body(i32* poison, i32* poison)
 ; CHECK-NEXT:    [[TMP4:%.*]] = extractvalue [[MULTIPLE_SAME_RETURN_NOALIAS]] [[TMP3]], 0
 ; CHECK-NEXT:    store i32 [[TMP4]], i32* [[TMP0]], align 4
 ; CHECK-NEXT:    [[TMP5:%.*]] = extractvalue [[MULTIPLE_SAME_RETURN_NOALIAS]] [[TMP3]], 1
@@ -696,7 +696,7 @@ attributes #2 = { alwaysinline nounwind }
 ;
 ; CHECK-LABEL: define {{[^@]+}}@multiple_same_return_mayalias
 ; CHECK-SAME: (i32* [[TMP0:%.*]], i32* [[TMP1:%.*]]) #[[ATTR2]] {
-; CHECK-NEXT:    [[TMP3:%.*]] = call [[MULTIPLE_SAME_RETURN_MAYALIAS:%.*]] @multiple_same_return_mayalias.body(i32* undef, i32* undef)
+; CHECK-NEXT:    [[TMP3:%.*]] = call [[MULTIPLE_SAME_RETURN_MAYALIAS:%.*]] @multiple_same_return_mayalias.body(i32* poison, i32* poison)
 ; CHECK-NEXT:    [[TMP4:%.*]] = extractvalue [[MULTIPLE_SAME_RETURN_MAYALIAS]] [[TMP3]], 0
 ; CHECK-NEXT:    store i32 [[TMP4]], i32* [[TMP0]], align 4
 ; CHECK-NEXT:    [[TMP5:%.*]] = extractvalue [[MULTIPLE_SAME_RETURN_MAYALIAS]] [[TMP3]], 1
@@ -711,7 +711,7 @@ attributes #2 = { alwaysinline nounwind }
 ;
 ; CHECK-LABEL: define {{[^@]+}}@multiple_same_return_mayalias_order
 ; CHECK-SAME: (i32* [[TMP0:%.*]], i32* [[TMP1:%.*]]) #[[ATTR2]] {
-; CHECK-NEXT:    [[TMP3:%.*]] = call [[MULTIPLE_SAME_RETURN_MAYALIAS_ORDER:%.*]] @multiple_same_return_mayalias_order.body(i32* undef, i32* undef)
+; CHECK-NEXT:    [[TMP3:%.*]] = call [[MULTIPLE_SAME_RETURN_MAYALIAS_ORDER:%.*]] @multiple_same_return_mayalias_order.body(i32* poison, i32* poison)
 ; CHECK-NEXT:    [[TMP4:%.*]] = extractvalue [[MULTIPLE_SAME_RETURN_MAYALIAS_ORDER]] [[TMP3]], 0
 ; CHECK-NEXT:    store i32 [[TMP4]], i32* [[TMP0]], align 4
 ; CHECK-NEXT:    [[TMP5:%.*]] = extractvalue [[MULTIPLE_SAME_RETURN_MAYALIAS_ORDER]] [[TMP3]], 1
@@ -722,11 +722,11 @@ attributes #2 = { alwaysinline nounwind }
 ; CHECK-LABEL: define {{[^@]+}}@store_in_entry_block
 ; CHECK-SAME: (i1 [[ARG0:%.*]], i32* [[OUT:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[VAL0:%.*]] = load i32, i32 addrspace(1)* undef, align 4
+; CHECK-NEXT:    [[VAL0:%.*]] = load i32, i32 addrspace(1)* poison, align 4
 ; CHECK-NEXT:    store i32 [[VAL0]], i32* [[OUT]], align 4
 ; CHECK-NEXT:    br i1 [[ARG0]], label [[IF:%.*]], label [[ENDIF:%.*]]
 ; CHECK:       if:
-; CHECK-NEXT:    [[VAL1:%.*]] = load i32, i32 addrspace(1)* undef, align 4
+; CHECK-NEXT:    [[VAL1:%.*]] = load i32, i32 addrspace(1)* poison, align 4
 ; CHECK-NEXT:    br label [[ENDIF]]
 ; CHECK:       endif:
 ; CHECK-NEXT:    [[PHI:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[VAL1]], [[IF]] ]
@@ -740,7 +740,7 @@ attributes #2 = { alwaysinline nounwind }
 ;
 ; CHECK-LABEL: define {{[^@]+}}@i1_one_out_arg_i32_1_use
 ; CHECK-SAME: (i32* [[TMP0:%.*]]) #[[ATTR2]] {
-; CHECK-NEXT:    [[TMP2:%.*]] = call [[I1_ONE_OUT_ARG_I32_1_USE:%.*]] @i1_one_out_arg_i32_1_use.body(i32* undef)
+; CHECK-NEXT:    [[TMP2:%.*]] = call [[I1_ONE_OUT_ARG_I32_1_USE:%.*]] @i1_one_out_arg_i32_1_use.body(i32* poison)
 ; CHECK-NEXT:    [[TMP3:%.*]] = extractvalue [[I1_ONE_OUT_ARG_I32_1_USE]] [[TMP2]], 1
 ; CHECK-NEXT:    store i32 [[TMP3]], i32* [[TMP0]], align 4
 ; CHECK-NEXT:    [[TMP4:%.*]] = extractvalue [[I1_ONE_OUT_ARG_I32_1_USE]] [[TMP2]], 0
@@ -754,7 +754,7 @@ attributes #2 = { alwaysinline nounwind }
 ;
 ; CHECK-LABEL: define {{[^@]+}}@i1_zeroext_one_out_arg_i32_1_use
 ; CHECK-SAME: (i32* [[TMP0:%.*]]) #[[ATTR2]] {
-; CHECK-NEXT:    [[TMP2:%.*]] = call [[I1_ZEROEXT_ONE_OUT_ARG_I32_1_USE:%.*]] @i1_zeroext_one_out_arg_i32_1_use.body(i32* undef)
+; CHECK-NEXT:    [[TMP2:%.*]] = call [[I1_ZEROEXT_ONE_OUT_ARG_I32_1_USE:%.*]] @i1_zeroext_one_out_arg_i32_1_use.body(i32* poison)
 ; CHECK-NEXT:    [[TMP3:%.*]] = extractvalue [[I1_ZEROEXT_ONE_OUT_ARG_I32_1_USE]] [[TMP2]], 1
 ; CHECK-NEXT:    store i32 [[TMP3]], i32* [[TMP0]], align 4
 ; CHECK-NEXT:    [[TMP4:%.*]] = extractvalue [[I1_ZEROEXT_ONE_OUT_ARG_I32_1_USE]] [[TMP2]], 0
@@ -768,7 +768,7 @@ attributes #2 = { alwaysinline nounwind }
 ;
 ; CHECK-LABEL: define {{[^@]+}}@i1_signext_one_out_arg_i32_1_use
 ; CHECK-SAME: (i32* [[TMP0:%.*]]) #[[ATTR2]] {
-; CHECK-NEXT:    [[TMP2:%.*]] = call [[I1_SIGNEXT_ONE_OUT_ARG_I32_1_USE:%.*]] @i1_signext_one_out_arg_i32_1_use.body(i32* undef)
+; CHECK-NEXT:    [[TMP2:%.*]] = call [[I1_SIGNEXT_ONE_OUT_ARG_I32_1_USE:%.*]] @i1_signext_one_out_arg_i32_1_use.body(i32* poison)
 ; CHECK-NEXT:    [[TMP3:%.*]] = extractvalue [[I1_SIGNEXT_ONE_OUT_ARG_I32_1_USE]] [[TMP2]], 1
 ; CHECK-NEXT:    store i32 [[TMP3]], i32* [[TMP0]], align 4
 ; CHECK-NEXT:    [[TMP4:%.*]] = extractvalue [[I1_SIGNEXT_ONE_OUT_ARG_I32_1_USE]] [[TMP2]], 0
@@ -782,7 +782,7 @@ attributes #2 = { alwaysinline nounwind }
 ;
 ; CHECK-LABEL: define {{[^@]+}}@p1i32_noalias_one_out_arg_i32_1_use
 ; CHECK-SAME: (i32* [[TMP0:%.*]]) #[[ATTR2]] {
-; CHECK-NEXT:    [[TMP2:%.*]] = call [[P1I32_NOALIAS_ONE_OUT_ARG_I32_1_USE:%.*]] @p1i32_noalias_one_out_arg_i32_1_use.body(i32* undef)
+; CHECK-NEXT:    [[TMP2:%.*]] = call [[P1I32_NOALIAS_ONE_OUT_ARG_I32_1_USE:%.*]] @p1i32_noalias_one_out_arg_i32_1_use.body(i32* poison)
 ; CHECK-NEXT:    [[TMP3:%.*]] = extractvalue [[P1I32_NOALIAS_ONE_OUT_ARG_I32_1_USE]] [[TMP2]], 1
 ; CHECK-NEXT:    store i32 [[TMP3]], i32* [[TMP0]], align 4
 ; CHECK-NEXT:    [[TMP4:%.*]] = extractvalue [[P1I32_NOALIAS_ONE_OUT_ARG_I32_1_USE]] [[TMP2]], 0
@@ -797,14 +797,14 @@ attributes #2 = { alwaysinline nounwind }
 ;
 ; CHECK-LABEL: define {{[^@]+}}@func_ptr_type.body
 ; CHECK-SAME: (void ()** [[OUT:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[FUNC:%.*]] = load void ()*, void ()** undef, align 8
-; CHECK-NEXT:    [[TMP1:%.*]] = insertvalue [[FUNC_PTR_TYPE:%.*]] undef, void ()* [[FUNC]], 0
+; CHECK-NEXT:    [[FUNC:%.*]] = load void ()*, void ()** poison, align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = insertvalue [[FUNC_PTR_TYPE:%.*]] poison, void ()* [[FUNC]], 0
 ; CHECK-NEXT:    ret [[FUNC_PTR_TYPE]] [[TMP1]]
 ;
 ;
 ; CHECK-LABEL: define {{[^@]+}}@func_ptr_type
 ; CHECK-SAME: (void ()** [[TMP0:%.*]]) #[[ATTR2]] {
-; CHECK-NEXT:    [[TMP2:%.*]] = call [[FUNC_PTR_TYPE:%.*]] @func_ptr_type.body(void ()** undef)
+; CHECK-NEXT:    [[TMP2:%.*]] = call [[FUNC_PTR_TYPE:%.*]] @func_ptr_type.body(void ()** poison)
 ; CHECK-NEXT:    [[TMP3:%.*]] = extractvalue [[FUNC_PTR_TYPE]] [[TMP2]], 0
 ; CHECK-NEXT:    store void ()* [[TMP3]], void ()** [[TMP0]], align 8
 ; CHECK-NEXT:    ret void
@@ -812,15 +812,15 @@ attributes #2 = { alwaysinline nounwind }
 ;
 ; CHECK-LABEL: define {{[^@]+}}@bitcast_func_ptr_type.body
 ; CHECK-SAME: (void ()** [[OUT:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[FUNC:%.*]] = load i32 ()*, i32 ()** undef, align 8
+; CHECK-NEXT:    [[FUNC:%.*]] = load i32 ()*, i32 ()** poison, align 8
 ; CHECK-NEXT:    [[CAST:%.*]] = bitcast void ()** [[OUT]] to i32 ()**
-; CHECK-NEXT:    [[TMP1:%.*]] = insertvalue [[BITCAST_FUNC_PTR_TYPE:%.*]] undef, i32 ()* [[FUNC]], 0
+; CHECK-NEXT:    [[TMP1:%.*]] = insertvalue [[BITCAST_FUNC_PTR_TYPE:%.*]] poison, i32 ()* [[FUNC]], 0
 ; CHECK-NEXT:    ret [[BITCAST_FUNC_PTR_TYPE]] [[TMP1]]
 ;
 ;
 ; CHECK-LABEL: define {{[^@]+}}@bitcast_func_ptr_type
 ; CHECK-SAME: (void ()** [[TMP0:%.*]]) #[[ATTR2]] {
-; CHECK-NEXT:    [[TMP2:%.*]] = call [[BITCAST_FUNC_PTR_TYPE:%.*]] @bitcast_func_ptr_type.body(void ()** undef)
+; CHECK-NEXT:    [[TMP2:%.*]] = call [[BITCAST_FUNC_PTR_TYPE:%.*]] @bitcast_func_ptr_type.body(void ()** poison)
 ; CHECK-NEXT:    [[TMP3:%.*]] = extractvalue [[BITCAST_FUNC_PTR_TYPE]] [[TMP2]], 0
 ; CHECK-NEXT:    [[TMP4:%.*]] = bitcast void ()** [[TMP0]] to i32 ()**
 ; CHECK-NEXT:    store i32 ()* [[TMP3]], i32 ()** [[TMP4]], align 8
@@ -834,7 +834,7 @@ attributes #2 = { alwaysinline nounwind }
 ;
 ; CHECK-LABEL: define {{[^@]+}}@out_arg_small_array
 ; CHECK-SAME: ([4 x i32]* [[TMP0:%.*]]) #[[ATTR2]] {
-; CHECK-NEXT:    [[TMP2:%.*]] = call [[OUT_ARG_SMALL_ARRAY:%.*]] @out_arg_small_array.body([4 x i32]* undef)
+; CHECK-NEXT:    [[TMP2:%.*]] = call [[OUT_ARG_SMALL_ARRAY:%.*]] @out_arg_small_array.body([4 x i32]* poison)
 ; CHECK-NEXT:    [[TMP3:%.*]] = extractvalue [[OUT_ARG_SMALL_ARRAY]] [[TMP2]], 0
 ; CHECK-NEXT:    store [4 x i32] [[TMP3]], [4 x i32]* [[TMP0]], align 4
 ; CHECK-NEXT:    ret void
@@ -848,22 +848,22 @@ attributes #2 = { alwaysinline nounwind }
 ;
 ; CHECK-LABEL: define {{[^@]+}}@num_regs_return_limit
 ; CHECK-SAME: (i32* [[OUT:%.*]], i32 [[VAL:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[LOAD:%.*]] = load volatile <16 x i32>, <16 x i32> addrspace(1)* undef, align 64
+; CHECK-NEXT:    [[LOAD:%.*]] = load volatile <16 x i32>, <16 x i32> addrspace(1)* poison, align 64
 ; CHECK-NEXT:    store i32 [[VAL]], i32* [[OUT]], align 4
 ; CHECK-NEXT:    ret <16 x i32> [[LOAD]]
 ;
 ;
 ; CHECK-LABEL: define {{[^@]+}}@num_regs_reach_limit.body
 ; CHECK-SAME: (i32* [[OUT:%.*]], i32 [[VAL:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[LOAD:%.*]] = load volatile [15 x i32], [15 x i32] addrspace(1)* undef, align 4
-; CHECK-NEXT:    [[TMP1:%.*]] = insertvalue [[NUM_REGS_REACH_LIMIT:%.*]] undef, [15 x i32] [[LOAD]], 0
+; CHECK-NEXT:    [[LOAD:%.*]] = load volatile [15 x i32], [15 x i32] addrspace(1)* poison, align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = insertvalue [[NUM_REGS_REACH_LIMIT:%.*]] poison, [15 x i32] [[LOAD]], 0
 ; CHECK-NEXT:    [[TMP2:%.*]] = insertvalue [[NUM_REGS_REACH_LIMIT]] [[TMP1]], i32 [[VAL]], 1
 ; CHECK-NEXT:    ret [[NUM_REGS_REACH_LIMIT]] [[TMP2]]
 ;
 ;
 ; CHECK-LABEL: define {{[^@]+}}@num_regs_reach_limit
 ; CHECK-SAME: (i32* [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
-; CHECK-NEXT:    [[TMP3:%.*]] = call [[NUM_REGS_REACH_LIMIT:%.*]] @num_regs_reach_limit.body(i32* undef, i32 [[TMP1]])
+; CHECK-NEXT:    [[TMP3:%.*]] = call [[NUM_REGS_REACH_LIMIT:%.*]] @num_regs_reach_limit.body(i32* poison, i32 [[TMP1]])
 ; CHECK-NEXT:    [[TMP4:%.*]] = extractvalue [[NUM_REGS_REACH_LIMIT]] [[TMP3]], 1
 ; CHECK-NEXT:    store i32 [[TMP4]], i32* [[TMP0]], align 4
 ; CHECK-NEXT:    [[TMP5:%.*]] = extractvalue [[NUM_REGS_REACH_LIMIT]] [[TMP3]], 0
@@ -872,9 +872,9 @@ attributes #2 = { alwaysinline nounwind }
 ;
 ; CHECK-LABEL: define {{[^@]+}}@num_regs_reach_limit_leftover.body
 ; CHECK-SAME: (i32* [[OUT0:%.*]], i32* [[OUT1:%.*]], i32 [[VAL0:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[LOAD0:%.*]] = load volatile [15 x i32], [15 x i32] addrspace(1)* undef, align 4
-; CHECK-NEXT:    [[LOAD1:%.*]] = load volatile i32, i32 addrspace(1)* undef, align 4
-; CHECK-NEXT:    [[TMP1:%.*]] = insertvalue [[NUM_REGS_REACH_LIMIT_LEFTOVER:%.*]] undef, [15 x i32] [[LOAD0]], 0
+; CHECK-NEXT:    [[LOAD0:%.*]] = load volatile [15 x i32], [15 x i32] addrspace(1)* poison, align 4
+; CHECK-NEXT:    [[LOAD1:%.*]] = load volatile i32, i32 addrspace(1)* poison, align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = insertvalue [[NUM_REGS_REACH_LIMIT_LEFTOVER:%.*]] poison, [15 x i32] [[LOAD0]], 0
 ; CHECK-NEXT:    [[TMP2:%.*]] = insertvalue [[NUM_REGS_REACH_LIMIT_LEFTOVER]] [[TMP1]], i32 [[LOAD1]], 1
 ; CHECK-NEXT:    [[TMP3:%.*]] = insertvalue [[NUM_REGS_REACH_LIMIT_LEFTOVER]] [[TMP2]], i32 [[VAL0]], 2
 ; CHECK-NEXT:    ret [[NUM_REGS_REACH_LIMIT_LEFTOVER]] [[TMP3]]
@@ -882,7 +882,7 @@ attributes #2 = { alwaysinline nounwind }
 ;
 ; CHECK-LABEL: define {{[^@]+}}@num_regs_reach_limit_leftover
 ; CHECK-SAME: (i32* [[TMP0:%.*]], i32* [[TMP1:%.*]], i32 [[TMP2:%.*]]) #[[ATTR2]] {
-; CHECK-NEXT:    [[TMP4:%.*]] = call [[NUM_REGS_REACH_LIMIT_LEFTOVER:%.*]] @num_regs_reach_limit_leftover.body(i32* undef, i32* undef, i32 [[TMP2]])
+; CHECK-NEXT:    [[TMP4:%.*]] = call [[NUM_REGS_REACH_LIMIT_LEFTOVER:%.*]] @num_regs_reach_limit_leftover.body(i32* poison, i32* poison, i32 [[TMP2]])
 ; CHECK-NEXT:    [[TMP5:%.*]] = extractvalue [[NUM_REGS_REACH_LIMIT_LEFTOVER]] [[TMP4]], 1
 ; CHECK-NEXT:    store i32 [[TMP5]], i32* [[TMP0]], align 4
 ; CHECK-NEXT:    [[TMP6:%.*]] = extractvalue [[NUM_REGS_REACH_LIMIT_LEFTOVER]] [[TMP4]], 2
@@ -894,13 +894,13 @@ attributes #2 = { alwaysinline nounwind }
 ; CHECK-LABEL: define {{[^@]+}}@preserve_debug_info.body
 ; CHECK-SAME: (i32 [[ARG0:%.*]], i32* [[VAL:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:    call void @may.clobber(), !dbg [[DBG5:![0-9]+]]
-; CHECK-NEXT:    [[TMP1:%.*]] = insertvalue [[PRESERVE_DEBUG_INFO:%.*]] undef, i32 [[ARG0]], 0, !dbg [[DBG11:![0-9]+]]
+; CHECK-NEXT:    [[TMP1:%.*]] = insertvalue [[PRESERVE_DEBUG_INFO:%.*]] poison, i32 [[ARG0]], 0, !dbg [[DBG11:![0-9]+]]
 ; CHECK-NEXT:    ret [[PRESERVE_DEBUG_INFO]] [[TMP1]], !dbg [[DBG11]]
 ;
 ;
 ; CHECK-LABEL: define {{[^@]+}}@preserve_debug_info
 ; CHECK-SAME: (i32 [[TMP0:%.*]], i32* [[TMP1:%.*]]) #[[ATTR2]] !dbg [[DBG6:![0-9]+]] {
-; CHECK-NEXT:    [[TMP3:%.*]] = call [[PRESERVE_DEBUG_INFO:%.*]] @preserve_debug_info.body(i32 [[TMP0]], i32* undef)
+; CHECK-NEXT:    [[TMP3:%.*]] = call [[PRESERVE_DEBUG_INFO:%.*]] @preserve_debug_info.body(i32 [[TMP0]], i32* poison)
 ; CHECK-NEXT:    [[TMP4:%.*]] = extractvalue [[PRESERVE_DEBUG_INFO]] [[TMP3]], 0
 ; CHECK-NEXT:    store i32 [[TMP4]], i32* [[TMP1]], align 4
 ; CHECK-NEXT:    ret void
@@ -909,13 +909,13 @@ attributes #2 = { alwaysinline nounwind }
 ; CHECK-LABEL: define {{[^@]+}}@preserve_metadata.body
 ; CHECK-SAME: (i32 [[ARG0:%.*]], i32* [[VAL:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:    call void @may.clobber()
-; CHECK-NEXT:    [[TMP1:%.*]] = insertvalue [[PRESERVE_METADATA:%.*]] undef, i32 [[ARG0]], 0
+; CHECK-NEXT:    [[TMP1:%.*]] = insertvalue [[PRESERVE_METADATA:%.*]] poison, i32 [[ARG0]], 0
 ; CHECK-NEXT:    ret [[PRESERVE_METADATA]] [[TMP1]]
 ;
 ;
 ; CHECK-LABEL: define {{[^@]+}}@preserve_metadata
 ; CHECK-SAME: (i32 [[TMP0:%.*]], i32* [[TMP1:%.*]]) #[[ATTR2]] !kernel_arg_access_qual !12 {
-; CHECK-NEXT:    [[TMP3:%.*]] = call [[PRESERVE_METADATA:%.*]] @preserve_metadata.body(i32 [[TMP0]], i32* undef)
+; CHECK-NEXT:    [[TMP3:%.*]] = call [[PRESERVE_METADATA:%.*]] @preserve_metadata.body(i32 [[TMP0]], i32* poison)
 ; CHECK-NEXT:    [[TMP4:%.*]] = extractvalue [[PRESERVE_METADATA]] [[TMP3]], 0
 ; CHECK-NEXT:    store i32 [[TMP4]], i32* [[TMP1]], align 4
 ; CHECK-NEXT:    ret void
@@ -923,15 +923,15 @@ attributes #2 = { alwaysinline nounwind }
 ;
 ; CHECK-LABEL: define {{[^@]+}}@bitcast_pointer_v4i32_v3i32.body
 ; CHECK-SAME: (<3 x i32>* [[OUT:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[LOAD:%.*]] = load volatile <4 x i32>, <4 x i32> addrspace(1)* undef, align 16
+; CHECK-NEXT:    [[LOAD:%.*]] = load volatile <4 x i32>, <4 x i32> addrspace(1)* poison, align 16
 ; CHECK-NEXT:    [[BITCAST:%.*]] = bitcast <3 x i32>* [[OUT]] to <4 x i32>*
-; CHECK-NEXT:    [[TMP1:%.*]] = insertvalue [[BITCAST_POINTER_V4I32_V3I32:%.*]] undef, <4 x i32> [[LOAD]], 0
+; CHECK-NEXT:    [[TMP1:%.*]] = insertvalue [[BITCAST_POINTER_V4I32_V3I32:%.*]] poison, <4 x i32> [[LOAD]], 0
 ; CHECK-NEXT:    ret [[BITCAST_POINTER_V4I32_V3I32]] [[TMP1]]
 ;
 ;
 ; CHECK-LABEL: define {{[^@]+}}@bitcast_pointer_v4i32_v3i32
 ; CHECK-SAME: (<3 x i32>* [[TMP0:%.*]]) #[[ATTR2]] {
-; CHECK-NEXT:    [[TMP2:%.*]] = call [[BITCAST_POINTER_V4I32_V3I32:%.*]] @bitcast_pointer_v4i32_v3i32.body(<3 x i32>* undef)
+; CHECK-NEXT:    [[TMP2:%.*]] = call [[BITCAST_POINTER_V4I32_V3I32:%.*]] @bitcast_pointer_v4i32_v3i32.body(<3 x i32>* poison)
 ; CHECK-NEXT:    [[TMP3:%.*]] = extractvalue [[BITCAST_POINTER_V4I32_V3I32]] [[TMP2]], 0
 ; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <3 x i32>* [[TMP0]] to <4 x i32>*
 ; CHECK-NEXT:    store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 16
@@ -940,15 +940,15 @@ attributes #2 = { alwaysinline nounwind }
 ;
 ; CHECK-LABEL: define {{[^@]+}}@bitcast_pointer_v4i32_v3f32.body
 ; CHECK-SAME: (<3 x float>* [[OUT:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[LOAD:%.*]] = load volatile <4 x i32>, <4 x i32> addrspace(1)* undef, align 16
+; CHECK-NEXT:    [[LOAD:%.*]] = load volatile <4 x i32>, <4 x i32> addrspace(1)* poison, align 16
 ; CHECK-NEXT:    [[BITCAST:%.*]] = bitcast <3 x float>* [[OUT]] to <4 x i32>*
-; CHECK-NEXT:    [[TMP1:%.*]] = insertvalue [[BITCAST_POINTER_V4I32_V3F32:%.*]] undef, <4 x i32> [[LOAD]], 0
+; CHECK-NEXT:    [[TMP1:%.*]] = insertvalue [[BITCAST_POINTER_V4I32_V3F32:%.*]] poison, <4 x i32> [[LOAD]], 0
 ; CHECK-NEXT:    ret [[BITCAST_POINTER_V4I32_V3F32]] [[TMP1]]
 ;
 ;
 ; CHECK-LABEL: define {{[^@]+}}@bitcast_pointer_v4i32_v3f32
 ; CHECK-SAME: (<3 x float>* [[TMP0:%.*]]) #[[ATTR2]] {
-; CHECK-NEXT:    [[TMP2:%.*]] = call [[BITCAST_POINTER_V4I32_V3F32:%.*]] @bitcast_pointer_v4i32_v3f32.body(<3 x float>* undef)
+; CHECK-NEXT:    [[TMP2:%.*]] = call [[BITCAST_POINTER_V4I32_V3F32:%.*]] @bitcast_pointer_v4i32_v3f32.body(<3 x float>* poison)
 ; CHECK-NEXT:    [[TMP3:%.*]] = extractvalue [[BITCAST_POINTER_V4I32_V3F32]] [[TMP2]], 0
 ; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <3 x float>* [[TMP0]] to <4 x i32>*
 ; CHECK-NEXT:    store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 16
@@ -957,15 +957,15 @@ attributes #2 = { alwaysinline nounwind }
 ;
 ; CHECK-LABEL: define {{[^@]+}}@bitcast_pointer_i32_f32.body
 ; CHECK-SAME: (float* [[OUT:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[LOAD:%.*]] = load volatile i32, i32 addrspace(1)* undef, align 4
+; CHECK-NEXT:    [[LOAD:%.*]] = load volatile i32, i32 addrspace(1)* poison, align 4
 ; CHECK-NEXT:    [[BITCAST:%.*]] = bitcast float* [[OUT]] to i32*
-; CHECK-NEXT:    [[TMP1:%.*]] = insertvalue [[BITCAST_POINTER_I32_F32:%.*]] undef, i32 [[LOAD]], 0
+; CHECK-NEXT:    [[TMP1:%.*]] = insertvalue [[BITCAST_POINTER_I32_F32:%.*]] poison, i32 [[LOAD]], 0
 ; CHECK-NEXT:    ret [[BITCAST_POINTER_I32_F32]] [[TMP1]]
 ;
 ;
 ; CHECK-LABEL: define {{[^@]+}}@bitcast_pointer_i32_f32
 ; CHECK-SAME: (float* [[TMP0:%.*]]) #[[ATTR2]] {
-; CHECK-NEXT:    [[TMP2:%.*]] = call [[BITCAST_POINTER_I32_F32:%.*]] @bitcast_pointer_i32_f32.body(float* undef)
+; CHECK-NEXT:    [[TMP2:%.*]] = call [[BITCAST_POINTER_I32_F32:%.*]] @bitcast_pointer_i32_f32.body(float* poison)
 ; CHECK-NEXT:    [[TMP3:%.*]] = extractvalue [[BITCAST_POINTER_I32_F32]] [[TMP2]], 0
 ; CHECK-NEXT:    [[TMP4:%.*]] = bitcast float* [[TMP0]] to i32*
 ; CHECK-NEXT:    store i32 [[TMP3]], i32* [[TMP4]], align 4
@@ -974,15 +974,15 @@ attributes #2 = { alwaysinline nounwind }
 ;
 ; CHECK-LABEL: define {{[^@]+}}@bitcast_pointer_i32_f16.body
 ; CHECK-SAME: (half* [[OUT:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[LOAD:%.*]] = load volatile i32, i32 addrspace(1)* undef, align 4
+; CHECK-NEXT:    [[LOAD:%.*]] = load volatile i32, i32 addrspace(1)* poison, align 4
 ; CHECK-NEXT:    [[BITCAST:%.*]] = bitcast half* [[OUT]] to i32*
-; CHECK-NEXT:    [[TMP1:%.*]] = insertvalue [[BITCAST_POINTER_I32_F16:%.*]] undef, i32 [[LOAD]], 0
+; CHECK-NEXT:    [[TMP1:%.*]] = insertvalue [[BITCAST_POINTER_I32_F16:%.*]] poison, i32 [[LOAD]], 0
 ; CHECK-NEXT:    ret [[BITCAST_POINTER_I32_F16]] [[TMP1]]
 ;
 ;
 ; CHECK-LABEL: define {{[^@]+}}@bitcast_pointer_i32_f16
 ; CHECK-SAME: (half* [[TMP0:%.*]]) #[[ATTR2]] {
-; CHECK-NEXT:    [[TMP2:%.*]] = call [[BITCAST_POINTER_I32_F16:%.*]] @bitcast_pointer_i32_f16.body(half* undef)
+; CHECK-NEXT:    [[TMP2:%.*]] = call [[BITCAST_POINTER_I32_F16:%.*]] @bitcast_pointer_i32_f16.body(half* poison)
 ; CHECK-NEXT:    [[TMP3:%.*]] = extractvalue [[BITCAST_POINTER_I32_F16]] [[TMP2]], 0
 ; CHECK-NEXT:    [[TMP4:%.*]] = bitcast half* [[TMP0]] to i32*
 ; CHECK-NEXT:    store i32 [[TMP3]], i32* [[TMP4]], align 4
@@ -991,15 +991,15 @@ attributes #2 = { alwaysinline nounwind }
 ;
 ; CHECK-LABEL: define {{[^@]+}}@bitcast_pointer_f16_i32.body
 ; CHECK-SAME: (i32* [[OUT:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[LOAD:%.*]] = load volatile half, half addrspace(1)* undef, align 2
+; CHECK-NEXT:    [[LOAD:%.*]] = load volatile half, half addrspace(1)* poison, align 2
 ; CHECK-NEXT:    [[BITCAST:%.*]] = bitcast i32* [[OUT]] to half*
-; CHECK-NEXT:    [[TMP1:%.*]] = insertvalue [[BITCAST_POINTER_F16_I32:%.*]] undef, half [[LOAD]], 0
+; CHECK-NEXT:    [[TMP1:%.*]] = insertvalue [[BITCAST_POINTER_F16_I32:%.*]] poison, half [[LOAD]], 0
 ; CHECK-NEXT:    ret [[BITCAST_POINTER_F16_I32]] [[TMP1]]
 ;
 ;
 ; CHECK-LABEL: define {{[^@]+}}@bitcast_pointer_f16_i32
 ; CHECK-SAME: (i32* [[TMP0:%.*]]) #[[ATTR2]] {
-; CHECK-NEXT:    [[TMP2:%.*]] = call [[BITCAST_POINTER_F16_I32:%.*]] @bitcast_pointer_f16_i32.body(i32* undef)
+; CHECK-NEXT:    [[TMP2:%.*]] = call [[BITCAST_POINTER_F16_I32:%.*]] @bitcast_pointer_f16_i32.body(i32* poison)
 ; CHECK-NEXT:    [[TMP3:%.*]] = extractvalue [[BITCAST_POINTER_F16_I32]] [[TMP2]], 0
 ; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i32* [[TMP0]] to half*
 ; CHECK-NEXT:    store half [[TMP3]], half* [[TMP4]], align 2
@@ -1010,13 +1010,13 @@ attributes #2 = { alwaysinline nounwind }
 ; CHECK-SAME: (%struct.v3f32* [[OUT:%.*]], <3 x float> [[VALUE:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:    [[EXTRACTVEC:%.*]] = shufflevector <3 x float> [[VALUE]], <3 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
 ; CHECK-NEXT:    [[CAST:%.*]] = bitcast %struct.v3f32* [[OUT]] to <4 x float>*
-; CHECK-NEXT:    [[TMP1:%.*]] = insertvalue [[BITCAST_STRUCT_V3F32_V3F32:%.*]] undef, <4 x float> [[EXTRACTVEC]], 0
+; CHECK-NEXT:    [[TMP1:%.*]] = insertvalue [[BITCAST_STRUCT_V3F32_V3F32:%.*]] poison, <4 x float> [[EXTRACTVEC]], 0
 ; CHECK-NEXT:    ret [[BITCAST_STRUCT_V3F32_V3F32]] [[TMP1]]
 ;
 ;
 ; CHECK-LABEL: define {{[^@]+}}@bitcast_struct_v3f32_v3f32
 ; CHECK-SAME: (%struct.v3f32* [[TMP0:%.*]], <3 x float> [[TMP1:%.*]]) #[[ATTR2]] {
-; CHECK-NEXT:    [[TMP3:%.*]] = call [[BITCAST_STRUCT_V3F32_V3F32:%.*]] @bitcast_struct_v3f32_v3f32.body(%struct.v3f32* undef, <3 x float> [[TMP1]])
+; CHECK-NEXT:    [[TMP3:%.*]] = call [[BITCAST_STRUCT_V3F32_V3F32:%.*]] @bitcast_struct_v3f32_v3f32.body(%struct.v3f32* poison, <3 x float> [[TMP1]])
 ; CHECK-NEXT:    [[TMP4:%.*]] = extractvalue [[BITCAST_STRUCT_V3F32_V3F32]] [[TMP3]], 0
 ; CHECK-NEXT:    [[TMP5:%.*]] = bitcast %struct.v3f32* [[TMP0]] to <4 x float>*
 ; CHECK-NEXT:    store <4 x float> [[TMP4]], <4 x float>* [[TMP5]], align 16
@@ -1027,13 +1027,13 @@ attributes #2 = { alwaysinline nounwind }
 ; CHECK-SAME: (%struct.v3f32* [[OUT:%.*]], <3 x i32> [[VALUE:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:    [[EXTRACTVEC:%.*]] = shufflevector <3 x i32> [[VALUE]], <3 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
 ; CHECK-NEXT:    [[CAST:%.*]] = bitcast %struct.v3f32* [[OUT]] to <4 x i32>*
-; CHECK-NEXT:    [[TMP1:%.*]] = insertvalue [[BITCAST_STRUCT_V3F32_V3I32:%.*]] undef, <4 x i32> [[EXTRACTVEC]], 0
+; CHECK-NEXT:    [[TMP1:%.*]] = insertvalue [[BITCAST_STRUCT_V3F32_V3I32:%.*]] poison, <4 x i32> [[EXTRACTVEC]], 0
 ; CHECK-NEXT:    ret [[BITCAST_STRUCT_V3F32_V3I32]] [[TMP1]]
 ;
 ;
 ; CHECK-LABEL: define {{[^@]+}}@bitcast_struct_v3f32_v3i32
 ; CHECK-SAME: (%struct.v3f32* [[TMP0:%.*]], <3 x i32> [[TMP1:%.*]]) #[[ATTR2]] {
-; CHECK-NEXT:    [[TMP3:%.*]] = call [[BITCAST_STRUCT_V3F32_V3I32:%.*]] @bitcast_struct_v3f32_v3i32.body(%struct.v3f32* undef, <3 x i32> [[TMP1]])
+; CHECK-NEXT:    [[TMP3:%.*]] = call [[BITCAST_STRUCT_V3F32_V3I32:%.*]] @bitcast_struct_v3f32_v3i32.body(%struct.v3f32* poison, <3 x i32> [[TMP1]])
 ; CHECK-NEXT:    [[TMP4:%.*]] = extractvalue [[BITCAST_STRUCT_V3F32_V3I32]] [[TMP3]], 0
 ; CHECK-NEXT:    [[TMP5:%.*]] = bitcast %struct.v3f32* [[TMP0]] to <4 x i32>*
 ; CHECK-NEXT:    store <4 x i32> [[TMP4]], <4 x i32>* [[TMP5]], align 16
@@ -1043,13 +1043,13 @@ attributes #2 = { alwaysinline nounwind }
 ; CHECK-LABEL: define {{[^@]+}}@bitcast_struct_v4f32_v4f32.body
 ; CHECK-SAME: (%struct.v4f32* [[OUT:%.*]], <4 x float> [[VALUE:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:    [[CAST:%.*]] = bitcast %struct.v4f32* [[OUT]] to <4 x float>*
-; CHECK-NEXT:    [[TMP1:%.*]] = insertvalue [[BITCAST_STRUCT_V4F32_V4F32:%.*]] undef, <4 x float> [[VALUE]], 0
+; CHECK-NEXT:    [[TMP1:%.*]] = insertvalue [[BITCAST_STRUCT_V4F32_V4F32:%.*]] poison, <4 x float> [[VALUE]], 0
 ; CHECK-NEXT:    ret [[BITCAST_STRUCT_V4F32_V4F32]] [[TMP1]]
 ;
 ;
 ; CHECK-LABEL: define {{[^@]+}}@bitcast_struct_v4f32_v4f32
 ; CHECK-SAME: (%struct.v4f32* [[TMP0:%.*]], <4 x float> [[TMP1:%.*]]) #[[ATTR2]] {
-; CHECK-NEXT:    [[TMP3:%.*]] = call [[BITCAST_STRUCT_V4F32_V4F32:%.*]] @bitcast_struct_v4f32_v4f32.body(%struct.v4f32* undef, <4 x float> [[TMP1]])
+; CHECK-NEXT:    [[TMP3:%.*]] = call [[BITCAST_STRUCT_V4F32_V4F32:%.*]] @bitcast_struct_v4f32_v4f32.body(%struct.v4f32* poison, <4 x float> [[TMP1]])
 ; CHECK-NEXT:    [[TMP4:%.*]] = extractvalue [[BITCAST_STRUCT_V4F32_V4F32]] [[TMP3]], 0
 ; CHECK-NEXT:    [[TMP5:%.*]] = bitcast %struct.v4f32* [[TMP0]] to <4 x float>*
 ; CHECK-NEXT:    store <4 x float> [[TMP4]], <4 x float>* [[TMP5]], align 16
@@ -1059,13 +1059,13 @@ attributes #2 = { alwaysinline nounwind }
 ; CHECK-LABEL: define {{[^@]+}}@bitcast_struct_v3f32_v4i32.body
 ; CHECK-SAME: (%struct.v3f32* [[OUT:%.*]], <4 x i32> [[VALUE:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:    [[CAST:%.*]] = bitcast %struct.v3f32* [[OUT]] to <4 x i32>*
-; CHECK-NEXT:    [[TMP1:%.*]] = insertvalue [[BITCAST_STRUCT_V3F32_V4I32:%.*]] undef, <4 x i32> [[VALUE]], 0
+; CHECK-NEXT:    [[TMP1:%.*]] = insertvalue [[BITCAST_STRUCT_V3F32_V4I32:%.*]] poison, <4 x i32> [[VALUE]], 0
 ; CHECK-NEXT:    ret [[BITCAST_STRUCT_V3F32_V4I32]] [[TMP1]]
 ;
 ;
 ; CHECK-LABEL: define {{[^@]+}}@bitcast_struct_v3f32_v4i32
 ; CHECK-SAME: (%struct.v3f32* [[TMP0:%.*]], <4 x i32> [[TMP1:%.*]]) #[[ATTR2]] {
-; CHECK-NEXT:    [[TMP3:%.*]] = call [[BITCAST_STRUCT_V3F32_V4I32:%.*]] @bitcast_struct_v3f32_v4i32.body(%struct.v3f32* undef, <4 x i32> [[TMP1]])
+; CHECK-NEXT:    [[TMP3:%.*]] = call [[BITCAST_STRUCT_V3F32_V4I32:%.*]] @bitcast_struct_v3f32_v4i32.body(%struct.v3f32* poison, <4 x i32> [[TMP1]])
 ; CHECK-NEXT:    [[TMP4:%.*]] = extractvalue [[BITCAST_STRUCT_V3F32_V4I32]] [[TMP3]], 0
 ; CHECK-NEXT:    [[TMP5:%.*]] = bitcast %struct.v3f32* [[TMP0]] to <4 x i32>*
 ; CHECK-NEXT:    store <4 x i32> [[TMP4]], <4 x i32>* [[TMP5]], align 16
@@ -1076,13 +1076,13 @@ attributes #2 = { alwaysinline nounwind }
 ; CHECK-SAME: (%struct.v4f32* [[OUT:%.*]], <3 x float> [[VALUE:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:    [[EXTRACTVEC:%.*]] = shufflevector <3 x float> [[VALUE]], <3 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
 ; CHECK-NEXT:    [[CAST:%.*]] = bitcast %struct.v4f32* [[OUT]] to <4 x float>*
-; CHECK-NEXT:    [[TMP1:%.*]] = insertvalue [[BITCAST_STRUCT_V4F32_V3F32:%.*]] undef, <4 x float> [[EXTRACTVEC]], 0
+; CHECK-NEXT:    [[TMP1:%.*]] = insertvalue [[BITCAST_STRUCT_V4F32_V3F32:%.*]] poison, <4 x float> [[EXTRACTVEC]], 0
 ; CHECK-NEXT:    ret [[BITCAST_STRUCT_V4F32_V3F32]] [[TMP1]]
 ;
 ;
 ; CHECK-LABEL: define {{[^@]+}}@bitcast_struct_v4f32_v3f32
 ; CHECK-SAME: (%struct.v4f32* [[TMP0:%.*]], <3 x float> [[TMP1:%.*]]) #[[ATTR2]] {
-; CHECK-NEXT:    [[TMP3:%.*]] = call [[BITCAST_STRUCT_V4F32_V3F32:%.*]] @bitcast_struct_v4f32_v3f32.body(%struct.v4f32* undef, <3 x float> [[TMP1]])
+; CHECK-NEXT:    [[TMP3:%.*]] = call [[BITCAST_STRUCT_V4F32_V3F32:%.*]] @bitcast_struct_v4f32_v3f32.body(%struct.v4f32* poison, <3 x float> [[TMP1]])
 ; CHECK-NEXT:    [[TMP4:%.*]] = extractvalue [[BITCAST_STRUCT_V4F32_V3F32]] [[TMP3]], 0
 ; CHECK-NEXT:    [[TMP5:%.*]] = bitcast %struct.v4f32* [[TMP0]] to <4 x float>*
 ; CHECK-NEXT:    store <4 x float> [[TMP4]], <4 x float>* [[TMP5]], align 16
@@ -1092,13 +1092,13 @@ attributes #2 = { alwaysinline nounwind }
 ; CHECK-LABEL: define {{[^@]+}}@bitcast_struct_v3f32_v2f32.body
 ; CHECK-SAME: (%struct.v3f32* [[OUT:%.*]], <2 x float> [[VALUE:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:    [[CAST:%.*]] = bitcast %struct.v3f32* [[OUT]] to <2 x float>*
-; CHECK-NEXT:    [[TMP1:%.*]] = insertvalue [[BITCAST_STRUCT_V3F32_V2F32:%.*]] undef, <2 x float> [[VALUE]], 0
+; CHECK-NEXT:    [[TMP1:%.*]] = insertvalue [[BITCAST_STRUCT_V3F32_V2F32:%.*]] poison, <2 x float> [[VALUE]], 0
 ; CHECK-NEXT:    ret [[BITCAST_STRUCT_V3F32_V2F32]] [[TMP1]]
 ;
 ;
 ; CHECK-LABEL: define {{[^@]+}}@bitcast_struct_v3f32_v2f32
 ; CHECK-SAME: (%struct.v3f32* [[TMP0:%.*]], <2 x float> [[TMP1:%.*]]) #[[ATTR2]] {
-; CHECK-NEXT:    [[TMP3:%.*]] = call [[BITCAST_STRUCT_V3F32_V2F32:%.*]] @bitcast_struct_v3f32_v2f32.body(%struct.v3f32* undef, <2 x float> [[TMP1]])
+; CHECK-NEXT:    [[TMP3:%.*]] = call [[BITCAST_STRUCT_V3F32_V2F32:%.*]] @bitcast_struct_v3f32_v2f32.body(%struct.v3f32* poison, <2 x float> [[TMP1]])
 ; CHECK-NEXT:    [[TMP4:%.*]] = extractvalue [[BITCAST_STRUCT_V3F32_V2F32]] [[TMP3]], 0
 ; CHECK-NEXT:    [[TMP5:%.*]] = bitcast %struct.v3f32* [[TMP0]] to <2 x float>*
 ; CHECK-NEXT:    store <2 x float> [[TMP4]], <2 x float>* [[TMP5]], align 8
@@ -1109,13 +1109,13 @@ attributes #2 = { alwaysinline nounwind }
 ; CHECK-SAME: (%struct.v3f32.f32* [[OUT:%.*]], <3 x float> [[VALUE:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:    [[EXTRACTVEC:%.*]] = shufflevector <3 x float> [[VALUE]], <3 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
 ; CHECK-NEXT:    [[CAST:%.*]] = bitcast %struct.v3f32.f32* [[OUT]] to <4 x float>*
-; CHECK-NEXT:    [[TMP1:%.*]] = insertvalue [[BITCAST_STRUCT_V3F32_F32_V3F32:%.*]] undef, <4 x float> [[EXTRACTVEC]], 0
+; CHECK-NEXT:    [[TMP1:%.*]] = insertvalue [[BITCAST_STRUCT_V3F32_F32_V3F32:%.*]] poison, <4 x float> [[EXTRACTVEC]], 0
 ; CHECK-NEXT:    ret [[BITCAST_STRUCT_V3F32_F32_V3F32]] [[TMP1]]
 ;
 ;
 ; CHECK-LABEL: define {{[^@]+}}@bitcast_struct_v3f32_f32_v3f32
 ; CHECK-SAME: (%struct.v3f32.f32* [[TMP0:%.*]], <3 x float> [[TMP1:%.*]]) #[[ATTR2]] {
-; CHECK-NEXT:    [[TMP3:%.*]] = call [[BITCAST_STRUCT_V3F32_F32_V3F32:%.*]] @bitcast_struct_v3f32_f32_v3f32.body(%struct.v3f32.f32* undef, <3 x float> [[TMP1]])
+; CHECK-NEXT:    [[TMP3:%.*]] = call [[BITCAST_STRUCT_V3F32_F32_V3F32:%.*]] @bitcast_struct_v3f32_f32_v3f32.body(%struct.v3f32.f32* poison, <3 x float> [[TMP1]])
 ; CHECK-NEXT:    [[TMP4:%.*]] = extractvalue [[BITCAST_STRUCT_V3F32_F32_V3F32]] [[TMP3]], 0
 ; CHECK-NEXT:    [[TMP5:%.*]] = bitcast %struct.v3f32.f32* [[TMP0]] to <4 x float>*
 ; CHECK-NEXT:    store <4 x float> [[TMP4]], <4 x float>* [[TMP5]], align 16
@@ -1125,13 +1125,13 @@ attributes #2 = { alwaysinline nounwind }
 ; CHECK-LABEL: define {{[^@]+}}@bitcast_struct_v3f32_f32_v4f32.body
 ; CHECK-SAME: (%struct.v3f32.f32* [[OUT:%.*]], <4 x float> [[VALUE:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:    [[CAST:%.*]] = bitcast %struct.v3f32.f32* [[OUT]] to <4 x float>*
-; CHECK-NEXT:    [[TMP1:%.*]] = insertvalue [[BITCAST_STRUCT_V3F32_F32_V4F32:%.*]] undef, <4 x float> [[VALUE]], 0
+; CHECK-NEXT:    [[TMP1:%.*]] = insertvalue [[BITCAST_STRUCT_V3F32_F32_V4F32:%.*]] poison, <4 x float> [[VALUE]], 0
 ; CHECK-NEXT:    ret [[BITCAST_STRUCT_V3F32_F32_V4F32]] [[TMP1]]
 ;
 ;
 ; CHECK-LABEL: define {{[^@]+}}@bitcast_struct_v3f32_f32_v4f32
 ; CHECK-SAME: (%struct.v3f32.f32* [[TMP0:%.*]], <4 x float> [[TMP1:%.*]]) #[[ATTR2]] {
-; CHECK-NEXT:    [[TMP3:%.*]] = call [[BITCAST_STRUCT_V3F32_F32_V4F32:%.*]] @bitcast_struct_v3f32_f32_v4f32.body(%struct.v3f32.f32* undef, <4 x float> [[TMP1]])
+; CHECK-NEXT:    [[TMP3:%.*]] = call [[BITCAST_STRUCT_V3F32_F32_V4F32:%.*]] @bitcast_struct_v3f32_f32_v4f32.body(%struct.v3f32.f32* poison, <4 x float> [[TMP1]])
 ; CHECK-NEXT:    [[TMP4:%.*]] = extractvalue [[BITCAST_STRUCT_V3F32_F32_V4F32]] [[TMP3]], 0
 ; CHECK-NEXT:    [[TMP5:%.*]] = bitcast %struct.v3f32.f32* [[TMP0]] to <4 x float>*
 ; CHECK-NEXT:    store <4 x float> [[TMP4]], <4 x float>* [[TMP5]], align 16
@@ -1141,13 +1141,13 @@ attributes #2 = { alwaysinline nounwind }
 ; CHECK-LABEL: define {{[^@]+}}@bitcast_struct_i128_v4f32.body
 ; CHECK-SAME: (%struct.i128* [[OUT:%.*]], <4 x float> [[VALUE:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:    [[CAST:%.*]] = bitcast %struct.i128* [[OUT]] to <4 x float>*
-; CHECK-NEXT:    [[TMP1:%.*]] = insertvalue [[BITCAST_STRUCT_I128_V4F32:%.*]] undef, <4 x float> [[VALUE]], 0
+; CHECK-NEXT:    [[TMP1:%.*]] = insertvalue [[BITCAST_STRUCT_I128_V4F32:%.*]] poison, <4 x float> [[VALUE]], 0
 ; CHECK-NEXT:    ret [[BITCAST_STRUCT_I128_V4F32]] [[TMP1]]
 ;
 ;
 ; CHECK-LABEL: define {{[^@]+}}@bitcast_struct_i128_v4f32
 ; CHECK-SAME: (%struct.i128* [[TMP0:%.*]], <4 x float> [[TMP1:%.*]]) #[[ATTR2]] {
-; CHECK-NEXT:    [[TMP3:%.*]] = call [[BITCAST_STRUCT_I128_V4F32:%.*]] @bitcast_struct_i128_v4f32.body(%struct.i128* undef, <4 x float> [[TMP1]])
+; CHECK-NEXT:    [[TMP3:%.*]] = call [[BITCAST_STRUCT_I128_V4F32:%.*]] @bitcast_struct_i128_v4f32.body(%struct.i128* poison, <4 x float> [[TMP1]])
 ; CHECK-NEXT:    [[TMP4:%.*]] = extractvalue [[BITCAST_STRUCT_I128_V4F32]] [[TMP3]], 0
 ; CHECK-NEXT:    [[TMP5:%.*]] = bitcast %struct.i128* [[TMP0]] to <4 x float>*
 ; CHECK-NEXT:    store <4 x float> [[TMP4]], <4 x float>* [[TMP5]], align 16
@@ -1157,13 +1157,13 @@ attributes #2 = { alwaysinline nounwind }
 ; CHECK-LABEL: define {{[^@]+}}@bitcast_array_v4i32_v4f32.body
 ; CHECK-SAME: ([4 x i32]* [[OUT:%.*]], [4 x float] [[VALUE:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:    [[CAST:%.*]] = bitcast [4 x i32]* [[OUT]] to [4 x float]*
-; CHECK-NEXT:    [[TMP1:%.*]] = insertvalue [[BITCAST_ARRAY_V4I32_V4F32:%.*]] undef, [4 x float] [[VALUE]], 0
+; CHECK-NEXT:    [[TMP1:%.*]] = insertvalue [[BITCAST_ARRAY_V4I32_V4F32:%.*]] poison, [4 x float] [[VALUE]], 0
 ; CHECK-NEXT:    ret [[BITCAST_ARRAY_V4I32_V4F32]] [[TMP1]]
 ;
 ;
 ; CHECK-LABEL: define {{[^@]+}}@bitcast_array_v4i32_v4f32
 ; CHECK-SAME: ([4 x i32]* [[TMP0:%.*]], [4 x float] [[TMP1:%.*]]) #[[ATTR2]] {
-; CHECK-NEXT:    [[TMP3:%.*]] = call [[BITCAST_ARRAY_V4I32_V4F32:%.*]] @bitcast_array_v4i32_v4f32.body([4 x i32]* undef, [4 x float] [[TMP1]])
+; CHECK-NEXT:    [[TMP3:%.*]] = call [[BITCAST_ARRAY_V4I32_V4F32:%.*]] @bitcast_array_v4i32_v4f32.body([4 x i32]* poison, [4 x float] [[TMP1]])
 ; CHECK-NEXT:    [[TMP4:%.*]] = extractvalue [[BITCAST_ARRAY_V4I32_V4F32]] [[TMP3]], 0
 ; CHECK-NEXT:    [[TMP5:%.*]] = bitcast [4 x i32]* [[TMP0]] to [4 x float]*
 ; CHECK-NEXT:    store [4 x float] [[TMP4]], [4 x float]* [[TMP5]], align 4
@@ -1177,18 +1177,18 @@ attributes #2 = { alwaysinline nounwind }
 ; CHECK:       ret0:
 ; CHECK-NEXT:    [[EXTRACTVEC:%.*]] = shufflevector <3 x float> [[VALUE]], <3 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
 ; CHECK-NEXT:    [[CAST0:%.*]] = bitcast %struct.v3f32* [[OUT]] to <4 x float>*
-; CHECK-NEXT:    [[TMP0:%.*]] = insertvalue [[MULTI_RETURN_BITCAST_STRUCT_V3F32_V3F32:%.*]] undef, <4 x float> [[EXTRACTVEC]], 0
+; CHECK-NEXT:    [[TMP0:%.*]] = insertvalue [[MULTI_RETURN_BITCAST_STRUCT_V3F32_V3F32:%.*]] poison, <4 x float> [[EXTRACTVEC]], 0
 ; CHECK-NEXT:    ret [[MULTI_RETURN_BITCAST_STRUCT_V3F32_V3F32]] [[TMP0]]
 ; CHECK:       ret1:
 ; CHECK-NEXT:    [[CAST1:%.*]] = bitcast %struct.v3f32* [[OUT]] to <4 x float>*
-; CHECK-NEXT:    [[LOAD:%.*]] = load <4 x float>, <4 x float> addrspace(1)* undef, align 16
-; CHECK-NEXT:    [[TMP1:%.*]] = insertvalue [[MULTI_RETURN_BITCAST_STRUCT_V3F32_V3F32]] undef, <4 x float> [[LOAD]], 0
+; CHECK-NEXT:    [[LOAD:%.*]] = load <4 x float>, <4 x float> addrspace(1)* poison, align 16
+; CHECK-NEXT:    [[TMP1:%.*]] = insertvalue [[MULTI_RETURN_BITCAST_STRUCT_V3F32_V3F32]] poison, <4 x float> [[LOAD]], 0
 ; CHECK-NEXT:    ret [[MULTI_RETURN_BITCAST_STRUCT_V3F32_V3F32]] [[TMP1]]
 ;
 ;
 ; CHECK-LABEL: define {{[^@]+}}@multi_return_bitcast_struct_v3f32_v3f32
 ; CHECK-SAME: (i1 [[TMP0:%.*]], %struct.v3f32* [[TMP1:%.*]], <3 x float> [[TMP2:%.*]]) #[[ATTR2]] {
-; CHECK-NEXT:    [[TMP4:%.*]] = call [[MULTI_RETURN_BITCAST_STRUCT_V3F32_V3F32:%.*]] @multi_return_bitcast_struct_v3f32_v3f32.body(i1 [[TMP0]], %struct.v3f32* undef, <3 x float> [[TMP2]])
+; CHECK-NEXT:    [[TMP4:%.*]] = call [[MULTI_RETURN_BITCAST_STRUCT_V3F32_V3F32:%.*]] @multi_return_bitcast_struct_v3f32_v3f32.body(i1 [[TMP0]], %struct.v3f32* poison, <3 x float> [[TMP2]])
 ; CHECK-NEXT:    [[TMP5:%.*]] = extractvalue [[MULTI_RETURN_BITCAST_STRUCT_V3F32_V3F32]] [[TMP4]], 0
 ; CHECK-NEXT:    [[TMP6:%.*]] = bitcast %struct.v3f32* [[TMP1]] to <4 x float>*
 ; CHECK-NEXT:    store <4 x float> [[TMP5]], <4 x float>* [[TMP6]], align 16
@@ -1198,13 +1198,13 @@ attributes #2 = { alwaysinline nounwind }
 ; CHECK-LABEL: define {{[^@]+}}@bitcast_v3f32_struct_v3f32.body
 ; CHECK-SAME: (<3 x float>* [[OUT:%.*]], [[STRUCT_V3F32:%.*]] [[VALUE:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:    [[CAST:%.*]] = bitcast <3 x float>* [[OUT]] to %struct.v3f32*
-; CHECK-NEXT:    [[TMP1:%.*]] = insertvalue [[BITCAST_V3F32_STRUCT_V3F32:%.*]] undef, [[STRUCT_V3F32]] [[VALUE]], 0
+; CHECK-NEXT:    [[TMP1:%.*]] = insertvalue [[BITCAST_V3F32_STRUCT_V3F32:%.*]] poison, [[STRUCT_V3F32]] [[VALUE]], 0
 ; CHECK-NEXT:    ret [[BITCAST_V3F32_STRUCT_V3F32]] [[TMP1]]
 ;
 ;
 ; CHECK-LABEL: define {{[^@]+}}@bitcast_v3f32_struct_v3f32
 ; CHECK-SAME: (<3 x float>* [[TMP0:%.*]], [[STRUCT_V3F32:%.*]] [[TMP1:%.*]]) #[[ATTR2]] {
-; CHECK-NEXT:    [[TMP3:%.*]] = call [[BITCAST_V3F32_STRUCT_V3F32:%.*]] @bitcast_v3f32_struct_v3f32.body(<3 x float>* undef, [[STRUCT_V3F32]] [[TMP1]])
+; CHECK-NEXT:    [[TMP3:%.*]] = call [[BITCAST_V3F32_STRUCT_V3F32:%.*]] @bitcast_v3f32_struct_v3f32.body(<3 x float>* poison, [[STRUCT_V3F32]] [[TMP1]])
 ; CHECK-NEXT:    [[TMP4:%.*]] = extractvalue [[BITCAST_V3F32_STRUCT_V3F32]] [[TMP3]], 0
 ; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <3 x float>* [[TMP0]] to %struct.v3f32*
 ; CHECK-NEXT:    store [[STRUCT_V3F32]] [[TMP4]], %struct.v3f32* [[TMP5]], align 16