[llvm-branch-commits] [llvm] AMDGPU: Replace some test i32 undef uses with poison (PR #131092)

Thu Mar 13 00:47:25 PDT 2025

https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/131092

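Replaces `i32 undef` operands with `i32 poison` in 23 AMDGPU CodeGen tests (test inputs plus the one affected OPT check line). Only the first `i32 undef` on each touched line is rewritten, so calls that pass several undef operands still keep the remaining ones as `undef`; non-i32 undef operands (i1, i8, i64, pointers) are left untouched.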

From 8d6e244811b467787ccadf639f856714dc3bac82 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Thu, 13 Mar 2025 14:46:17 +0700
Subject: [PATCH] AMDGPU: Replace some test i32 undef uses with poison

---
 .../CodeGen/AMDGPU/cgp-addressing-modes.ll     |  4 ++--
 llvm/test/CodeGen/AMDGPU/commute-shifts.ll     |  2 +-
 .../AMDGPU/constant-address-space-32bit.ll     |  2 +-
 .../test/CodeGen/AMDGPU/dagcombine-fma-fmad.ll |  2 +-
 .../AMDGPU/extract_subvector_vec4_vec3.ll      |  4 ++--
 llvm/test/CodeGen/AMDGPU/img-nouse-adjust.ll   |  2 +-
 .../AMDGPU/indirect-call-known-callees.ll      |  2 +-
 .../AMDGPU/ipra-return-address-save-restore.ll |  2 +-
 .../CodeGen/AMDGPU/llvm.amdgcn.exp.prim.ll     |  2 +-
 .../test/CodeGen/AMDGPU/llvm.amdgcn.exp.row.ll | 14 +++++++-------
 ...m.amdgcn.struct.buffer.load.format.v3f16.ll |  2 +-
 ...dgcn.struct.ptr.buffer.load.format.v3f16.ll |  2 +-
 llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wqm.ll    |  2 +-
 llvm/test/CodeGen/AMDGPU/loop_exit_with_xor.ll |  6 +++---
 .../mubuf-shader-vgpr-non-ptr-intrinsics.ll    |  6 +++---
 llvm/test/CodeGen/AMDGPU/mubuf-shader-vgpr.ll  |  6 +++---
 ...ial-regcopy-and-spill-missed-at-regalloc.ll |  2 +-
 .../CodeGen/AMDGPU/scheduler-subrange-crash.ll | 10 +++++-----
 llvm/test/CodeGen/AMDGPU/sdwa-peephole.ll      |  2 +-
 llvm/test/CodeGen/AMDGPU/simplify-libcalls2.ll |  2 +-
 .../AMDGPU/splitkit-getsubrangeformask.ll      | 18 +++++++++---------
 ...pr-descriptor-waterfall-loop-idom-update.ll |  2 +-
 llvm/test/CodeGen/AMDGPU/wqm.ll                |  2 +-
 23 files changed, 49 insertions(+), 49 deletions(-)

diff --git a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes.ll b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes.ll
index cf82b569b4839..8243815e44358 100644
--- a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes.ll
+++ b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes.ll
@@ -581,7 +581,7 @@ done:
 
 ; OPT-LABEL: @test_sink_local_small_offset_cmpxchg_i32(
 ; OPT: %sunkaddr = getelementptr i8, ptr addrspace(3) %in, i32 28
-; OPT: %tmp1.struct = cmpxchg ptr addrspace(3) %sunkaddr, i32 undef, i32 2 seq_cst monotonic
+; OPT: %tmp1.struct = cmpxchg ptr addrspace(3) %sunkaddr, i32 poison, i32 2 seq_cst monotonic
 define amdgpu_kernel void @test_sink_local_small_offset_cmpxchg_i32(ptr addrspace(3) %out, ptr addrspace(3) %in) {
 entry:
   %out.gep = getelementptr i32, ptr addrspace(3) %out, i32 999999
@@ -591,7 +591,7 @@ entry:
   br i1 %tmp0, label %endif, label %if
 
 if:
-  %tmp1.struct = cmpxchg ptr addrspace(3) %in.gep, i32 undef, i32 2 seq_cst monotonic
+  %tmp1.struct = cmpxchg ptr addrspace(3) %in.gep, i32 poison, i32 2 seq_cst monotonic
   %tmp1 = extractvalue { i32, i1 } %tmp1.struct, 0
   br label %endif
 
diff --git a/llvm/test/CodeGen/AMDGPU/commute-shifts.ll b/llvm/test/CodeGen/AMDGPU/commute-shifts.ll
index 2930c6efd02b7..820ccb18a2b3d 100644
--- a/llvm/test/CodeGen/AMDGPU/commute-shifts.ll
+++ b/llvm/test/CodeGen/AMDGPU/commute-shifts.ll
@@ -30,7 +30,7 @@ define amdgpu_ps float @main(float %arg0, float %arg1) #0 {
 ; VI-NEXT:    ; return to shader part epilog
 bb:
   %tmp = fptosi float %arg0 to i32
-  %tmp1 = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 undef, <8 x i32> poison, i32 0, i32 0)
+  %tmp1 = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 poison, <8 x i32> poison, i32 0, i32 0)
   %tmp2.f = extractelement <4 x float> %tmp1, i32 0
   %tmp2 = bitcast float %tmp2.f to i32
   %tmp3 = and i32 %tmp, 7
diff --git a/llvm/test/CodeGen/AMDGPU/constant-address-space-32bit.ll b/llvm/test/CodeGen/AMDGPU/constant-address-space-32bit.ll
index 4f3cff4b32ea3..d8fb2641c8192 100644
--- a/llvm/test/CodeGen/AMDGPU/constant-address-space-32bit.ll
+++ b/llvm/test/CodeGen/AMDGPU/constant-address-space-32bit.ll
@@ -306,7 +306,7 @@ define amdgpu_vs float @load_addr_no_fold(ptr addrspace(6) inreg noalias %p0) #0
 define amdgpu_vs float @vgpr_arg_src(ptr addrspace(6) %arg) {
 main_body:
   %tmp9 = load ptr addrspace(8), ptr addrspace(6) %arg
-  %tmp10 = call nsz float @llvm.amdgcn.struct.ptr.buffer.load.format.f32(ptr addrspace(8) %tmp9, i32 undef, i32 0, i32 0, i32 0) #1
+  %tmp10 = call nsz float @llvm.amdgcn.struct.ptr.buffer.load.format.f32(ptr addrspace(8) %tmp9, i32 poison, i32 0, i32 0, i32 0) #1
   ret float %tmp10
 }
 
diff --git a/llvm/test/CodeGen/AMDGPU/dagcombine-fma-fmad.ll b/llvm/test/CodeGen/AMDGPU/dagcombine-fma-fmad.ll
index 03b9f9bf82f3c..c5ddce2a69d43 100644
--- a/llvm/test/CodeGen/AMDGPU/dagcombine-fma-fmad.ll
+++ b/llvm/test/CodeGen/AMDGPU/dagcombine-fma-fmad.ll
@@ -204,7 +204,7 @@ define amdgpu_ps float @_amdgpu_ps_main() #0 {
   %40 = fmul reassoc nnan nsz arcp contract afn float %39, 0x3F847AE140000000
   %41 = fadd reassoc nnan nsz arcp contract afn float %40, 0x3F947AE140000000
   %.i2415 = fmul reassoc nnan nsz arcp contract afn float %.i2407, %41
-  %42 = call <3 x float> @llvm.amdgcn.image.load.mip.2d.v3f32.i32(i32 7, i32 undef, i32 undef, i32 0, <8 x i32> poison, i32 0, i32 0)
+  %42 = call <3 x float> @llvm.amdgcn.image.load.mip.2d.v3f32.i32(i32 7, i32 poison, i32 undef, i32 0, <8 x i32> poison, i32 0, i32 0)
   %.i2521 = extractelement <3 x float> %42, i32 2
   %43 = call reassoc nnan nsz arcp contract afn float @llvm.amdgcn.fmed3.f32(float poison, float 0.000000e+00, float 1.000000e+00)
   %44 = call <3 x float> @llvm.amdgcn.image.sample.2d.v3f32.f32(i32 7, float poison, float poison, <8 x i32> poison, <4 x i32> poison, i1 false, i32 0, i32 0)
diff --git a/llvm/test/CodeGen/AMDGPU/extract_subvector_vec4_vec3.ll b/llvm/test/CodeGen/AMDGPU/extract_subvector_vec4_vec3.ll
index 37a2e9e24dbd6..5ffc0ef4c359e 100644
--- a/llvm/test/CodeGen/AMDGPU/extract_subvector_vec4_vec3.ll
+++ b/llvm/test/CodeGen/AMDGPU/extract_subvector_vec4_vec3.ll
@@ -25,11 +25,11 @@ define amdgpu_hs void @main(ptr addrspace(6) inreg %arg) {
   ; GCN-NEXT:   BUFFER_STORE_DWORDX3_OFFEN_exact killed [[COPY4]], [[COPY5]], [[DEF3]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (s96) into `ptr addrspace(8) poison`, align 1, addrspace 8)
   ; GCN-NEXT:   S_ENDPGM 0
 main_body:
-  %tmp25 = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) poison, i32 undef, i32 0, i32 0)
+  %tmp25 = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) poison, i32 poison, i32 0, i32 0)
   %tmp27 = bitcast <4 x float> %tmp25 to <16 x i8>
   %tmp28 = shufflevector <16 x i8> %tmp27, <16 x i8> poison, <12 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
   %tmp29 = bitcast <12 x i8> %tmp28 to <3 x i32>
-  call void @llvm.amdgcn.raw.ptr.buffer.store.v3i32(<3 x i32> %tmp29, ptr addrspace(8) poison, i32 undef, i32 0, i32 0) #3
+  call void @llvm.amdgcn.raw.ptr.buffer.store.v3i32(<3 x i32> %tmp29, ptr addrspace(8) poison, i32 poison, i32 0, i32 0) #3
   ret void
 }
 
diff --git a/llvm/test/CodeGen/AMDGPU/img-nouse-adjust.ll b/llvm/test/CodeGen/AMDGPU/img-nouse-adjust.ll
index cd410ab8bc143..0c4a2e3112d37 100644
--- a/llvm/test/CodeGen/AMDGPU/img-nouse-adjust.ll
+++ b/llvm/test/CodeGen/AMDGPU/img-nouse-adjust.ll
@@ -11,7 +11,7 @@
 ; GCN: IMAGE_LOAD_V4_V2
 define amdgpu_cs void @_amdgpu_cs_main(i32 %dummy) local_unnamed_addr #0 {
 .entry:
-  %unused.result = tail call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32 15, i32 undef, i32 undef, <8 x i32> poison, i32 0, i32 0) #3
+  %unused.result = tail call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32 15, i32 poison, i32 undef, <8 x i32> poison, i32 0, i32 0) #3
   call void asm sideeffect ";", "" () #0
   ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/indirect-call-known-callees.ll b/llvm/test/CodeGen/AMDGPU/indirect-call-known-callees.ll
index 2dbc117f987ef..f8770642cc006 100644
--- a/llvm/test/CodeGen/AMDGPU/indirect-call-known-callees.ll
+++ b/llvm/test/CodeGen/AMDGPU/indirect-call-known-callees.ll
@@ -75,7 +75,7 @@ define amdgpu_kernel void @indirect_call_known_no_special_inputs() {
 bb:
   %cond = load i1, ptr addrspace(4) null
   %tmp = select i1 %cond, ptr @wobble, ptr @snork
-  call void %tmp(ptr poison, i32 undef, ptr poison)
+  call void %tmp(ptr poison, i32 poison, ptr poison)
   ret void
 }
 
diff --git a/llvm/test/CodeGen/AMDGPU/ipra-return-address-save-restore.ll b/llvm/test/CodeGen/AMDGPU/ipra-return-address-save-restore.ll
index 31aaee5ff7dfb..f5117dfb3dbcc 100644
--- a/llvm/test/CodeGen/AMDGPU/ipra-return-address-save-restore.ll
+++ b/llvm/test/CodeGen/AMDGPU/ipra-return-address-save-restore.ll
@@ -185,7 +185,7 @@ sw.bb10:
 ; GCN-DAG: v_readlane_b32 s30, [[CSR_VGPR]],
 ; GCN: s_waitcnt vmcnt(0)
 ; GCN: s_setpc_b64 s[30:31]
-  call fastcc void @svm_node_closure_bsdf(ptr addrspace(1) null, ptr null, <4 x i32> zeroinitializer, ptr null, i32 undef, i8 undef, float poison, float poison, float poison, i1 undef, <4 x i32> poison, float poison, i32 undef, i1 undef, i1 undef, i1 undef, float poison, ptr addrspace(1) poison, ptr addrspace(1) poison, ptr addrspace(1) poison, i1 undef, ptr addrspace(1) poison, i32 undef, i1 undef, i32 undef, i64 undef, i32 undef)
+  call fastcc void @svm_node_closure_bsdf(ptr addrspace(1) null, ptr null, <4 x i32> zeroinitializer, ptr null, i32 poison, i8 undef, float poison, float poison, float poison, i1 undef, <4 x i32> poison, float poison, i32 undef, i1 undef, i1 undef, i1 undef, float poison, ptr addrspace(1) poison, ptr addrspace(1) poison, ptr addrspace(1) poison, i1 undef, ptr addrspace(1) poison, i32 undef, i1 undef, i32 undef, i64 undef, i32 undef)
   ret void
 }
 
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.prim.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.prim.ll
index 429ee21598384..77a40160f1daa 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.prim.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.prim.ll
@@ -9,7 +9,7 @@ declare void @llvm.amdgcn.exp.i32(i32, i32, i32, i32, i32, i32, i1, i1) #1
 ; NOPRIM: exp invalid_target_20 v0, off, off, off done{{$}}
 ; PRIM: {{exp|export}} prim v0, off, off, off done{{$}}
 define amdgpu_gs void @test_export_prim_i32(i32 inreg %a) #0 {
-  call void @llvm.amdgcn.exp.i32(i32 20, i32 1, i32 %a, i32 undef, i32 undef, i32 undef, i1 true, i1 false)
+  call void @llvm.amdgcn.exp.i32(i32 20, i32 1, i32 %a, i32 poison, i32 undef, i32 undef, i1 true, i1 false)
   ret void
 }
 
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.row.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.row.ll
index 438c9446610c0..3c9b9d43d8814 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.row.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.row.ll
@@ -22,8 +22,8 @@ define amdgpu_kernel void @undef_i32() #0 {
 ; GFX12-NEXT:    export pos0 off, off, off, off row_en
 ; GFX12-NEXT:    export pos1 off, off, off, off done row_en
 ; GFX12-NEXT:    s_endpgm
-  call void @llvm.amdgcn.exp.row.i32(i32 12, i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i1 false, i32 0)
-  call void @llvm.amdgcn.exp.row.i32(i32 13, i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i1 true, i32 0)
+  call void @llvm.amdgcn.exp.row.i32(i32 12, i32 0, i32 poison, i32 undef, i32 undef, i32 undef, i1 false, i32 0)
+  call void @llvm.amdgcn.exp.row.i32(i32 13, i32 0, i32 poison, i32 undef, i32 undef, i32 undef, i1 true, i32 0)
   ret void
 }
 
@@ -62,8 +62,8 @@ define amdgpu_kernel void @zero_i32() #0 {
 ; GFX12-NEXT:    export pos0 v0, v0, v0, off row_en
 ; GFX12-NEXT:    export pos1 v0, v0, v0, off done row_en
 ; GFX12-NEXT:    s_endpgm
-  call void @llvm.amdgcn.exp.row.i32(i32 12, i32 7, i32 0, i32 0, i32 0, i32 undef, i1 false, i32 0)
-  call void @llvm.amdgcn.exp.row.i32(i32 13, i32 7, i32 0, i32 0, i32 0, i32 undef, i1 true, i32 0)
+  call void @llvm.amdgcn.exp.row.i32(i32 12, i32 7, i32 0, i32 0, i32 0, i32 poison, i1 false, i32 0)
+  call void @llvm.amdgcn.exp.row.i32(i32 13, i32 7, i32 0, i32 0, i32 0, i32 poison, i1 true, i32 0)
   ret void
 }
 
@@ -103,7 +103,7 @@ define amdgpu_kernel void @id_i32() #0 {
 ; GFX12-NEXT:    export pos0 v0, off, off, off done row_en
 ; GFX12-NEXT:    s_endpgm
   %id = call i32 @llvm.amdgcn.workitem.id.x()
-  call void @llvm.amdgcn.exp.row.i32(i32 12, i32 1, i32 %id, i32 undef, i32 undef, i32 undef, i1 true, i32 0)
+  call void @llvm.amdgcn.exp.row.i32(i32 12, i32 1, i32 %id, i32 poison, i32 undef, i32 undef, i1 true, i32 0)
   ret void
 }
 
@@ -126,7 +126,7 @@ define amdgpu_kernel void @id_arg_i32(i32 %row) #0 {
 ; GFX12-NEXT:    export pos0 v0, off, off, off done row_en
 ; GFX12-NEXT:    s_endpgm
   %id = call i32 @llvm.amdgcn.workitem.id.x()
-  call void @llvm.amdgcn.exp.row.i32(i32 12, i32 1, i32 %id, i32 undef, i32 undef, i32 undef, i1 true, i32 %row)
+  call void @llvm.amdgcn.exp.row.i32(i32 12, i32 1, i32 %id, i32 poison, i32 undef, i32 undef, i1 true, i32 %row)
   ret void
 }
 
@@ -152,6 +152,6 @@ define amdgpu_kernel void @id_row_i32() #0 {
 ; GFX12-NEXT:    export pos0 v0, off, off, off done row_en
 ; GFX12-NEXT:    s_endpgm
   %id = call i32 @llvm.amdgcn.workitem.id.x()
-  call void @llvm.amdgcn.exp.row.i32(i32 12, i32 1, i32 99, i32 undef, i32 undef, i32 undef, i1 true, i32 %id)
+  call void @llvm.amdgcn.exp.row.i32(i32 12, i32 1, i32 99, i32 poison, i32 undef, i32 undef, i1 true, i32 %id)
   ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.format.v3f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.format.v3f16.ll
index fed7a8ec105fd..185beec288db7 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.format.v3f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.format.v3f16.ll
@@ -142,7 +142,7 @@ define amdgpu_gs void @main(<4 x i32> %arg, i32 %arg1) {
 ; GFX12-NEXT:    v_dual_mov_b32 v2, 0 :: v_dual_and_b32 v1, 0xffff, v6
 ; GFX12-NEXT:    ds_store_2addr_b32 v2, v0, v1 offset0:7 offset1:8
 bb:
-  %i = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 undef)
+  %i = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 poison)
   %i2 = call nsz arcp <3 x half> @llvm.amdgcn.struct.buffer.load.format.v3f16(<4 x i32> %arg, i32 %arg1, i32 0, i32 0, i32 0)
   %i3 = bitcast <3 x half> %i2 to <3 x i16>
   %i4 = extractelement <3 x i16> %i3, i32 1
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.load.format.v3f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.load.format.v3f16.ll
index 46b2516f72f8e..7b11072363539 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.load.format.v3f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.load.format.v3f16.ll
@@ -112,7 +112,7 @@ define amdgpu_gs void @main(ptr addrspace(8) %arg, i32 %arg1) {
 ; GFX11-NEXT:    v_dual_mov_b32 v2, 0 :: v_dual_and_b32 v1, 0xffff, v6
 ; GFX11-NEXT:    ds_store_2addr_b32 v2, v0, v1 offset0:7 offset1:8
 bb:
-  %i = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 undef)
+  %i = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 poison)
   %i2 = call nsz arcp <3 x half> @llvm.amdgcn.struct.ptr.buffer.load.format.v3f16(ptr addrspace(8) %arg, i32 %arg1, i32 0, i32 0, i32 0)
   %i3 = bitcast <3 x half> %i2 to <3 x i16>
   %i4 = extractelement <3 x i16> %i3, i32 1
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wqm.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wqm.ll
index 0418f32c3f731..91fc6065dc2bb 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wqm.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wqm.ll
@@ -42,7 +42,7 @@ define i32 @test_s_wqm_constant_undef_i32() {
 ; GFX11-NEXT:    s_wqm_b32 s0, s0
 ; GFX11-NEXT:    v_mov_b32_e32 v0, s0
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
-  %br = call i32 @llvm.amdgcn.s.wqm.i32(i32 undef)
+  %br = call i32 @llvm.amdgcn.s.wqm.i32(i32 poison)
   ret i32 %br
 }
 
diff --git a/llvm/test/CodeGen/AMDGPU/loop_exit_with_xor.ll b/llvm/test/CodeGen/AMDGPU/loop_exit_with_xor.ll
index 7f3f3bdcadbfc..308ca34058f59 100644
--- a/llvm/test/CodeGen/AMDGPU/loop_exit_with_xor.ll
+++ b/llvm/test/CodeGen/AMDGPU/loop_exit_with_xor.ll
@@ -46,7 +46,7 @@ loop:
   br i1 %tmp27, label %then, label %endif
 
 then:                                             ; preds = %bb
-  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float poison, ptr addrspace(8) poison, i32 0, i32 undef, i32 0)
+  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float poison, ptr addrspace(8) poison, i32 0, i32 poison, i32 0)
   br label %endif
 
 endif:                                             ; preds = %bb28, %bb
@@ -85,7 +85,7 @@ loop:
   %tmp23phi = phi i32 [ %tmp23, %loop ], [ 0, %entry ]
   %tmp23 = add nuw i32 %tmp23phi, 1
   %tmp27 = icmp ult i32 %arg, %tmp23
-  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float poison, ptr addrspace(8) poison, i32 0, i32 undef, i32 0)
+  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float poison, ptr addrspace(8) poison, i32 0, i32 poison, i32 0)
   br i1 %tmp27, label %loop, label %loopexit
 
 loopexit:
@@ -136,7 +136,7 @@ loop:
   br i1 %tmp27, label %then, label %endif
 
 then:                                             ; preds = %bb
-  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float poison, ptr addrspace(8) poison, i32 0, i32 undef, i32 0)
+  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float poison, ptr addrspace(8) poison, i32 0, i32 poison, i32 0)
   br label %endif
 
 endif:                                             ; preds = %bb28, %bb
diff --git a/llvm/test/CodeGen/AMDGPU/mubuf-shader-vgpr-non-ptr-intrinsics.ll b/llvm/test/CodeGen/AMDGPU/mubuf-shader-vgpr-non-ptr-intrinsics.ll
index c0c93f7badde2..80a667555a424 100644
--- a/llvm/test/CodeGen/AMDGPU/mubuf-shader-vgpr-non-ptr-intrinsics.ll
+++ b/llvm/test/CodeGen/AMDGPU/mubuf-shader-vgpr-non-ptr-intrinsics.ll
@@ -22,7 +22,7 @@ define amdgpu_vs float @test_idxen(ptr addrspace(4) inreg %base, i32 %i) {
 main_body:
   %ptr = getelementptr <4 x i32>, ptr addrspace(4) %base, i32 %i
   %tmp2 = load <4 x i32>, ptr addrspace(4) %ptr, align 32
-  %tmp7 = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %tmp2, i32 undef, i32 0, i32 0, i32 0)
+  %tmp7 = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %tmp2, i32 poison, i32 0, i32 0, i32 0)
   ret float %tmp7
 }
 
@@ -32,7 +32,7 @@ define amdgpu_vs float @test_offen(ptr addrspace(4) inreg %base, i32 %i) {
 main_body:
   %ptr = getelementptr <4 x i32>, ptr addrspace(4) %base, i32 %i
   %tmp2 = load <4 x i32>, ptr addrspace(4) %ptr, align 32
-  %tmp7 = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> %tmp2, i32 undef, i32 0, i32 0)
+  %tmp7 = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> %tmp2, i32 poison, i32 0, i32 0)
   ret float %tmp7
 }
 
@@ -42,7 +42,7 @@ define amdgpu_vs float @test_both(ptr addrspace(4) inreg %base, i32 %i) {
 main_body:
   %ptr = getelementptr <4 x i32>, ptr addrspace(4) %base, i32 %i
   %tmp2 = load <4 x i32>, ptr addrspace(4) %ptr, align 32
-  %tmp7 = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %tmp2, i32 undef, i32 undef, i32 0, i32 0)
+  %tmp7 = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %tmp2, i32 poison, i32 undef, i32 0, i32 0)
   ret float %tmp7
 }
 
diff --git a/llvm/test/CodeGen/AMDGPU/mubuf-shader-vgpr.ll b/llvm/test/CodeGen/AMDGPU/mubuf-shader-vgpr.ll
index 202ce009ef69a..8182440085813 100644
--- a/llvm/test/CodeGen/AMDGPU/mubuf-shader-vgpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/mubuf-shader-vgpr.ll
@@ -19,7 +19,7 @@ define amdgpu_vs float @test_idxen(ptr addrspace(4) inreg %base, i32 %i) {
 main_body:
   %ptr = getelementptr ptr addrspace(8), ptr addrspace(4) %base, i32 %i
   %tmp2 = load ptr addrspace(8), ptr addrspace(4) %ptr, align 32
-  %tmp7 = call float @llvm.amdgcn.struct.ptr.buffer.load.format.f32(ptr addrspace(8) %tmp2, i32 undef, i32 0, i32 0, i32 0)
+  %tmp7 = call float @llvm.amdgcn.struct.ptr.buffer.load.format.f32(ptr addrspace(8) %tmp2, i32 poison, i32 0, i32 0, i32 0)
   ret float %tmp7
 }
 
@@ -29,7 +29,7 @@ define amdgpu_vs float @test_offen(ptr addrspace(4) inreg %base, i32 %i) {
 main_body:
   %ptr = getelementptr ptr addrspace(8), ptr addrspace(4) %base, i32 %i
   %tmp2 = load ptr addrspace(8), ptr addrspace(4) %ptr, align 32
-  %tmp7 = call float @llvm.amdgcn.raw.ptr.buffer.load.format.f32(ptr addrspace(8) %tmp2, i32 undef, i32 0, i32 0)
+  %tmp7 = call float @llvm.amdgcn.raw.ptr.buffer.load.format.f32(ptr addrspace(8) %tmp2, i32 poison, i32 0, i32 0)
   ret float %tmp7
 }
 
@@ -39,7 +39,7 @@ define amdgpu_vs float @test_both(ptr addrspace(4) inreg %base, i32 %i) {
 main_body:
   %ptr = getelementptr ptr addrspace(8), ptr addrspace(4) %base, i32 %i
   %tmp2 = load ptr addrspace(8), ptr addrspace(4) %ptr, align 32
-  %tmp7 = call float @llvm.amdgcn.struct.ptr.buffer.load.format.f32(ptr addrspace(8) %tmp2, i32 undef, i32 undef, i32 0, i32 0)
+  %tmp7 = call float @llvm.amdgcn.struct.ptr.buffer.load.format.f32(ptr addrspace(8) %tmp2, i32 poison, i32 undef, i32 0, i32 0)
   ret float %tmp7
 }
 
diff --git a/llvm/test/CodeGen/AMDGPU/partial-regcopy-and-spill-missed-at-regalloc.ll b/llvm/test/CodeGen/AMDGPU/partial-regcopy-and-spill-missed-at-regalloc.ll
index 87b04b98e4691..106824a085b42 100644
--- a/llvm/test/CodeGen/AMDGPU/partial-regcopy-and-spill-missed-at-regalloc.ll
+++ b/llvm/test/CodeGen/AMDGPU/partial-regcopy-and-spill-missed-at-regalloc.ll
@@ -92,7 +92,7 @@ define amdgpu_kernel void @partial_copy(<4 x i32> %arg) #0 {
   ; PEI-GFX90A-NEXT:   GLOBAL_STORE_DWORDX2 undef renamable $vgpr0_vgpr1, killed renamable $vgpr0_vgpr1, 0, 0, implicit $exec :: (volatile store (s64) into `ptr addrspace(1) poison`, addrspace 1)
   ; PEI-GFX90A-NEXT:   GLOBAL_STORE_DWORDX4 undef renamable $vgpr0_vgpr1, killed renamable $agpr0_agpr1_agpr2_agpr3, 0, 0, implicit $exec :: (volatile store (s128) into `ptr addrspace(1) poison`, addrspace 1)
   ; PEI-GFX90A-NEXT:   S_ENDPGM 0
-  call void asm sideeffect "; use $0", "a" (i32 undef)
+  call void asm sideeffect "; use $0", "a" (i32 poison)
   %v0 = call <4 x i32> asm sideeffect "; def $0", "=v" ()
   %v1 = call <2 x i32> asm sideeffect "; def $0", "=v" ()
   %mai = tail call <4 x i32> @llvm.amdgcn.mfma.i32.4x4x4i8(i32 1, i32 2, <4 x i32> %arg, i32 0, i32 0, i32 0)
diff --git a/llvm/test/CodeGen/AMDGPU/scheduler-subrange-crash.ll b/llvm/test/CodeGen/AMDGPU/scheduler-subrange-crash.ll
index d6a36019df1e6..a81c18ebb259e 100644
--- a/llvm/test/CodeGen/AMDGPU/scheduler-subrange-crash.ll
+++ b/llvm/test/CodeGen/AMDGPU/scheduler-subrange-crash.ll
@@ -24,7 +24,7 @@ main_body:
   %array_vector9 = insertelement <4 x float> <float 0.000000e+00, float poison, float poison, float poison>, float %tmp1, i32 1
   %array_vector10 = insertelement <4 x float> %array_vector9, float 0.000000e+00, i32 2
   %array_vector11 = insertelement <4 x float> %array_vector10, float poison, i32 3
-  %tmp3 = call i32 @llvm.amdgcn.raw.buffer.load.i32(<4 x i32> poison, i32 undef, i32 4864, i32 0)
+  %tmp3 = call i32 @llvm.amdgcn.raw.buffer.load.i32(<4 x i32> poison, i32 poison, i32 4864, i32 0)
   call void @llvm.amdgcn.raw.tbuffer.store.i32(i32 %tmp3, <4 x i32> poison, i32 36, i32 %arg, i32 68, i32 3)
   %bc = bitcast <4 x float> %array_vector3 to <4 x i32>
   %tmp4 = extractelement <4 x i32> %bc, i32 undef
@@ -35,13 +35,13 @@ main_body:
   %array_vector21 = insertelement <4 x float> <float 0.000000e+00, float poison, float poison, float poison>, float %tmp, i32 1
   %array_vector22 = insertelement <4 x float> %array_vector21, float poison, i32 2
   %array_vector23 = insertelement <4 x float> %array_vector22, float poison, i32 3
-  call void @llvm.amdgcn.raw.tbuffer.store.i32(i32 undef, <4 x i32> poison, i32 28, i32 %arg, i32 68, i32 3)
+  call void @llvm.amdgcn.raw.tbuffer.store.i32(i32 poison, <4 x i32> poison, i32 28, i32 %arg, i32 68, i32 3)
   %bc52 = bitcast <4 x float> %array_vector23 to <4 x i32>
   %tmp6 = extractelement <4 x i32> %bc52, i32 undef
   call void @llvm.amdgcn.raw.tbuffer.store.i32(i32 %tmp6, <4 x i32> poison, i32 64, i32 %arg, i32 68, i32 3)
-  call void @llvm.amdgcn.raw.tbuffer.store.i32(i32 undef, <4 x i32> poison, i32 20, i32 %arg, i32 68, i32 3)
-  call void @llvm.amdgcn.raw.tbuffer.store.i32(i32 undef, <4 x i32> poison, i32 56, i32 %arg, i32 68, i32 3)
-  call void @llvm.amdgcn.raw.tbuffer.store.i32(i32 undef, <4 x i32> poison, i32 92, i32 %arg, i32 68, i32 3)
+  call void @llvm.amdgcn.raw.tbuffer.store.i32(i32 poison, <4 x i32> poison, i32 20, i32 %arg, i32 68, i32 3)
+  call void @llvm.amdgcn.raw.tbuffer.store.i32(i32 poison, <4 x i32> poison, i32 56, i32 %arg, i32 68, i32 3)
+  call void @llvm.amdgcn.raw.tbuffer.store.i32(i32 poison, <4 x i32> poison, i32 92, i32 %arg, i32 68, i32 3)
   ret void
 }
 
diff --git a/llvm/test/CodeGen/AMDGPU/sdwa-peephole.ll b/llvm/test/CodeGen/AMDGPU/sdwa-peephole.ll
index 0b08701da01ed..68dad8a5347b5 100644
--- a/llvm/test/CodeGen/AMDGPU/sdwa-peephole.ll
+++ b/llvm/test/CodeGen/AMDGPU/sdwa-peephole.ll
@@ -2086,7 +2086,7 @@ bb1:                                              ; preds = %bb11, %bb
   br i1 true, label %bb2, label %bb11
 
 bb2:                                              ; preds = %bb1
-  %tmp3 = call i32 asm "v_and_b32_e32 $0, $1, $2", "=v,s,v"(i32 65535, i32 undef) #1
+  %tmp3 = call i32 asm "v_and_b32_e32 $0, $1, $2", "=v,s,v"(i32 65535, i32 poison) #1
   %tmp5 = or i32 %tmp3, 65536
   %tmp6 = insertelement <2 x i32> %tmp, i32 %tmp5, i64 0
   br label %bb11
diff --git a/llvm/test/CodeGen/AMDGPU/simplify-libcalls2.ll b/llvm/test/CodeGen/AMDGPU/simplify-libcalls2.ll
index da31c7681c82e..585bba4948fbd 100644
--- a/llvm/test/CodeGen/AMDGPU/simplify-libcalls2.ll
+++ b/llvm/test/CodeGen/AMDGPU/simplify-libcalls2.ll
@@ -9,7 +9,7 @@
 define void @foo(i32 %i) {
   call void @llvm.lifetime.start.p0(i64 1, ptr poison)
   call void @llvm.lifetime.end.p0(i64 1, ptr poison)
-  call void @llvm.dbg.value(metadata i32 undef, metadata !DILocalVariable(name: "1", scope: !2), metadata !DIExpression()), !dbg !3
+  call void @llvm.dbg.value(metadata i32 poison, metadata !DILocalVariable(name: "1", scope: !2), metadata !DIExpression()), !dbg !3
   ret void
 }
 
diff --git a/llvm/test/CodeGen/AMDGPU/splitkit-getsubrangeformask.ll b/llvm/test/CodeGen/AMDGPU/splitkit-getsubrangeformask.ll
index f2505ea90482c..c5e76e3ea6ad6 100644
--- a/llvm/test/CodeGen/AMDGPU/splitkit-getsubrangeformask.ll
+++ b/llvm/test/CodeGen/AMDGPU/splitkit-getsubrangeformask.ll
@@ -566,7 +566,7 @@ define amdgpu_gs void @_amdgpu_gs_main(i32 inreg %primShaderTableAddrLow, <31 x
   %200 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %44, i32 %199, i32 0)
   %201 = add i32 %200, -98
   %202 = or i32 %197, %201
-  %203 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %44, i32 undef, i32 0)
+  %203 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %44, i32 poison, i32 0)
   %204 = add i32 %203, -114
   %205 = or i32 %202, %204
   %206 = getelementptr <{ [4 x i32], [6 x %llpc.array.element] }>, ptr addrspace(6) null, i32 0, i32 1, i32 %2, i32 0
@@ -622,13 +622,13 @@ define amdgpu_gs void @_amdgpu_gs_main(i32 inreg %primShaderTableAddrLow, <31 x
   %256 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> poison, i32 %255, i32 0)
   %257 = add i32 %256, -297
   %258 = or i32 %253, %257
-  %259 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> poison, i32 undef, i32 0)
+  %259 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> poison, i32 poison, i32 0)
   %260 = add i32 %259, -313
   %261 = or i32 %258, %260
-  %262 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> poison, i32 undef, i32 0)
+  %262 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> poison, i32 poison, i32 0)
   %263 = add i32 %262, -329
   %264 = or i32 %261, %263
-  %265 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> poison, i32 undef, i32 0)
+  %265 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> poison, i32 poison, i32 0)
   %266 = add i32 %265, -345
   %267 = or i32 %264, %266
   %268 = getelementptr <{ [4 x i32], [9 x %llpc.array.element.5] }>, ptr addrspace(6) null, i32 0, i32 1, i32 %4, i32 0
@@ -706,16 +706,16 @@ define amdgpu_gs void @_amdgpu_gs_main(i32 inreg %primShaderTableAddrLow, <31 x
   %331 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> poison, i32 0, i32 0)
   %332 = add i32 %331, -474
   %333 = or i32 %330, %332
-  %334 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> poison, i32 undef, i32 0)
+  %334 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> poison, i32 poison, i32 0)
   %335 = add i32 %334, -475
   %336 = or i32 %333, %335
-  %337 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> poison, i32 undef, i32 0)
+  %337 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> poison, i32 poison, i32 0)
   %338 = add i32 %337, -491
   %339 = or i32 %336, %338
-  %340 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> poison, i32 undef, i32 0)
+  %340 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> poison, i32 poison, i32 0)
   %341 = add i32 %340, -507
   %342 = or i32 %339, %341
-  %343 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> poison, i32 undef, i32 0)
+  %343 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> poison, i32 poison, i32 0)
   %344 = add i32 %343, -539
   %345 = or i32 %342, %344
   %346 = getelementptr i8, ptr addrspace(4) %17, i64 96
@@ -756,7 +756,7 @@ define amdgpu_gs void @_amdgpu_gs_main(i32 inreg %primShaderTableAddrLow, <31 x
   %380 = load <8 x i32>, ptr addrspace(4) undef, align 32
   %.i010.i = select i1 %.not.i, float 0x36A0000000000000, float 0.000000e+00
   %381 = insertelement <4 x float> poison, float %.i010.i, i32 3
-  call void @llvm.amdgcn.image.store.2d.v4f32.i32(<4 x float> %381, i32 15, i32 undef, i32 undef, <8 x i32> %380, i32 0, i32 0)
+  call void @llvm.amdgcn.image.store.2d.v4f32.i32(<4 x float> %381, i32 15, i32 poison, i32 undef, <8 x i32> %380, i32 0, i32 0)
   ret void
 }
 
diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-descriptor-waterfall-loop-idom-update.ll b/llvm/test/CodeGen/AMDGPU/vgpr-descriptor-waterfall-loop-idom-update.ll
index 6994f8d2cf58f..809d4b153fbc8 100644
--- a/llvm/test/CodeGen/AMDGPU/vgpr-descriptor-waterfall-loop-idom-update.ll
+++ b/llvm/test/CodeGen/AMDGPU/vgpr-descriptor-waterfall-loop-idom-update.ll
@@ -76,7 +76,7 @@ entry:
 
 bb0:
   %desc = load ptr addrspace(8), ptr %arg, align 8
-  tail call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float poison, ptr addrspace(8) %desc, i32 undef, i32 0, i32 0)
+  tail call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float poison, ptr addrspace(8) %desc, i32 poison, i32 0, i32 0)
   br label %bb0
 }
 
diff --git a/llvm/test/CodeGen/AMDGPU/wqm.ll b/llvm/test/CodeGen/AMDGPU/wqm.ll
index a033960fcc0d3..02d6ed339efcf 100644
--- a/llvm/test/CodeGen/AMDGPU/wqm.ll
+++ b/llvm/test/CodeGen/AMDGPU/wqm.ll
@@ -1994,7 +1994,7 @@ define amdgpu_ps <4 x float> @test_loop_vcc(<4 x float> %in) nounwind {
 ; GFX10-W32-NEXT:    v_mov_b32_e32 v3, v7
 ; GFX10-W32-NEXT:    ; return to shader part epilog
 entry:
-  call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> %in, i32 15, i32 undef, <8 x i32> poison, i32 0, i32 0)
+  call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> %in, i32 15, i32 poison, <8 x i32> poison, i32 0, i32 0)
   br label %loop
 
 loop: