[llvm] 8cc6c2e - AMDGPU: Migrate more tests away from undef (#131314)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 17 01:57:51 PDT 2025
Author: Matt Arsenault
Date: 2025-03-17T15:57:47+07:00
New Revision: 8cc6c2e80fb007f0e0e2ee65bca070c0f7fb7010
URL: https://github.com/llvm/llvm-project/commit/8cc6c2e80fb007f0e0e2ee65bca070c0f7fb7010
DIFF: https://github.com/llvm/llvm-project/commit/8cc6c2e80fb007f0e0e2ee65bca070c0f7fb7010.diff
LOG: AMDGPU: Migrate more tests away from undef (#131314)
andorbitset.ll is interesting since it directly depends on the
difference between poison and undef. Not sure it's useful to keep
the version using poison; I assume none of this code makes it to
codegen.
si-spill-cf.ll was also a nasty case, which I doubt has been reproducing
its original issue for a very long time. I had to reclaim an older version,
replace some of the poison uses, and run simplify-cfg. There's a very
slight change in the final CFG with this, but the final output is
approximately the same as it used to be.
Added:
Modified:
llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll
llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fold-binop-select.ll
llvm/test/CodeGen/AMDGPU/andorbitset.ll
llvm/test/CodeGen/AMDGPU/cndmask-no-def-vcc.ll
llvm/test/CodeGen/AMDGPU/combine-add-zext-xor.ll
llvm/test/CodeGen/AMDGPU/fold-fabs.ll
llvm/test/CodeGen/AMDGPU/i1-copy-implicit-def.ll
llvm/test/CodeGen/AMDGPU/memory-legalizer-invalid-addrspace.mir
llvm/test/CodeGen/AMDGPU/merge-load-store-vreg.mir
llvm/test/CodeGen/AMDGPU/multi-divergent-exit-region.ll
llvm/test/CodeGen/AMDGPU/nested-loop-conditions.ll
llvm/test/CodeGen/AMDGPU/schedule-amdgpu-trackers.ll
llvm/test/CodeGen/AMDGPU/si-annotate-cf-noloop.ll
llvm/test/CodeGen/AMDGPU/si-spill-cf.ll
llvm/test/CodeGen/AMDGPU/skip-if-dead.ll
llvm/test/CodeGen/AMDGPU/soft-clause-exceeds-register-budget.ll
llvm/test/CodeGen/AMDGPU/splitkit-getsubrangeformask.ll
llvm/test/CodeGen/AMDGPU/undefined-subreg-liverange.ll
llvm/test/CodeGen/AMDGPU/uniform-cfg.ll
llvm/test/CodeGen/AMDGPU/vgpr-liverange-ir.ll
llvm/test/CodeGen/AMDGPU/wave32.ll
llvm/test/CodeGen/MIR/AMDGPU/custom-pseudo-source-values.ll
llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-long-branch-reg-debug.ll
llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-long-branch-reg.ll
llvm/test/CodeGen/MIR/AMDGPU/mircanon-memoperands.mir
llvm/test/CodeGen/MIR/AMDGPU/syncscopes.mir
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll b/llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll
index a4eab62f501ce..3160e38df5e3f 100644
--- a/llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll
+++ b/llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll
@@ -513,115 +513,117 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg
; GFX908-LABEL: introduced_copy_to_sgpr:
; GFX908: ; %bb.0: ; %bb
; GFX908-NEXT: global_load_ushort v16, v[0:1], off glc
-; GFX908-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
-; GFX908-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x10
-; GFX908-NEXT: s_load_dword s7, s[8:9], 0x18
-; GFX908-NEXT: s_mov_b32 s6, 0
-; GFX908-NEXT: s_mov_b32 s9, s6
+; GFX908-NEXT: s_load_dwordx4 s[4:7], s[8:9], 0x0
+; GFX908-NEXT: s_load_dwordx2 s[10:11], s[8:9], 0x10
+; GFX908-NEXT: s_load_dword s0, s[8:9], 0x18
+; GFX908-NEXT: s_mov_b32 s12, 0
+; GFX908-NEXT: s_mov_b32 s9, s12
; GFX908-NEXT: s_waitcnt lgkmcnt(0)
-; GFX908-NEXT: v_cvt_f32_u32_e32 v0, s3
-; GFX908-NEXT: s_sub_i32 s8, 0, s3
-; GFX908-NEXT: v_cvt_f32_f16_e32 v17, s7
+; GFX908-NEXT: v_cvt_f32_u32_e32 v0, s7
+; GFX908-NEXT: s_sub_i32 s1, 0, s7
+; GFX908-NEXT: v_cvt_f32_f16_e32 v17, s0
; GFX908-NEXT: v_mov_b32_e32 v19, 0
; GFX908-NEXT: v_rcp_iflag_f32_e32 v2, v0
; GFX908-NEXT: v_mov_b32_e32 v0, 0
; GFX908-NEXT: v_mov_b32_e32 v1, 0
; GFX908-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2
; GFX908-NEXT: v_cvt_u32_f32_e32 v2, v2
-; GFX908-NEXT: v_readfirstlane_b32 s10, v2
-; GFX908-NEXT: s_mul_i32 s8, s8, s10
-; GFX908-NEXT: s_mul_hi_u32 s8, s10, s8
-; GFX908-NEXT: s_add_i32 s10, s10, s8
-; GFX908-NEXT: s_mul_hi_u32 s8, s2, s10
-; GFX908-NEXT: s_mul_i32 s10, s8, s3
-; GFX908-NEXT: s_sub_i32 s2, s2, s10
-; GFX908-NEXT: s_add_i32 s11, s8, 1
-; GFX908-NEXT: s_sub_i32 s10, s2, s3
-; GFX908-NEXT: s_cmp_ge_u32 s2, s3
-; GFX908-NEXT: s_cselect_b32 s8, s11, s8
-; GFX908-NEXT: s_cselect_b32 s2, s10, s2
-; GFX908-NEXT: s_add_i32 s10, s8, 1
-; GFX908-NEXT: s_cmp_ge_u32 s2, s3
-; GFX908-NEXT: s_cselect_b32 s8, s10, s8
-; GFX908-NEXT: s_lshr_b32 s7, s7, 16
-; GFX908-NEXT: v_cvt_f32_f16_e32 v18, s7
-; GFX908-NEXT: s_lshl_b64 s[2:3], s[0:1], 5
-; GFX908-NEXT: s_lshl_b64 s[12:13], s[8:9], 5
-; GFX908-NEXT: s_lshl_b64 s[10:11], s[4:5], 5
-; GFX908-NEXT: s_or_b32 s10, s10, 28
+; GFX908-NEXT: v_readfirstlane_b32 s2, v2
+; GFX908-NEXT: s_mul_i32 s1, s1, s2
+; GFX908-NEXT: s_mul_hi_u32 s1, s2, s1
+; GFX908-NEXT: s_add_i32 s2, s2, s1
+; GFX908-NEXT: s_mul_hi_u32 s1, s6, s2
+; GFX908-NEXT: s_mul_i32 s2, s1, s7
+; GFX908-NEXT: s_sub_i32 s2, s6, s2
+; GFX908-NEXT: s_add_i32 s3, s1, 1
+; GFX908-NEXT: s_sub_i32 s6, s2, s7
+; GFX908-NEXT: s_cmp_ge_u32 s2, s7
+; GFX908-NEXT: s_cselect_b32 s1, s3, s1
+; GFX908-NEXT: s_cselect_b32 s2, s6, s2
+; GFX908-NEXT: s_add_i32 s3, s1, 1
+; GFX908-NEXT: s_cmp_ge_u32 s2, s7
+; GFX908-NEXT: s_cselect_b32 s8, s3, s1
+; GFX908-NEXT: s_lshr_b32 s2, s0, 16
+; GFX908-NEXT: v_cvt_f32_f16_e32 v18, s2
+; GFX908-NEXT: s_lshl_b64 s[6:7], s[4:5], 5
+; GFX908-NEXT: s_lshl_b64 s[14:15], s[10:11], 5
+; GFX908-NEXT: s_and_b64 s[0:1], exec, s[0:1]
+; GFX908-NEXT: s_or_b32 s14, s14, 28
+; GFX908-NEXT: s_lshl_b64 s[16:17], s[8:9], 5
; GFX908-NEXT: s_waitcnt vmcnt(0)
-; GFX908-NEXT: v_readfirstlane_b32 s7, v16
-; GFX908-NEXT: s_and_b32 s7, 0xffff, s7
-; GFX908-NEXT: s_mul_i32 s1, s1, s7
-; GFX908-NEXT: s_mul_hi_u32 s9, s0, s7
-; GFX908-NEXT: s_mul_i32 s0, s0, s7
-; GFX908-NEXT: s_add_i32 s1, s9, s1
-; GFX908-NEXT: s_lshl_b64 s[14:15], s[0:1], 5
+; GFX908-NEXT: v_readfirstlane_b32 s2, v16
+; GFX908-NEXT: s_and_b32 s2, 0xffff, s2
+; GFX908-NEXT: s_mul_i32 s3, s5, s2
+; GFX908-NEXT: s_mul_hi_u32 s5, s4, s2
+; GFX908-NEXT: s_mul_i32 s2, s4, s2
+; GFX908-NEXT: s_add_i32 s3, s5, s3
+; GFX908-NEXT: s_lshl_b64 s[4:5], s[2:3], 5
; GFX908-NEXT: s_branch .LBB3_2
; GFX908-NEXT: .LBB3_1: ; %Flow20
; GFX908-NEXT: ; in Loop: Header=BB3_2 Depth=1
-; GFX908-NEXT: s_andn2_b64 vcc, exec, s[0:1]
+; GFX908-NEXT: s_andn2_b64 vcc, exec, s[2:3]
; GFX908-NEXT: s_cbranch_vccz .LBB3_12
; GFX908-NEXT: .LBB3_2: ; %bb9
; GFX908-NEXT: ; =>This Loop Header: Depth=1
; GFX908-NEXT: ; Child Loop BB3_5 Depth 2
-; GFX908-NEXT: s_mov_b64 s[16:17], -1
-; GFX908-NEXT: s_cbranch_scc0 .LBB3_10
+; GFX908-NEXT: s_mov_b64 s[18:19], -1
+; GFX908-NEXT: s_mov_b64 vcc, s[0:1]
+; GFX908-NEXT: s_cbranch_vccz .LBB3_10
; GFX908-NEXT: ; %bb.3: ; %bb14
; GFX908-NEXT: ; in Loop: Header=BB3_2 Depth=1
; GFX908-NEXT: global_load_dwordx2 v[2:3], v[0:1], off
-; GFX908-NEXT: v_cmp_gt_i64_e64 s[0:1], s[4:5], -1
-; GFX908-NEXT: s_mov_b32 s7, s6
-; GFX908-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[0:1]
-; GFX908-NEXT: v_mov_b32_e32 v4, s6
-; GFX908-NEXT: v_cmp_ne_u32_e64 s[0:1], 1, v6
-; GFX908-NEXT: v_mov_b32_e32 v6, s6
-; GFX908-NEXT: v_mov_b32_e32 v9, s7
-; GFX908-NEXT: v_mov_b32_e32 v5, s7
-; GFX908-NEXT: v_mov_b32_e32 v7, s7
-; GFX908-NEXT: v_mov_b32_e32 v8, s6
-; GFX908-NEXT: v_cmp_lt_i64_e64 s[16:17], s[4:5], 0
+; GFX908-NEXT: v_cmp_gt_i64_e64 s[2:3], s[10:11], -1
+; GFX908-NEXT: s_mov_b32 s13, s12
+; GFX908-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[2:3]
+; GFX908-NEXT: v_mov_b32_e32 v4, s12
+; GFX908-NEXT: v_cmp_ne_u32_e64 s[2:3], 1, v6
+; GFX908-NEXT: v_mov_b32_e32 v6, s12
+; GFX908-NEXT: v_mov_b32_e32 v8, s12
+; GFX908-NEXT: v_mov_b32_e32 v5, s13
+; GFX908-NEXT: v_mov_b32_e32 v7, s13
+; GFX908-NEXT: v_mov_b32_e32 v9, s13
+; GFX908-NEXT: v_cmp_lt_i64_e64 s[18:19], s[10:11], 0
; GFX908-NEXT: v_mov_b32_e32 v11, v5
-; GFX908-NEXT: s_mov_b64 s[18:19], s[10:11]
+; GFX908-NEXT: s_mov_b64 s[20:21], s[14:15]
; GFX908-NEXT: v_mov_b32_e32 v10, v4
; GFX908-NEXT: s_waitcnt vmcnt(0)
-; GFX908-NEXT: v_readfirstlane_b32 s7, v2
-; GFX908-NEXT: v_readfirstlane_b32 s9, v3
-; GFX908-NEXT: s_add_u32 s7, s7, 1
-; GFX908-NEXT: s_addc_u32 s9, s9, 0
-; GFX908-NEXT: s_mul_hi_u32 s20, s2, s7
-; GFX908-NEXT: s_mul_i32 s9, s2, s9
-; GFX908-NEXT: s_mul_i32 s21, s3, s7
-; GFX908-NEXT: s_add_i32 s9, s20, s9
-; GFX908-NEXT: s_mul_i32 s7, s2, s7
-; GFX908-NEXT: s_add_i32 s9, s9, s21
+; GFX908-NEXT: v_readfirstlane_b32 s9, v2
+; GFX908-NEXT: v_readfirstlane_b32 s13, v3
+; GFX908-NEXT: s_add_u32 s9, s9, 1
+; GFX908-NEXT: s_addc_u32 s13, s13, 0
+; GFX908-NEXT: s_mul_hi_u32 s22, s6, s9
+; GFX908-NEXT: s_mul_i32 s13, s6, s13
+; GFX908-NEXT: s_mul_i32 s23, s7, s9
+; GFX908-NEXT: s_add_i32 s13, s22, s13
+; GFX908-NEXT: s_mul_i32 s9, s6, s9
+; GFX908-NEXT: s_add_i32 s13, s13, s23
; GFX908-NEXT: s_branch .LBB3_5
; GFX908-NEXT: .LBB3_4: ; %bb58
; GFX908-NEXT: ; in Loop: Header=BB3_5 Depth=2
; GFX908-NEXT: v_add_co_u32_sdwa v2, vcc, v2, v16 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
; GFX908-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc
-; GFX908-NEXT: s_add_u32 s18, s18, s14
-; GFX908-NEXT: v_cmp_lt_i64_e64 s[22:23], -1, v[2:3]
-; GFX908-NEXT: s_addc_u32 s19, s19, s15
-; GFX908-NEXT: s_mov_b64 s[20:21], 0
-; GFX908-NEXT: s_andn2_b64 vcc, exec, s[22:23]
+; GFX908-NEXT: s_add_u32 s20, s20, s4
+; GFX908-NEXT: v_cmp_lt_i64_e64 s[24:25], -1, v[2:3]
+; GFX908-NEXT: s_addc_u32 s21, s21, s5
+; GFX908-NEXT: s_mov_b64 s[22:23], 0
+; GFX908-NEXT: s_andn2_b64 vcc, exec, s[24:25]
; GFX908-NEXT: s_cbranch_vccz .LBB3_9
; GFX908-NEXT: .LBB3_5: ; %bb16
; GFX908-NEXT: ; Parent Loop BB3_2 Depth=1
; GFX908-NEXT: ; => This Inner Loop Header: Depth=2
-; GFX908-NEXT: s_add_u32 s20, s18, s7
-; GFX908-NEXT: s_addc_u32 s21, s19, s9
-; GFX908-NEXT: global_load_dword v21, v19, s[20:21] offset:-12 glc
+; GFX908-NEXT: s_add_u32 s22, s20, s9
+; GFX908-NEXT: s_addc_u32 s23, s21, s13
+; GFX908-NEXT: global_load_dword v21, v19, s[22:23] offset:-12 glc
; GFX908-NEXT: s_waitcnt vmcnt(0)
-; GFX908-NEXT: global_load_dword v20, v19, s[20:21] offset:-8 glc
+; GFX908-NEXT: global_load_dword v20, v19, s[22:23] offset:-8 glc
; GFX908-NEXT: s_waitcnt vmcnt(0)
-; GFX908-NEXT: global_load_dword v12, v19, s[20:21] offset:-4 glc
+; GFX908-NEXT: global_load_dword v12, v19, s[22:23] offset:-4 glc
; GFX908-NEXT: s_waitcnt vmcnt(0)
-; GFX908-NEXT: global_load_dword v12, v19, s[20:21] glc
+; GFX908-NEXT: global_load_dword v12, v19, s[22:23] glc
; GFX908-NEXT: s_waitcnt vmcnt(0)
; GFX908-NEXT: ds_read_b64 v[12:13], v19
; GFX908-NEXT: ds_read_b64 v[14:15], v0
-; GFX908-NEXT: s_and_b64 vcc, exec, s[0:1]
+; GFX908-NEXT: s_and_b64 vcc, exec, s[2:3]
; GFX908-NEXT: s_waitcnt lgkmcnt(0)
; GFX908-NEXT: s_cbranch_vccnz .LBB3_7
; GFX908-NEXT: ; %bb.6: ; %bb51
@@ -648,28 +650,28 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg
; GFX908-NEXT: v_add_f32_e32 v11, v11, v13
; GFX908-NEXT: s_branch .LBB3_4
; GFX908-NEXT: .LBB3_7: ; in Loop: Header=BB3_5 Depth=2
-; GFX908-NEXT: s_mov_b64 s[20:21], s[16:17]
-; GFX908-NEXT: s_andn2_b64 vcc, exec, s[20:21]
+; GFX908-NEXT: s_mov_b64 s[22:23], s[18:19]
+; GFX908-NEXT: s_andn2_b64 vcc, exec, s[22:23]
; GFX908-NEXT: s_cbranch_vccz .LBB3_4
; GFX908-NEXT: ; %bb.8: ; in Loop: Header=BB3_2 Depth=1
-; GFX908-NEXT: s_mov_b64 s[20:21], -1
+; GFX908-NEXT: s_mov_b64 s[22:23], -1
; GFX908-NEXT: ; implicit-def: $vgpr2_vgpr3
-; GFX908-NEXT: ; implicit-def: $sgpr18_sgpr19
+; GFX908-NEXT: ; implicit-def: $sgpr20_sgpr21
; GFX908-NEXT: .LBB3_9: ; %loop.exit.guard
; GFX908-NEXT: ; in Loop: Header=BB3_2 Depth=1
-; GFX908-NEXT: s_xor_b64 s[16:17], s[20:21], -1
+; GFX908-NEXT: s_xor_b64 s[18:19], s[22:23], -1
; GFX908-NEXT: .LBB3_10: ; %Flow19
; GFX908-NEXT: ; in Loop: Header=BB3_2 Depth=1
-; GFX908-NEXT: s_mov_b64 s[0:1], -1
-; GFX908-NEXT: s_and_b64 vcc, exec, s[16:17]
+; GFX908-NEXT: s_mov_b64 s[2:3], -1
+; GFX908-NEXT: s_and_b64 vcc, exec, s[18:19]
; GFX908-NEXT: s_cbranch_vccz .LBB3_1
; GFX908-NEXT: ; %bb.11: ; %bb12
; GFX908-NEXT: ; in Loop: Header=BB3_2 Depth=1
-; GFX908-NEXT: s_add_u32 s4, s4, s8
-; GFX908-NEXT: s_addc_u32 s5, s5, 0
-; GFX908-NEXT: s_add_u32 s10, s10, s12
-; GFX908-NEXT: s_addc_u32 s11, s11, s13
-; GFX908-NEXT: s_mov_b64 s[0:1], 0
+; GFX908-NEXT: s_add_u32 s10, s10, s8
+; GFX908-NEXT: s_addc_u32 s11, s11, 0
+; GFX908-NEXT: s_add_u32 s14, s14, s16
+; GFX908-NEXT: s_addc_u32 s15, s15, s17
+; GFX908-NEXT: s_mov_b64 s[2:3], 0
; GFX908-NEXT: s_branch .LBB3_1
; GFX908-NEXT: .LBB3_12: ; %DummyReturnBlock
; GFX908-NEXT: s_endpgm
@@ -677,111 +679,113 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg
; GFX90A-LABEL: introduced_copy_to_sgpr:
; GFX90A: ; %bb.0: ; %bb
; GFX90A-NEXT: global_load_ushort v18, v[0:1], off glc
-; GFX90A-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
-; GFX90A-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x10
-; GFX90A-NEXT: s_load_dword s7, s[8:9], 0x18
-; GFX90A-NEXT: s_mov_b32 s6, 0
-; GFX90A-NEXT: s_mov_b32 s9, s6
+; GFX90A-NEXT: s_load_dwordx4 s[4:7], s[8:9], 0x0
+; GFX90A-NEXT: s_load_dwordx2 s[10:11], s[8:9], 0x10
+; GFX90A-NEXT: s_load_dword s0, s[8:9], 0x18
+; GFX90A-NEXT: s_mov_b32 s12, 0
+; GFX90A-NEXT: s_mov_b32 s9, s12
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
-; GFX90A-NEXT: v_cvt_f32_u32_e32 v0, s3
-; GFX90A-NEXT: s_sub_i32 s8, 0, s3
+; GFX90A-NEXT: v_cvt_f32_u32_e32 v0, s7
+; GFX90A-NEXT: s_sub_i32 s1, 0, s7
; GFX90A-NEXT: v_mov_b32_e32 v19, 0
; GFX90A-NEXT: v_rcp_iflag_f32_e32 v2, v0
; GFX90A-NEXT: v_pk_mov_b32 v[0:1], 0, 0
; GFX90A-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2
; GFX90A-NEXT: v_cvt_u32_f32_e32 v3, v2
-; GFX90A-NEXT: v_cvt_f32_f16_e32 v2, s7
-; GFX90A-NEXT: v_readfirstlane_b32 s10, v3
-; GFX90A-NEXT: s_mul_i32 s8, s8, s10
-; GFX90A-NEXT: s_mul_hi_u32 s8, s10, s8
-; GFX90A-NEXT: s_add_i32 s10, s10, s8
-; GFX90A-NEXT: s_mul_hi_u32 s8, s2, s10
-; GFX90A-NEXT: s_mul_i32 s10, s8, s3
-; GFX90A-NEXT: s_sub_i32 s2, s2, s10
-; GFX90A-NEXT: s_add_i32 s11, s8, 1
-; GFX90A-NEXT: s_sub_i32 s10, s2, s3
-; GFX90A-NEXT: s_cmp_ge_u32 s2, s3
-; GFX90A-NEXT: s_cselect_b32 s8, s11, s8
-; GFX90A-NEXT: s_cselect_b32 s2, s10, s2
-; GFX90A-NEXT: s_add_i32 s10, s8, 1
-; GFX90A-NEXT: s_cmp_ge_u32 s2, s3
-; GFX90A-NEXT: s_cselect_b32 s8, s10, s8
-; GFX90A-NEXT: s_lshr_b32 s7, s7, 16
-; GFX90A-NEXT: v_cvt_f32_f16_e32 v3, s7
-; GFX90A-NEXT: s_lshl_b64 s[2:3], s[0:1], 5
-; GFX90A-NEXT: s_lshl_b64 s[12:13], s[8:9], 5
-; GFX90A-NEXT: s_lshl_b64 s[10:11], s[4:5], 5
-; GFX90A-NEXT: s_or_b32 s10, s10, 28
+; GFX90A-NEXT: v_cvt_f32_f16_e32 v2, s0
+; GFX90A-NEXT: v_readfirstlane_b32 s2, v3
+; GFX90A-NEXT: s_mul_i32 s1, s1, s2
+; GFX90A-NEXT: s_mul_hi_u32 s1, s2, s1
+; GFX90A-NEXT: s_add_i32 s2, s2, s1
+; GFX90A-NEXT: s_mul_hi_u32 s1, s6, s2
+; GFX90A-NEXT: s_mul_i32 s2, s1, s7
+; GFX90A-NEXT: s_sub_i32 s2, s6, s2
+; GFX90A-NEXT: s_add_i32 s3, s1, 1
+; GFX90A-NEXT: s_sub_i32 s6, s2, s7
+; GFX90A-NEXT: s_cmp_ge_u32 s2, s7
+; GFX90A-NEXT: s_cselect_b32 s1, s3, s1
+; GFX90A-NEXT: s_cselect_b32 s2, s6, s2
+; GFX90A-NEXT: s_add_i32 s3, s1, 1
+; GFX90A-NEXT: s_cmp_ge_u32 s2, s7
+; GFX90A-NEXT: s_cselect_b32 s8, s3, s1
+; GFX90A-NEXT: s_lshr_b32 s2, s0, 16
+; GFX90A-NEXT: v_cvt_f32_f16_e32 v3, s2
+; GFX90A-NEXT: s_lshl_b64 s[6:7], s[4:5], 5
+; GFX90A-NEXT: s_lshl_b64 s[14:15], s[10:11], 5
+; GFX90A-NEXT: s_and_b64 s[0:1], exec, s[0:1]
+; GFX90A-NEXT: s_or_b32 s14, s14, 28
+; GFX90A-NEXT: s_lshl_b64 s[16:17], s[8:9], 5
; GFX90A-NEXT: s_waitcnt vmcnt(0)
-; GFX90A-NEXT: v_readfirstlane_b32 s7, v18
-; GFX90A-NEXT: s_and_b32 s7, 0xffff, s7
-; GFX90A-NEXT: s_mul_i32 s1, s1, s7
-; GFX90A-NEXT: s_mul_hi_u32 s9, s0, s7
-; GFX90A-NEXT: s_mul_i32 s0, s0, s7
-; GFX90A-NEXT: s_add_i32 s1, s9, s1
-; GFX90A-NEXT: s_lshl_b64 s[14:15], s[0:1], 5
+; GFX90A-NEXT: v_readfirstlane_b32 s2, v18
+; GFX90A-NEXT: s_and_b32 s2, 0xffff, s2
+; GFX90A-NEXT: s_mul_i32 s3, s5, s2
+; GFX90A-NEXT: s_mul_hi_u32 s5, s4, s2
+; GFX90A-NEXT: s_mul_i32 s2, s4, s2
+; GFX90A-NEXT: s_add_i32 s3, s5, s3
+; GFX90A-NEXT: s_lshl_b64 s[4:5], s[2:3], 5
; GFX90A-NEXT: s_branch .LBB3_2
; GFX90A-NEXT: .LBB3_1: ; %Flow20
; GFX90A-NEXT: ; in Loop: Header=BB3_2 Depth=1
-; GFX90A-NEXT: s_andn2_b64 vcc, exec, s[0:1]
+; GFX90A-NEXT: s_andn2_b64 vcc, exec, s[2:3]
; GFX90A-NEXT: s_cbranch_vccz .LBB3_12
; GFX90A-NEXT: .LBB3_2: ; %bb9
; GFX90A-NEXT: ; =>This Loop Header: Depth=1
; GFX90A-NEXT: ; Child Loop BB3_5 Depth 2
-; GFX90A-NEXT: s_mov_b64 s[16:17], -1
-; GFX90A-NEXT: s_cbranch_scc0 .LBB3_10
+; GFX90A-NEXT: s_mov_b64 s[18:19], -1
+; GFX90A-NEXT: s_mov_b64 vcc, s[0:1]
+; GFX90A-NEXT: s_cbranch_vccz .LBB3_10
; GFX90A-NEXT: ; %bb.3: ; %bb14
; GFX90A-NEXT: ; in Loop: Header=BB3_2 Depth=1
; GFX90A-NEXT: global_load_dwordx2 v[4:5], v[0:1], off
-; GFX90A-NEXT: v_cmp_gt_i64_e64 s[0:1], s[4:5], -1
-; GFX90A-NEXT: s_mov_b32 s7, s6
-; GFX90A-NEXT: v_cndmask_b32_e64 v8, 0, 1, s[0:1]
-; GFX90A-NEXT: v_pk_mov_b32 v[6:7], s[6:7], s[6:7] op_sel:[0,1]
-; GFX90A-NEXT: v_cmp_ne_u32_e64 s[0:1], 1, v8
-; GFX90A-NEXT: v_pk_mov_b32 v[8:9], s[6:7], s[6:7] op_sel:[0,1]
-; GFX90A-NEXT: v_pk_mov_b32 v[10:11], s[6:7], s[6:7] op_sel:[0,1]
-; GFX90A-NEXT: v_cmp_lt_i64_e64 s[16:17], s[4:5], 0
-; GFX90A-NEXT: s_mov_b64 s[18:19], s[10:11]
+; GFX90A-NEXT: v_cmp_gt_i64_e64 s[2:3], s[10:11], -1
+; GFX90A-NEXT: s_mov_b32 s13, s12
+; GFX90A-NEXT: v_cndmask_b32_e64 v8, 0, 1, s[2:3]
+; GFX90A-NEXT: v_pk_mov_b32 v[6:7], s[12:13], s[12:13] op_sel:[0,1]
+; GFX90A-NEXT: v_cmp_ne_u32_e64 s[2:3], 1, v8
+; GFX90A-NEXT: v_pk_mov_b32 v[8:9], s[12:13], s[12:13] op_sel:[0,1]
+; GFX90A-NEXT: v_pk_mov_b32 v[10:11], s[12:13], s[12:13] op_sel:[0,1]
+; GFX90A-NEXT: v_cmp_lt_i64_e64 s[18:19], s[10:11], 0
+; GFX90A-NEXT: s_mov_b64 s[20:21], s[14:15]
; GFX90A-NEXT: v_pk_mov_b32 v[12:13], v[6:7], v[6:7] op_sel:[0,1]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
-; GFX90A-NEXT: v_readfirstlane_b32 s7, v4
-; GFX90A-NEXT: v_readfirstlane_b32 s9, v5
-; GFX90A-NEXT: s_add_u32 s7, s7, 1
-; GFX90A-NEXT: s_addc_u32 s9, s9, 0
-; GFX90A-NEXT: s_mul_hi_u32 s20, s2, s7
-; GFX90A-NEXT: s_mul_i32 s9, s2, s9
-; GFX90A-NEXT: s_mul_i32 s21, s3, s7
-; GFX90A-NEXT: s_add_i32 s9, s20, s9
-; GFX90A-NEXT: s_mul_i32 s7, s2, s7
-; GFX90A-NEXT: s_add_i32 s9, s9, s21
+; GFX90A-NEXT: v_readfirstlane_b32 s9, v4
+; GFX90A-NEXT: v_readfirstlane_b32 s13, v5
+; GFX90A-NEXT: s_add_u32 s9, s9, 1
+; GFX90A-NEXT: s_addc_u32 s13, s13, 0
+; GFX90A-NEXT: s_mul_hi_u32 s22, s6, s9
+; GFX90A-NEXT: s_mul_i32 s13, s6, s13
+; GFX90A-NEXT: s_mul_i32 s23, s7, s9
+; GFX90A-NEXT: s_add_i32 s13, s22, s13
+; GFX90A-NEXT: s_mul_i32 s9, s6, s9
+; GFX90A-NEXT: s_add_i32 s13, s13, s23
; GFX90A-NEXT: s_branch .LBB3_5
; GFX90A-NEXT: .LBB3_4: ; %bb58
; GFX90A-NEXT: ; in Loop: Header=BB3_5 Depth=2
; GFX90A-NEXT: v_add_co_u32_sdwa v4, vcc, v4, v18 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
; GFX90A-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v5, vcc
-; GFX90A-NEXT: s_add_u32 s18, s18, s14
-; GFX90A-NEXT: s_addc_u32 s19, s19, s15
-; GFX90A-NEXT: v_cmp_lt_i64_e64 s[22:23], -1, v[4:5]
-; GFX90A-NEXT: s_mov_b64 s[20:21], 0
-; GFX90A-NEXT: s_andn2_b64 vcc, exec, s[22:23]
+; GFX90A-NEXT: s_add_u32 s20, s20, s4
+; GFX90A-NEXT: s_addc_u32 s21, s21, s5
+; GFX90A-NEXT: v_cmp_lt_i64_e64 s[24:25], -1, v[4:5]
+; GFX90A-NEXT: s_mov_b64 s[22:23], 0
+; GFX90A-NEXT: s_andn2_b64 vcc, exec, s[24:25]
; GFX90A-NEXT: s_cbranch_vccz .LBB3_9
; GFX90A-NEXT: .LBB3_5: ; %bb16
; GFX90A-NEXT: ; Parent Loop BB3_2 Depth=1
; GFX90A-NEXT: ; => This Inner Loop Header: Depth=2
-; GFX90A-NEXT: s_add_u32 s20, s18, s7
-; GFX90A-NEXT: s_addc_u32 s21, s19, s9
-; GFX90A-NEXT: global_load_dword v21, v19, s[20:21] offset:-12 glc
+; GFX90A-NEXT: s_add_u32 s22, s20, s9
+; GFX90A-NEXT: s_addc_u32 s23, s21, s13
+; GFX90A-NEXT: global_load_dword v21, v19, s[22:23] offset:-12 glc
; GFX90A-NEXT: s_waitcnt vmcnt(0)
-; GFX90A-NEXT: global_load_dword v20, v19, s[20:21] offset:-8 glc
+; GFX90A-NEXT: global_load_dword v20, v19, s[22:23] offset:-8 glc
; GFX90A-NEXT: s_waitcnt vmcnt(0)
-; GFX90A-NEXT: global_load_dword v14, v19, s[20:21] offset:-4 glc
+; GFX90A-NEXT: global_load_dword v14, v19, s[22:23] offset:-4 glc
; GFX90A-NEXT: s_waitcnt vmcnt(0)
-; GFX90A-NEXT: global_load_dword v14, v19, s[20:21] glc
+; GFX90A-NEXT: global_load_dword v14, v19, s[22:23] glc
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: ds_read_b64 v[14:15], v19
; GFX90A-NEXT: ds_read_b64 v[16:17], v0
-; GFX90A-NEXT: s_and_b64 vcc, exec, s[0:1]
-; GFX90A-NEXT: ; kill: killed $sgpr20 killed $sgpr21
+; GFX90A-NEXT: s_and_b64 vcc, exec, s[2:3]
+; GFX90A-NEXT: ; kill: killed $sgpr22 killed $sgpr23
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NEXT: s_cbranch_vccnz .LBB3_7
; GFX90A-NEXT: ; %bb.6: ; %bb51
@@ -800,28 +804,28 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg
; GFX90A-NEXT: v_pk_add_f32 v[12:13], v[12:13], v[14:15]
; GFX90A-NEXT: s_branch .LBB3_4
; GFX90A-NEXT: .LBB3_7: ; in Loop: Header=BB3_5 Depth=2
-; GFX90A-NEXT: s_mov_b64 s[20:21], s[16:17]
-; GFX90A-NEXT: s_andn2_b64 vcc, exec, s[20:21]
+; GFX90A-NEXT: s_mov_b64 s[22:23], s[18:19]
+; GFX90A-NEXT: s_andn2_b64 vcc, exec, s[22:23]
; GFX90A-NEXT: s_cbranch_vccz .LBB3_4
; GFX90A-NEXT: ; %bb.8: ; in Loop: Header=BB3_2 Depth=1
-; GFX90A-NEXT: s_mov_b64 s[20:21], -1
+; GFX90A-NEXT: s_mov_b64 s[22:23], -1
; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5
-; GFX90A-NEXT: ; implicit-def: $sgpr18_sgpr19
+; GFX90A-NEXT: ; implicit-def: $sgpr20_sgpr21
; GFX90A-NEXT: .LBB3_9: ; %loop.exit.guard
; GFX90A-NEXT: ; in Loop: Header=BB3_2 Depth=1
-; GFX90A-NEXT: s_xor_b64 s[16:17], s[20:21], -1
+; GFX90A-NEXT: s_xor_b64 s[18:19], s[22:23], -1
; GFX90A-NEXT: .LBB3_10: ; %Flow19
; GFX90A-NEXT: ; in Loop: Header=BB3_2 Depth=1
-; GFX90A-NEXT: s_mov_b64 s[0:1], -1
-; GFX90A-NEXT: s_and_b64 vcc, exec, s[16:17]
+; GFX90A-NEXT: s_mov_b64 s[2:3], -1
+; GFX90A-NEXT: s_and_b64 vcc, exec, s[18:19]
; GFX90A-NEXT: s_cbranch_vccz .LBB3_1
; GFX90A-NEXT: ; %bb.11: ; %bb12
; GFX90A-NEXT: ; in Loop: Header=BB3_2 Depth=1
-; GFX90A-NEXT: s_add_u32 s4, s4, s8
-; GFX90A-NEXT: s_addc_u32 s5, s5, 0
-; GFX90A-NEXT: s_add_u32 s10, s10, s12
-; GFX90A-NEXT: s_addc_u32 s11, s11, s13
-; GFX90A-NEXT: s_mov_b64 s[0:1], 0
+; GFX90A-NEXT: s_add_u32 s10, s10, s8
+; GFX90A-NEXT: s_addc_u32 s11, s11, 0
+; GFX90A-NEXT: s_add_u32 s14, s14, s16
+; GFX90A-NEXT: s_addc_u32 s15, s15, s17
+; GFX90A-NEXT: s_mov_b64 s[2:3], 0
; GFX90A-NEXT: s_branch .LBB3_1
; GFX90A-NEXT: .LBB3_12: ; %DummyReturnBlock
; GFX90A-NEXT: s_endpgm
@@ -834,7 +838,8 @@ bb:
bb9: ; preds = %bb12, %bb
%i10 = phi i64 [ %arg3, %bb ], [ %i13, %bb12 ]
- br i1 undef, label %bb14, label %bb12
+ %undef = freeze i1 poison
+ br i1 %undef, label %bb14, label %bb12
bb12: ; preds = %bb58, %bb9
%i13 = add nuw nsw i64 %i10, %i8
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fold-binop-select.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fold-binop-select.ll
index d506c8c4b8779..7fdc012d4f1b5 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fold-binop-select.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fold-binop-select.ll
@@ -55,7 +55,7 @@ define <2 x i32> @select_sdiv_lhs_const_v2i32(i1 %cond) {
; GCN-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc
; GCN-NEXT: v_mov_b32_e32 v1, 0x594
; GCN-NEXT: s_setpc_b64 s[30:31]
- %select = select i1 %cond, <2 x i32> <i32 5, i32 undef>, <2 x i32> <i32 6, i32 7>
+ %select = select i1 %cond, <2 x i32> <i32 5, i32 poison>, <2 x i32> <i32 6, i32 7>
%op = sdiv <2 x i32> <i32 3333, i32 9999>, %select
ret <2 x i32> %op
}
diff --git a/llvm/test/CodeGen/AMDGPU/andorbitset.ll b/llvm/test/CodeGen/AMDGPU/andorbitset.ll
index 13daedf987229..a60d14cd46573 100644
--- a/llvm/test/CodeGen/AMDGPU/andorbitset.ll
+++ b/llvm/test/CodeGen/AMDGPU/andorbitset.ll
@@ -106,8 +106,8 @@ define amdgpu_kernel void @s_set_midbit(ptr addrspace(1) %out, i32 %in) {
@gv = external addrspace(1) global i32
; Make sure there's no verifier error with an undef source.
-define void @bitset_verifier_error() local_unnamed_addr #0 {
-; SI-LABEL: bitset_verifier_error:
+define void @bitset_verifier_error_freeze_poison() local_unnamed_addr #0 {
+; SI-LABEL: bitset_verifier_error_freeze_poison:
; SI: ; %bb.0: ; %bb
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT: s_getpc_b64 s[4:5]
@@ -128,13 +128,40 @@ define void @bitset_verifier_error() local_unnamed_addr #0 {
; SI-NEXT: ; %bb.1: ; %bb5
; SI-NEXT: .LBB6_2: ; %bb6
bb:
- %i = call float @llvm.fabs.f32(float undef) #0
+ %undef0 = freeze float poison
+ %i = call float @llvm.fabs.f32(float %undef0) #0
%i1 = bitcast float %i to i32
store i32 %i1, ptr addrspace(1) @gv
br label %bb2
bb2:
- %i3 = call float @llvm.fabs.f32(float undef) #0
+ %undef1 = freeze float poison
+ %i3 = call float @llvm.fabs.f32(float %undef1) #0
+ %i4 = fcmp fast ult float %i3, 0x3FEFF7CEE0000000
+ br i1 %i4, label %bb5, label %bb6
+
+bb5:
+ unreachable
+
+bb6:
+ unreachable
+}
+
+define void @bitset_verifier_error_poison() local_unnamed_addr #0 {
+; SI-LABEL: bitset_verifier_error_poison:
+; SI: ; %bb.0: ; %bb
+; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-NEXT: s_cbranch_scc1 .LBB7_2
+; SI-NEXT: ; %bb.1: ; %bb5
+; SI-NEXT: .LBB7_2: ; %bb6
+bb:
+ %i = call float @llvm.fabs.f32(float poison) #0
+ %i1 = bitcast float %i to i32
+ store i32 %i1, ptr addrspace(1) @gv
+ br label %bb2
+
+bb2:
+ %i3 = call float @llvm.fabs.f32(float poison) #0
%i4 = fcmp fast ult float %i3, 0x3FEFF7CEE0000000
br i1 %i4, label %bb5, label %bb6
diff --git a/llvm/test/CodeGen/AMDGPU/cndmask-no-def-vcc.ll b/llvm/test/CodeGen/AMDGPU/cndmask-no-def-vcc.ll
index f8a1604351d9e..29d929995bf88 100644
--- a/llvm/test/CodeGen/AMDGPU/cndmask-no-def-vcc.ll
+++ b/llvm/test/CodeGen/AMDGPU/cndmask-no-def-vcc.ll
@@ -37,11 +37,12 @@ bb2:
define amdgpu_kernel void @preserve_condition_undef_flag(float %arg, i32 %arg1, float %arg2) {
bb0:
%tmp = icmp sgt i32 %arg1, 4
- %undef = call i1 @llvm.amdgcn.class.f32(float poison, i32 undef)
- %tmp4 = select i1 %undef, float %arg, float 1.000000e+00
+ %mask = freeze i32 poison
+ %undef0 = call i1 @llvm.amdgcn.class.f32(float poison, i32 %mask)
+ %tmp4 = select i1 %undef0, float %arg, float 1.000000e+00
%tmp5 = fcmp ogt float %arg2, 0.000000e+00
%tmp6 = fcmp olt float %arg2, 1.000000e+00
- %tmp7 = fcmp olt float %arg, undef
+ %tmp7 = fcmp olt float %arg, poison
%tmp8 = and i1 %tmp5, %tmp6
%tmp9 = and i1 %tmp8, %tmp7
br i1 %tmp9, label %bb1, label %bb2
diff --git a/llvm/test/CodeGen/AMDGPU/combine-add-zext-xor.ll b/llvm/test/CodeGen/AMDGPU/combine-add-zext-xor.ll
index 434fc764e1fa6..f0e7cba6924d8 100644
--- a/llvm/test/CodeGen/AMDGPU/combine-add-zext-xor.ll
+++ b/llvm/test/CodeGen/AMDGPU/combine-add-zext-xor.ll
@@ -4,29 +4,34 @@
; Test that unused lanes in the s_xor result are masked out with v_cndmask.
-define i32 @combine_add_zext_xor() {
+define i32 @combine_add_zext_xor(i32 inreg %cond) {
; GFX1010-LABEL: combine_add_zext_xor:
; GFX1010: ; %bb.0: ; %.entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1010-NEXT: s_cmp_lg_u32 s16, 0
; GFX1010-NEXT: v_mov_b32_e32 v1, 0
+; GFX1010-NEXT: s_cselect_b32 s4, -1, 0
+; GFX1010-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
+; GFX1010-NEXT: v_cmp_ne_u32_e64 s4, 1, v0
; GFX1010-NEXT: s_branch .LBB0_2
; GFX1010-NEXT: .LBB0_1: ; %bb9
; GFX1010-NEXT: ; in Loop: Header=BB0_2 Depth=1
-; GFX1010-NEXT: s_xor_b32 s4, s4, -1
+; GFX1010-NEXT: s_xor_b32 s5, s5, -1
; GFX1010-NEXT: v_cmp_lt_i32_e32 vcc_lo, 0xfffffbe6, v1
-; GFX1010-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
+; GFX1010-NEXT: v_cndmask_b32_e64 v0, 0, 1, s5
; GFX1010-NEXT: v_add_nc_u32_e32 v2, v1, v0
; GFX1010-NEXT: v_mov_b32_e32 v1, v2
; GFX1010-NEXT: s_cbranch_vccz .LBB0_4
; GFX1010-NEXT: .LBB0_2: ; %.a
; GFX1010-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1010-NEXT: ; implicit-def: $sgpr4
-; GFX1010-NEXT: s_cbranch_scc1 .LBB0_1
+; GFX1010-NEXT: s_and_b32 vcc_lo, exec_lo, s4
+; GFX1010-NEXT: ; implicit-def: $sgpr5
+; GFX1010-NEXT: s_cbranch_vccnz .LBB0_1
; GFX1010-NEXT: ; %bb.3: ; %bb
; GFX1010-NEXT: ; in Loop: Header=BB0_2 Depth=1
; GFX1010-NEXT: buffer_load_dword v0, v1, s[4:7], 64 offen glc
; GFX1010-NEXT: s_waitcnt vmcnt(0)
-; GFX1010-NEXT: v_cmp_eq_u32_e64 s4, 0, v0
+; GFX1010-NEXT: v_cmp_eq_u32_e64 s5, 0, v0
; GFX1010-NEXT: s_branch .LBB0_1
; GFX1010-NEXT: .LBB0_4: ; %.exit
; GFX1010-NEXT: s_setpc_b64 s[30:31]
@@ -34,27 +39,32 @@ define i32 @combine_add_zext_xor() {
; GFX1100-LABEL: combine_add_zext_xor:
; GFX1100: ; %bb.0: ; %.entry
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-NEXT: s_cmp_lg_u32 s0, 0
; GFX1100-NEXT: v_mov_b32_e32 v1, 0
+; GFX1100-NEXT: s_cselect_b32 s0, -1, 0
+; GFX1100-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX1100-NEXT: v_cmp_ne_u32_e64 s0, 1, v0
; GFX1100-NEXT: s_branch .LBB0_2
; GFX1100-NEXT: .LBB0_1: ; %bb9
; GFX1100-NEXT: ; in Loop: Header=BB0_2 Depth=1
-; GFX1100-NEXT: s_xor_b32 s0, s0, -1
-; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX1100-NEXT: s_xor_b32 s1, s1, -1
; GFX1100-NEXT: v_cmp_lt_i32_e32 vcc_lo, 0xfffffbe6, v1
-; GFX1100-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX1100-NEXT: v_cndmask_b32_e64 v0, 0, 1, s1
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-NEXT: v_add_nc_u32_e32 v2, v1, v0
-; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-NEXT: v_mov_b32_e32 v1, v2
; GFX1100-NEXT: s_cbranch_vccz .LBB0_4
; GFX1100-NEXT: .LBB0_2: ; %.a
; GFX1100-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1100-NEXT: ; implicit-def: $sgpr0
-; GFX1100-NEXT: s_cbranch_scc1 .LBB0_1
+; GFX1100-NEXT: s_and_b32 vcc_lo, exec_lo, s0
+; GFX1100-NEXT: ; implicit-def: $sgpr1
+; GFX1100-NEXT: s_cbranch_vccnz .LBB0_1
; GFX1100-NEXT: ; %bb.3: ; %bb
; GFX1100-NEXT: ; in Loop: Header=BB0_2 Depth=1
; GFX1100-NEXT: buffer_load_b32 v0, v1, s[0:3], 64 offen glc
; GFX1100-NEXT: s_waitcnt vmcnt(0)
-; GFX1100-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
+; GFX1100-NEXT: v_cmp_eq_u32_e64 s1, 0, v0
; GFX1100-NEXT: s_branch .LBB0_1
; GFX1100-NEXT: .LBB0_4: ; %.exit
; GFX1100-NEXT: s_setpc_b64 s[30:31]
@@ -63,7 +73,8 @@ define i32 @combine_add_zext_xor() {
.a: ; preds = %bb9, %.entry
%.2 = phi i32 [ 0, %.entry ], [ %i11, %bb9 ]
- br i1 poison, label %bb9, label %bb
+ %cmp = icmp eq i32 %cond, 0
+ br i1 %cmp, label %bb9, label %bb
bb: ; preds = %.a
%.i3 = call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) poison, i32 %.2, i32 64, i32 1)
@@ -84,29 +95,34 @@ bb9: ; preds = %bb, %.a
; Test that unused lanes in the s_xor result are masked out with v_cndmask.
-define i32 @combine_sub_zext_xor() {
+define i32 @combine_sub_zext_xor(i32 inreg %cond) {
; GFX1010-LABEL: combine_sub_zext_xor:
; GFX1010: ; %bb.0: ; %.entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1010-NEXT: s_cmp_lg_u32 s16, 0
; GFX1010-NEXT: v_mov_b32_e32 v1, 0
+; GFX1010-NEXT: s_cselect_b32 s4, -1, 0
+; GFX1010-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
+; GFX1010-NEXT: v_cmp_ne_u32_e64 s4, 1, v0
; GFX1010-NEXT: s_branch .LBB1_2
; GFX1010-NEXT: .LBB1_1: ; %bb9
; GFX1010-NEXT: ; in Loop: Header=BB1_2 Depth=1
-; GFX1010-NEXT: s_xor_b32 s4, s4, -1
+; GFX1010-NEXT: s_xor_b32 s5, s5, -1
; GFX1010-NEXT: v_cmp_lt_i32_e32 vcc_lo, 0xfffffbe6, v1
-; GFX1010-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
+; GFX1010-NEXT: v_cndmask_b32_e64 v0, 0, 1, s5
; GFX1010-NEXT: v_sub_nc_u32_e32 v2, v1, v0
; GFX1010-NEXT: v_mov_b32_e32 v1, v2
; GFX1010-NEXT: s_cbranch_vccz .LBB1_4
; GFX1010-NEXT: .LBB1_2: ; %.a
; GFX1010-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1010-NEXT: ; implicit-def: $sgpr4
-; GFX1010-NEXT: s_cbranch_scc1 .LBB1_1
+; GFX1010-NEXT: s_and_b32 vcc_lo, exec_lo, s4
+; GFX1010-NEXT: ; implicit-def: $sgpr5
+; GFX1010-NEXT: s_cbranch_vccnz .LBB1_1
; GFX1010-NEXT: ; %bb.3: ; %bb
; GFX1010-NEXT: ; in Loop: Header=BB1_2 Depth=1
; GFX1010-NEXT: buffer_load_dword v0, v1, s[4:7], 64 offen glc
; GFX1010-NEXT: s_waitcnt vmcnt(0)
-; GFX1010-NEXT: v_cmp_eq_u32_e64 s4, 0, v0
+; GFX1010-NEXT: v_cmp_eq_u32_e64 s5, 0, v0
; GFX1010-NEXT: s_branch .LBB1_1
; GFX1010-NEXT: .LBB1_4: ; %.exit
; GFX1010-NEXT: s_setpc_b64 s[30:31]
@@ -114,27 +130,32 @@ define i32 @combine_sub_zext_xor() {
; GFX1100-LABEL: combine_sub_zext_xor:
; GFX1100: ; %bb.0: ; %.entry
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-NEXT: s_cmp_lg_u32 s0, 0
; GFX1100-NEXT: v_mov_b32_e32 v1, 0
+; GFX1100-NEXT: s_cselect_b32 s0, -1, 0
+; GFX1100-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX1100-NEXT: v_cmp_ne_u32_e64 s0, 1, v0
; GFX1100-NEXT: s_branch .LBB1_2
; GFX1100-NEXT: .LBB1_1: ; %bb9
; GFX1100-NEXT: ; in Loop: Header=BB1_2 Depth=1
-; GFX1100-NEXT: s_xor_b32 s0, s0, -1
-; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX1100-NEXT: s_xor_b32 s1, s1, -1
; GFX1100-NEXT: v_cmp_lt_i32_e32 vcc_lo, 0xfffffbe6, v1
-; GFX1100-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX1100-NEXT: v_cndmask_b32_e64 v0, 0, 1, s1
+; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-NEXT: v_sub_nc_u32_e32 v2, v1, v0
-; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-NEXT: v_mov_b32_e32 v1, v2
; GFX1100-NEXT: s_cbranch_vccz .LBB1_4
; GFX1100-NEXT: .LBB1_2: ; %.a
; GFX1100-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1100-NEXT: ; implicit-def: $sgpr0
-; GFX1100-NEXT: s_cbranch_scc1 .LBB1_1
+; GFX1100-NEXT: s_and_b32 vcc_lo, exec_lo, s0
+; GFX1100-NEXT: ; implicit-def: $sgpr1
+; GFX1100-NEXT: s_cbranch_vccnz .LBB1_1
; GFX1100-NEXT: ; %bb.3: ; %bb
; GFX1100-NEXT: ; in Loop: Header=BB1_2 Depth=1
; GFX1100-NEXT: buffer_load_b32 v0, v1, s[0:3], 64 offen glc
; GFX1100-NEXT: s_waitcnt vmcnt(0)
-; GFX1100-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
+; GFX1100-NEXT: v_cmp_eq_u32_e64 s1, 0, v0
; GFX1100-NEXT: s_branch .LBB1_1
; GFX1100-NEXT: .LBB1_4: ; %.exit
; GFX1100-NEXT: s_setpc_b64 s[30:31]
@@ -143,7 +164,8 @@ define i32 @combine_sub_zext_xor() {
.a: ; preds = %bb9, %.entry
%.2 = phi i32 [ 0, %.entry ], [ %i11, %bb9 ]
- br i1 undef, label %bb9, label %bb
+ %cmp = icmp eq i32 %cond, 0
+ br i1 %cmp, label %bb9, label %bb
bb: ; preds = %.a
%.i3 = call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) poison, i32 %.2, i32 64, i32 1)
@@ -164,60 +186,71 @@ bb9: ; preds = %bb, %.a
; Test that unused lanes in the s_or result are masked out with v_cndmask.
-define i32 @combine_add_zext_or() {
+define i32 @combine_add_zext_or(i32 inreg %cond) {
; GFX1010-LABEL: combine_add_zext_or:
; GFX1010: ; %bb.0: ; %.entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1010-NEXT: s_mov_b32 s4, 0
+; GFX1010-NEXT: s_cmp_lg_u32 s16, 0
+; GFX1010-NEXT: s_mov_b32 s5, 0
+; GFX1010-NEXT: s_cselect_b32 s4, -1, 0
+; GFX1010-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
+; GFX1010-NEXT: v_cmp_ne_u32_e64 s4, 1, v0
; GFX1010-NEXT: s_branch .LBB2_2
; GFX1010-NEXT: .LBB2_1: ; %bb9
; GFX1010-NEXT: ; in Loop: Header=BB2_2 Depth=1
-; GFX1010-NEXT: s_cmpk_gt_i32 s4, 0xfbe6
-; GFX1010-NEXT: s_cselect_b32 s6, -1, 0
-; GFX1010-NEXT: s_add_i32 s4, s4, 1
-; GFX1010-NEXT: s_and_b32 vcc_lo, exec_lo, s6
+; GFX1010-NEXT: s_cmpk_gt_i32 s5, 0xfbe6
+; GFX1010-NEXT: s_cselect_b32 s7, -1, 0
+; GFX1010-NEXT: s_add_i32 s5, s5, 1
+; GFX1010-NEXT: s_and_b32 vcc_lo, exec_lo, s7
; GFX1010-NEXT: s_cbranch_vccz .LBB2_4
; GFX1010-NEXT: .LBB2_2: ; %.a
; GFX1010-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1010-NEXT: ; implicit-def: $sgpr5
-; GFX1010-NEXT: s_cbranch_scc1 .LBB2_1
+; GFX1010-NEXT: s_and_b32 vcc_lo, exec_lo, s4
+; GFX1010-NEXT: ; implicit-def: $sgpr6
+; GFX1010-NEXT: s_cbranch_vccnz .LBB2_1
; GFX1010-NEXT: ; %bb.3: ; %bb
; GFX1010-NEXT: ; in Loop: Header=BB2_2 Depth=1
-; GFX1010-NEXT: v_mov_b32_e32 v0, s4
+; GFX1010-NEXT: v_mov_b32_e32 v0, s5
; GFX1010-NEXT: buffer_load_dword v0, v0, s[4:7], 64 offen glc
; GFX1010-NEXT: s_waitcnt vmcnt(0)
-; GFX1010-NEXT: v_cmp_eq_u32_e64 s5, 0, v0
+; GFX1010-NEXT: v_cmp_eq_u32_e64 s6, 0, v0
; GFX1010-NEXT: s_branch .LBB2_1
; GFX1010-NEXT: .LBB2_4: ; %.exit
-; GFX1010-NEXT: s_or_b32 s4, s5, s6
+; GFX1010-NEXT: s_or_b32 s4, s6, s7
; GFX1010-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
; GFX1010-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-LABEL: combine_add_zext_or:
; GFX1100: ; %bb.0: ; %.entry
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: s_mov_b32 s0, 0
+; GFX1100-NEXT: s_cmp_lg_u32 s0, 0
+; GFX1100-NEXT: s_mov_b32 s1, 0
+; GFX1100-NEXT: s_cselect_b32 s0, -1, 0
+; GFX1100-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX1100-NEXT: v_cmp_ne_u32_e64 s0, 1, v0
; GFX1100-NEXT: s_branch .LBB2_2
; GFX1100-NEXT: .LBB2_1: ; %bb9
; GFX1100-NEXT: ; in Loop: Header=BB2_2 Depth=1
-; GFX1100-NEXT: s_cmpk_gt_i32 s0, 0xfbe6
-; GFX1100-NEXT: s_cselect_b32 s2, -1, 0
-; GFX1100-NEXT: s_add_i32 s0, s0, 1
-; GFX1100-NEXT: s_and_b32 vcc_lo, exec_lo, s2
+; GFX1100-NEXT: s_cmpk_gt_i32 s1, 0xfbe6
+; GFX1100-NEXT: s_cselect_b32 s3, -1, 0
+; GFX1100-NEXT: s_add_i32 s1, s1, 1
+; GFX1100-NEXT: s_and_b32 vcc_lo, exec_lo, s3
; GFX1100-NEXT: s_cbranch_vccz .LBB2_4
; GFX1100-NEXT: .LBB2_2: ; %.a
; GFX1100-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1100-NEXT: ; implicit-def: $sgpr1
-; GFX1100-NEXT: s_cbranch_scc1 .LBB2_1
+; GFX1100-NEXT: s_and_b32 vcc_lo, exec_lo, s0
+; GFX1100-NEXT: ; implicit-def: $sgpr2
+; GFX1100-NEXT: s_cbranch_vccnz .LBB2_1
; GFX1100-NEXT: ; %bb.3: ; %bb
; GFX1100-NEXT: ; in Loop: Header=BB2_2 Depth=1
-; GFX1100-NEXT: v_mov_b32_e32 v0, s0
+; GFX1100-NEXT: v_mov_b32_e32 v0, s1
; GFX1100-NEXT: buffer_load_b32 v0, v0, s[0:3], 64 offen glc
; GFX1100-NEXT: s_waitcnt vmcnt(0)
-; GFX1100-NEXT: v_cmp_eq_u32_e64 s1, 0, v0
+; GFX1100-NEXT: v_cmp_eq_u32_e64 s2, 0, v0
; GFX1100-NEXT: s_branch .LBB2_1
; GFX1100-NEXT: .LBB2_4: ; %.exit
-; GFX1100-NEXT: s_or_b32 s0, s1, s2
+; GFX1100-NEXT: s_or_b32 s0, s2, s3
; GFX1100-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1100-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
; GFX1100-NEXT: s_setpc_b64 s[30:31]
@@ -226,7 +259,8 @@ define i32 @combine_add_zext_or() {
.a: ; preds = %bb9, %.entry
%.2 = phi i32 [ 0, %.entry ], [ %i11, %bb9 ]
- br i1 undef, label %bb9, label %bb
+ %cmp = icmp eq i32 %cond, 0
+ br i1 %cmp, label %bb9, label %bb
bb: ; preds = %.a
%.i3 = call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) poison, i32 %.2, i32 64, i32 1)
@@ -248,60 +282,71 @@ bb9: ; preds = %bb, %.a
; Test that unused lanes in the s_or result are masked out with v_cndmask.
-define i32 @combine_sub_zext_or() {
+define i32 @combine_sub_zext_or(i32 inreg %cond) {
; GFX1010-LABEL: combine_sub_zext_or:
; GFX1010: ; %bb.0: ; %.entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1010-NEXT: s_mov_b32 s4, 0
+; GFX1010-NEXT: s_cmp_lg_u32 s16, 0
+; GFX1010-NEXT: s_mov_b32 s5, 0
+; GFX1010-NEXT: s_cselect_b32 s4, -1, 0
+; GFX1010-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
+; GFX1010-NEXT: v_cmp_ne_u32_e64 s4, 1, v0
; GFX1010-NEXT: s_branch .LBB3_2
; GFX1010-NEXT: .LBB3_1: ; %bb9
; GFX1010-NEXT: ; in Loop: Header=BB3_2 Depth=1
-; GFX1010-NEXT: s_cmpk_gt_i32 s4, 0xfbe6
-; GFX1010-NEXT: s_cselect_b32 s6, -1, 0
-; GFX1010-NEXT: s_add_i32 s4, s4, -1
-; GFX1010-NEXT: s_and_b32 vcc_lo, exec_lo, s6
+; GFX1010-NEXT: s_cmpk_gt_i32 s5, 0xfbe6
+; GFX1010-NEXT: s_cselect_b32 s7, -1, 0
+; GFX1010-NEXT: s_add_i32 s5, s5, -1
+; GFX1010-NEXT: s_and_b32 vcc_lo, exec_lo, s7
; GFX1010-NEXT: s_cbranch_vccz .LBB3_4
; GFX1010-NEXT: .LBB3_2: ; %.a
; GFX1010-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1010-NEXT: ; implicit-def: $sgpr5
-; GFX1010-NEXT: s_cbranch_scc1 .LBB3_1
+; GFX1010-NEXT: s_and_b32 vcc_lo, exec_lo, s4
+; GFX1010-NEXT: ; implicit-def: $sgpr6
+; GFX1010-NEXT: s_cbranch_vccnz .LBB3_1
; GFX1010-NEXT: ; %bb.3: ; %bb
; GFX1010-NEXT: ; in Loop: Header=BB3_2 Depth=1
-; GFX1010-NEXT: v_mov_b32_e32 v0, s4
+; GFX1010-NEXT: v_mov_b32_e32 v0, s5
; GFX1010-NEXT: buffer_load_dword v0, v0, s[4:7], 64 offen glc
; GFX1010-NEXT: s_waitcnt vmcnt(0)
-; GFX1010-NEXT: v_cmp_eq_u32_e64 s5, 0, v0
+; GFX1010-NEXT: v_cmp_eq_u32_e64 s6, 0, v0
; GFX1010-NEXT: s_branch .LBB3_1
; GFX1010-NEXT: .LBB3_4: ; %.exit
-; GFX1010-NEXT: s_or_b32 s4, s5, s6
+; GFX1010-NEXT: s_or_b32 s4, s6, s7
; GFX1010-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
; GFX1010-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-LABEL: combine_sub_zext_or:
; GFX1100: ; %bb.0: ; %.entry
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-NEXT: s_mov_b32 s0, 0
+; GFX1100-NEXT: s_cmp_lg_u32 s0, 0
+; GFX1100-NEXT: s_mov_b32 s1, 0
+; GFX1100-NEXT: s_cselect_b32 s0, -1, 0
+; GFX1100-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX1100-NEXT: v_cmp_ne_u32_e64 s0, 1, v0
; GFX1100-NEXT: s_branch .LBB3_2
; GFX1100-NEXT: .LBB3_1: ; %bb9
; GFX1100-NEXT: ; in Loop: Header=BB3_2 Depth=1
-; GFX1100-NEXT: s_cmpk_gt_i32 s0, 0xfbe6
-; GFX1100-NEXT: s_cselect_b32 s2, -1, 0
-; GFX1100-NEXT: s_add_i32 s0, s0, -1
-; GFX1100-NEXT: s_and_b32 vcc_lo, exec_lo, s2
+; GFX1100-NEXT: s_cmpk_gt_i32 s1, 0xfbe6
+; GFX1100-NEXT: s_cselect_b32 s3, -1, 0
+; GFX1100-NEXT: s_add_i32 s1, s1, -1
+; GFX1100-NEXT: s_and_b32 vcc_lo, exec_lo, s3
; GFX1100-NEXT: s_cbranch_vccz .LBB3_4
; GFX1100-NEXT: .LBB3_2: ; %.a
; GFX1100-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1100-NEXT: ; implicit-def: $sgpr1
-; GFX1100-NEXT: s_cbranch_scc1 .LBB3_1
+; GFX1100-NEXT: s_and_b32 vcc_lo, exec_lo, s0
+; GFX1100-NEXT: ; implicit-def: $sgpr2
+; GFX1100-NEXT: s_cbranch_vccnz .LBB3_1
; GFX1100-NEXT: ; %bb.3: ; %bb
; GFX1100-NEXT: ; in Loop: Header=BB3_2 Depth=1
-; GFX1100-NEXT: v_mov_b32_e32 v0, s0
+; GFX1100-NEXT: v_mov_b32_e32 v0, s1
; GFX1100-NEXT: buffer_load_b32 v0, v0, s[0:3], 64 offen glc
; GFX1100-NEXT: s_waitcnt vmcnt(0)
-; GFX1100-NEXT: v_cmp_eq_u32_e64 s1, 0, v0
+; GFX1100-NEXT: v_cmp_eq_u32_e64 s2, 0, v0
; GFX1100-NEXT: s_branch .LBB3_1
; GFX1100-NEXT: .LBB3_4: ; %.exit
-; GFX1100-NEXT: s_or_b32 s0, s1, s2
+; GFX1100-NEXT: s_or_b32 s0, s2, s3
; GFX1100-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1100-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
; GFX1100-NEXT: s_setpc_b64 s[30:31]
@@ -310,7 +355,8 @@ define i32 @combine_sub_zext_or() {
.a: ; preds = %bb9, %.entry
%.2 = phi i32 [ 0, %.entry ], [ %i11, %bb9 ]
- br i1 undef, label %bb9, label %bb
+ %cmp = icmp eq i32 %cond, 0
+ br i1 %cmp, label %bb9, label %bb
bb: ; preds = %.a
%.i3 = call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) poison, i32 %.2, i32 64, i32 1)
@@ -332,28 +378,33 @@ bb9: ; preds = %bb, %.a
; Test that unused lanes in the s_and result are masked out with v_cndmask.
-define i32 @combine_add_zext_and() {
+define i32 @combine_add_zext_and(i32 inreg %cond) {
; GFX1010-LABEL: combine_add_zext_and:
; GFX1010: ; %bb.0: ; %.entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1010-NEXT: s_cmp_lg_u32 s16, 0
; GFX1010-NEXT: v_mov_b32_e32 v1, 0
+; GFX1010-NEXT: s_cselect_b32 s4, -1, 0
+; GFX1010-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
+; GFX1010-NEXT: v_cmp_ne_u32_e64 s4, 1, v0
; GFX1010-NEXT: s_branch .LBB4_2
; GFX1010-NEXT: .LBB4_1: ; %bb9
; GFX1010-NEXT: ; in Loop: Header=BB4_2 Depth=1
; GFX1010-NEXT: v_cmp_lt_i32_e32 vcc_lo, 0xfffffbe6, v1
-; GFX1010-NEXT: s_and_b32 s4, s4, vcc_lo
-; GFX1010-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
+; GFX1010-NEXT: s_and_b32 s5, s5, vcc_lo
+; GFX1010-NEXT: v_cndmask_b32_e64 v0, 0, 1, s5
; GFX1010-NEXT: v_add_nc_u32_e32 v1, v1, v0
; GFX1010-NEXT: s_cbranch_vccz .LBB4_4
; GFX1010-NEXT: .LBB4_2: ; %.a
; GFX1010-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1010-NEXT: ; implicit-def: $sgpr4
-; GFX1010-NEXT: s_cbranch_scc1 .LBB4_1
+; GFX1010-NEXT: s_and_b32 vcc_lo, exec_lo, s4
+; GFX1010-NEXT: ; implicit-def: $sgpr5
+; GFX1010-NEXT: s_cbranch_vccnz .LBB4_1
; GFX1010-NEXT: ; %bb.3: ; %bb
; GFX1010-NEXT: ; in Loop: Header=BB4_2 Depth=1
; GFX1010-NEXT: buffer_load_dword v0, v1, s[4:7], 64 offen glc
; GFX1010-NEXT: s_waitcnt vmcnt(0)
-; GFX1010-NEXT: v_cmp_eq_u32_e64 s4, 0, v0
+; GFX1010-NEXT: v_cmp_eq_u32_e64 s5, 0, v0
; GFX1010-NEXT: s_branch .LBB4_1
; GFX1010-NEXT: .LBB4_4: ; %.exit
; GFX1010-NEXT: s_setpc_b64 s[30:31]
@@ -361,26 +412,32 @@ define i32 @combine_add_zext_and() {
; GFX1100-LABEL: combine_add_zext_and:
; GFX1100: ; %bb.0: ; %.entry
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1100-NEXT: s_cmp_lg_u32 s0, 0
; GFX1100-NEXT: v_mov_b32_e32 v1, 0
+; GFX1100-NEXT: s_cselect_b32 s0, -1, 0
+; GFX1100-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1100-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX1100-NEXT: v_cmp_ne_u32_e64 s0, 1, v0
; GFX1100-NEXT: s_branch .LBB4_2
; GFX1100-NEXT: .LBB4_1: ; %bb9
; GFX1100-NEXT: ; in Loop: Header=BB4_2 Depth=1
; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1100-NEXT: v_cmp_lt_i32_e32 vcc_lo, 0xfffffbe6, v1
-; GFX1100-NEXT: s_and_b32 s0, s0, vcc_lo
-; GFX1100-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX1100-NEXT: s_and_b32 s1, s1, vcc_lo
+; GFX1100-NEXT: v_cndmask_b32_e64 v0, 0, 1, s1
; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-NEXT: v_add_nc_u32_e32 v1, v1, v0
; GFX1100-NEXT: s_cbranch_vccz .LBB4_4
; GFX1100-NEXT: .LBB4_2: ; %.a
; GFX1100-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX1100-NEXT: ; implicit-def: $sgpr0
-; GFX1100-NEXT: s_cbranch_scc1 .LBB4_1
+; GFX1100-NEXT: s_and_b32 vcc_lo, exec_lo, s0
+; GFX1100-NEXT: ; implicit-def: $sgpr1
+; GFX1100-NEXT: s_cbranch_vccnz .LBB4_1
; GFX1100-NEXT: ; %bb.3: ; %bb
; GFX1100-NEXT: ; in Loop: Header=BB4_2 Depth=1
; GFX1100-NEXT: buffer_load_b32 v0, v1, s[0:3], 64 offen glc
; GFX1100-NEXT: s_waitcnt vmcnt(0)
-; GFX1100-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
+; GFX1100-NEXT: v_cmp_eq_u32_e64 s1, 0, v0
; GFX1100-NEXT: s_branch .LBB4_1
; GFX1100-NEXT: .LBB4_4: ; %.exit
; GFX1100-NEXT: s_setpc_b64 s[30:31]
@@ -389,7 +446,8 @@ define i32 @combine_add_zext_and() {
.a: ; preds = %bb9, %.entry
%.2 = phi i32 [ 0, %.entry ], [ %i11, %bb9 ]
- br i1 undef, label %bb9, label %bb
+ %cmp = icmp eq i32 %cond, 0
+ br i1 %cmp, label %bb9, label %bb
bb: ; preds = %.a
%.i3 = call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) poison, i32 %.2, i32 64, i32 1)
diff --git a/llvm/test/CodeGen/AMDGPU/fold-fabs.ll b/llvm/test/CodeGen/AMDGPU/fold-fabs.ll
index 6a8594a168f03..1c6ab3c14da57 100644
--- a/llvm/test/CodeGen/AMDGPU/fold-fabs.ll
+++ b/llvm/test/CodeGen/AMDGPU/fold-fabs.ll
@@ -81,7 +81,8 @@ define float @fold_abs_in_branch_undef(float %arg1, float %arg2) {
entry:
%0 = fadd reassoc nnan nsz arcp contract afn float %arg1, %arg2
%1 = fadd reassoc nnan nsz arcp contract afn float %0, %arg2
- %2 = call reassoc nnan nsz arcp contract afn float @llvm.fabs.f32(float undef)
+ %undef = freeze float poison
+ %2 = call reassoc nnan nsz arcp contract afn float @llvm.fabs.f32(float %undef)
%3 = fmul reassoc nnan nsz arcp contract afn float %2, 2.000000e+00
%4 = fcmp ule float %3, 1.000000e+00
br i1 %4, label %if, label %exit
diff --git a/llvm/test/CodeGen/AMDGPU/i1-copy-implicit-def.ll b/llvm/test/CodeGen/AMDGPU/i1-copy-implicit-def.ll
index 338bea9d4f73f..cd0a15e4d7e2e 100644
--- a/llvm/test/CodeGen/AMDGPU/i1-copy-implicit-def.ll
+++ b/llvm/test/CodeGen/AMDGPU/i1-copy-implicit-def.ll
@@ -2,12 +2,28 @@
; RUN: llc -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
; SILowerI1Copies was not handling IMPLICIT_DEF
-; SI-LABEL: {{^}}br_implicit_def:
+; SI-LABEL: {{^}}br_poison:
; SI: %bb.0:
; SI-NEXT: s_cbranch_scc1
-define amdgpu_kernel void @br_implicit_def(ptr addrspace(1) %out, i32 %arg) #0 {
+define amdgpu_kernel void @br_poison(ptr addrspace(1) %out, i32 %arg) #0 {
bb:
- br i1 undef, label %bb1, label %bb2
+ br i1 poison, label %bb1, label %bb2
+
+bb1:
+ store volatile i32 123, ptr addrspace(1) %out
+ ret void
+
+bb2:
+ ret void
+}
+
+; SI-LABEL: {{^}}br_freeze_poison:
+; SI: %bb.0:
+; SI-NEXT: s_cbranch_scc1
+define amdgpu_kernel void @br_freeze_poison(ptr addrspace(1) %out, i32 %arg) #0 {
+bb:
+ %undef = freeze i1 poison
+ br i1 %undef, label %bb1, label %bb2
bb1:
store volatile i32 123, ptr addrspace(1) %out
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-invalid-addrspace.mir b/llvm/test/CodeGen/AMDGPU/memory-legalizer-invalid-addrspace.mir
index 7a914c2322229..0251284696591 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-invalid-addrspace.mir
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-invalid-addrspace.mir
@@ -11,7 +11,7 @@ body: |
$vgpr0 = V_MOV_B32_e32 $sgpr2, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr2_sgpr3
$vgpr1 = V_MOV_B32_e32 killed $sgpr3, implicit $exec, implicit $sgpr2_sgpr3, implicit $exec
- renamable $vgpr2 = FLAT_LOAD_DWORD killed renamable $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load syncscope("one-as") seq_cst (s32) from `ptr addrspace(42) undef`)
+ renamable $vgpr2 = FLAT_LOAD_DWORD killed renamable $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load syncscope("one-as") seq_cst (s32) from `ptr addrspace(42) poison`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `ptr poison`)
@@ -30,7 +30,7 @@ body: |
$vgpr2 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
- FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (volatile store syncscope("agent-one-as") seq_cst (s32) into `ptr addrspace(42) undef`)
+ FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (volatile store syncscope("agent-one-as") seq_cst (s32) into `ptr addrspace(42) poison`)
S_ENDPGM 0
...
@@ -47,7 +47,7 @@ body: |
$vgpr0 = V_MOV_B32_e32 killed $sgpr4, implicit $exec, implicit $exec
$vgpr1 = V_MOV_B32_e32 killed $sgpr5, implicit $exec, implicit $exec
$vgpr2 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
- FLAT_ATOMIC_CMPSWAP killed renamable $vgpr2_vgpr3, killed renamable $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store syncscope("workgroup-one-as") seq_cst seq_cst (s32) on `ptr addrspace(42) undef`)
+ FLAT_ATOMIC_CMPSWAP killed renamable $vgpr2_vgpr3, killed renamable $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store syncscope("workgroup-one-as") seq_cst seq_cst (s32) on `ptr addrspace(42) poison`)
S_ENDPGM 0
...
@@ -63,7 +63,7 @@ body: |
$vgpr0 = V_MOV_B32_e32 $sgpr2, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr2_sgpr3
$vgpr1 = V_MOV_B32_e32 killed $sgpr3, implicit $exec, implicit $sgpr2_sgpr3, implicit $exec
$vgpr2 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
- FLAT_ATOMIC_SWAP killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store syncscope("wavefront-one-as") seq_cst (s32) on `ptr addrspace(42) undef`)
+ FLAT_ATOMIC_SWAP killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store syncscope("wavefront-one-as") seq_cst (s32) on `ptr addrspace(42) poison`)
S_ENDPGM 0
...
diff --git a/llvm/test/CodeGen/AMDGPU/merge-load-store-vreg.mir b/llvm/test/CodeGen/AMDGPU/merge-load-store-vreg.mir
index 2b3851c348d55..40c47f0e979fb 100644
--- a/llvm/test/CodeGen/AMDGPU/merge-load-store-vreg.mir
+++ b/llvm/test/CodeGen/AMDGPU/merge-load-store-vreg.mir
@@ -19,7 +19,7 @@
# GFX9-NEXT: DS_READ2_B32_gfx9 killed %10, 16, 24,
--- |
- @0 = internal unnamed_addr addrspace(3) global [256 x float] undef, align 4
+ @0 = internal unnamed_addr addrspace(3) global [256 x float] poison, align 4
define amdgpu_kernel void @ds_combine_base_offset() {
bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/multi-divergent-exit-region.ll b/llvm/test/CodeGen/AMDGPU/multi-divergent-exit-region.ll
index 9fc6af6f0dd6a..42c6589f417ba 100644
--- a/llvm/test/CodeGen/AMDGPU/multi-divergent-exit-region.ll
+++ b/llvm/test/CodeGen/AMDGPU/multi-divergent-exit-region.ll
@@ -689,7 +689,7 @@ divergent.ret:
; IR: UnifiedReturnBlock:
; IR-NEXT: call void @llvm.amdgcn.end.cf.i64(i64
; IR-NEXT: ret void
-define amdgpu_kernel void @multi_divergent_unreachable_exit() #0 {
+define amdgpu_kernel void @multi_divergent_unreachable_exit(i32 %switch) #0 {
bb:
%tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
switch i32 %tmp, label %bb3 [
@@ -704,7 +704,7 @@ bb2: ; preds = %bb
unreachable
bb3: ; preds = %bb
- switch i32 undef, label %bb5 [
+ switch i32 %switch, label %bb5 [
i32 2, label %bb4
]
diff --git a/llvm/test/CodeGen/AMDGPU/nested-loop-conditions.ll b/llvm/test/CodeGen/AMDGPU/nested-loop-conditions.ll
index e1c2bde99eed2..9a2d969f94e3e 100644
--- a/llvm/test/CodeGen/AMDGPU/nested-loop-conditions.ll
+++ b/llvm/test/CodeGen/AMDGPU/nested-loop-conditions.ll
@@ -252,10 +252,10 @@ define amdgpu_kernel void @nested_loop_conditions(ptr addrspace(1) captures(none
; IR: [[BB21]]:
; IR-NEXT: [[MY_TMP22:%.*]] = extractelement <2 x i32> [[MY_TMP17]], i64 1
; IR-NEXT: [[MY_TMP23:%.*]] = lshr i32 [[MY_TMP22]], 16
-; IR-NEXT: [[MY_TMP24:%.*]] = select i1 undef, i32 undef, i32 [[MY_TMP23]]
+; IR-NEXT: [[MY_TMP24:%.*]] = select i1 false, i32 0, i32 [[MY_TMP23]]
; IR-NEXT: [[MY_TMP25:%.*]] = uitofp i32 [[MY_TMP24]] to float
; IR-NEXT: [[MY_TMP26:%.*]] = fmul float [[MY_TMP25]], 0x3EF0001000000000
-; IR-NEXT: [[MY_TMP27:%.*]] = fsub float [[MY_TMP26]], undef
+; IR-NEXT: [[MY_TMP27:%.*]] = fsub float [[MY_TMP26]], 0x7FF8000000000000
; IR-NEXT: [[MY_TMP28:%.*]] = fcmp olt float [[MY_TMP27]], 5.000000e-01
; IR-NEXT: [[MY_TMP29:%.*]] = select i1 [[MY_TMP28]], i64 1, i64 2
; IR-NEXT: [[MY_TMP30:%.*]] = extractelement <4 x i32> [[MY_TMP936]], i64 [[MY_TMP29]]
@@ -317,10 +317,10 @@ bb18: ; preds = %bb18, %bb16
bb21: ; preds = %bb18
%my.tmp22 = extractelement <2 x i32> %my.tmp17, i64 1
%my.tmp23 = lshr i32 %my.tmp22, 16
- %my.tmp24 = select i1 undef, i32 undef, i32 %my.tmp23
+ %my.tmp24 = select i1 false, i32 0, i32 %my.tmp23
%my.tmp25 = uitofp i32 %my.tmp24 to float
%my.tmp26 = fmul float %my.tmp25, 0x3EF0001000000000
- %my.tmp27 = fsub float %my.tmp26, undef
+ %my.tmp27 = fsub float %my.tmp26, 0x7FF8000000000000
%my.tmp28 = fcmp olt float %my.tmp27, 5.000000e-01
%my.tmp29 = select i1 %my.tmp28, i64 1, i64 2
%my.tmp30 = extractelement <4 x i32> %my.tmp936, i64 %my.tmp29
diff --git a/llvm/test/CodeGen/AMDGPU/schedule-amdgpu-trackers.ll b/llvm/test/CodeGen/AMDGPU/schedule-amdgpu-trackers.ll
index 64a8f5484673f..c5732531f5423 100644
--- a/llvm/test/CodeGen/AMDGPU/schedule-amdgpu-trackers.ll
+++ b/llvm/test/CodeGen/AMDGPU/schedule-amdgpu-trackers.ll
@@ -93,7 +93,7 @@ entry:
%conv = add i32 %i6, %i7
%conv.frozen = freeze i32 %conv
%div = udiv i32 %conv.frozen, 49
- %add.ptr22 = getelementptr inbounds float, ptr addrspace(4) %wei_ptr, i64 undef
+ %add.ptr22 = getelementptr inbounds float, ptr addrspace(4) %wei_ptr, i64 0
%in.ptr1 = getelementptr inbounds float, ptr addrspace(1) %in, i32 %i5
br label %for.cond28.preheader
@@ -530,11 +530,11 @@ for.cond28.preheader: ; preds = %for.cond28.preheade
br i1 %exitcond.not, label %for.cond.cleanup26, label %for.cond28.preheader
for.cond.cleanup26: ; preds = %for.cond28.preheader
- %mul119 = shl nuw nsw i32 undef, 1
+ %mul119 = shl nuw nsw i32 0, 1
%mul120 = mul i32 %div, 200704
- %mul121 = mul i32 undef, 6272
+ %mul121 = mul i32 0, 6272
%add122 = add i32 %mul120, %mul121
- %mul123 = mul nuw nsw i32 undef, 28
+ %mul123 = mul nuw nsw i32 0, 28
%add124 = add i32 %add122, %mul123
%add126 = add i32 %add124, %mul119
%idx.ext127 = zext i32 %add126 to i64
diff --git a/llvm/test/CodeGen/AMDGPU/si-annotate-cf-noloop.ll b/llvm/test/CodeGen/AMDGPU/si-annotate-cf-noloop.ll
index 1620e2778223c..522b46526f0b9 100644
--- a/llvm/test/CodeGen/AMDGPU/si-annotate-cf-noloop.ll
+++ b/llvm/test/CodeGen/AMDGPU/si-annotate-cf-noloop.ll
@@ -2,7 +2,6 @@
; RUN: opt -mtriple=amdgcn-- -S -passes=structurizecfg,si-annotate-control-flow -simplifycfg-require-and-preserve-domtree=1 %s | FileCheck -check-prefix=OPT %s
; RUN: llc -mtriple=amdgcn -verify-machineinstrs -simplifycfg-require-and-preserve-domtree=1 < %s | FileCheck -check-prefix=GCN %s
-
; OPT-LABEL: @annotate_unreachable_noloop(
; OPT-NOT: call i1 @llvm.amdgcn.loop
@@ -19,7 +18,7 @@ bb1: ; preds = %bb
%tmp2 = sext i32 %tmp to i64
%tmp3 = getelementptr inbounds <4 x float>, ptr addrspace(1) %arg, i64 %tmp2
%tmp4 = load <4 x float>, ptr addrspace(1) %tmp3, align 16
- br i1 undef, label %bb5, label %bb3
+ br i1 poison, label %bb5, label %bb3
bb3: ; preds = %bb1
%tmp6 = extractelement <4 x float> %tmp4, i32 2
@@ -84,7 +83,8 @@ bb1: ; preds = %bb
%tmp2 = sext i32 %tmp to i64
%tmp3 = getelementptr inbounds <4 x float>, ptr addrspace(1) %arg, i64 %tmp2
%tmp4 = load <4 x float>, ptr addrspace(1) %tmp3, align 16
- br i1 undef, label %bb5, label %bb3
+ %undef = freeze i1 poison
+ br i1 %undef, label %bb5, label %bb3
bb3: ; preds = %bb1
%tmp6 = extractelement <4 x float> %tmp4, i32 2
diff --git a/llvm/test/CodeGen/AMDGPU/si-spill-cf.ll b/llvm/test/CodeGen/AMDGPU/si-spill-cf.ll
index 25592c8ac8072..88daad2bf6949 100644
--- a/llvm/test/CodeGen/AMDGPU/si-spill-cf.ll
+++ b/llvm/test/CodeGen/AMDGPU/si-spill-cf.ll
@@ -7,75 +7,75 @@
; SI: s_or_b64 exec, exec, [[SAVED:s\[[0-9]+:[0-9]+\]|[a-z]+]]
; SI-NOT: v_readlane_b32 [[SAVED]]
-define amdgpu_ps void @main() #0 {
+define amdgpu_ps void @main(<4 x i32> inreg %rsrc) #0 {
main_body:
- %tmp = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 16, i32 0)
- %tmp1 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 32, i32 0)
- %tmp2 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 80, i32 0)
- %tmp3 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 84, i32 0)
- %tmp4 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 88, i32 0)
- %tmp5 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 96, i32 0)
- %tmp6 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 100, i32 0)
- %tmp7 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 104, i32 0)
- %tmp8 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 112, i32 0)
- %tmp9 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 116, i32 0)
- %tmp10 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 120, i32 0)
- %tmp11 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 128, i32 0)
- %tmp12 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 132, i32 0)
- %tmp13 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 136, i32 0)
- %tmp14 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 144, i32 0)
- %tmp15 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 148, i32 0)
- %tmp16 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 152, i32 0)
- %tmp17 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 160, i32 0)
- %tmp18 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 164, i32 0)
- %tmp19 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 168, i32 0)
- %tmp20 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 176, i32 0)
- %tmp21 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 180, i32 0)
- %tmp22 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 184, i32 0)
- %tmp23 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 192, i32 0)
- %tmp24 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 196, i32 0)
- %tmp25 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 200, i32 0)
- %tmp26 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 208, i32 0)
- %tmp27 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 212, i32 0)
- %tmp28 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 216, i32 0)
- %tmp29 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 224, i32 0)
- %tmp30 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 228, i32 0)
- %tmp31 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 232, i32 0)
- %tmp32 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 240, i32 0)
- %tmp33 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 244, i32 0)
- %tmp34 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 248, i32 0)
- %tmp35 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 256, i32 0)
- %tmp36 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 260, i32 0)
- %tmp37 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 264, i32 0)
- %tmp38 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 272, i32 0)
- %tmp39 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 276, i32 0)
- %tmp40 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 280, i32 0)
- %tmp41 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 288, i32 0)
- %tmp42 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 292, i32 0)
- %tmp43 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 296, i32 0)
- %tmp44 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 304, i32 0)
- %tmp45 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 308, i32 0)
- %tmp46 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 312, i32 0)
- %tmp47 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 320, i32 0)
- %tmp48 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 324, i32 0)
- %tmp49 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 328, i32 0)
- %tmp50 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 336, i32 0)
- %tmp51 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 340, i32 0)
- %tmp52 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 344, i32 0)
- %tmp53 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 352, i32 0)
- %tmp54 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 356, i32 0)
- %tmp55 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 360, i32 0)
- %tmp56 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 368, i32 0)
- %tmp57 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 372, i32 0)
- %tmp58 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 376, i32 0)
- %tmp59 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 384, i32 0)
- %tmp60 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 388, i32 0)
- %tmp61 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 392, i32 0)
- %tmp62 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 400, i32 0)
- %tmp63 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 404, i32 0)
- %tmp64 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 408, i32 0)
- %tmp65 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 416, i32 0)
- %tmp66 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> poison, i32 420, i32 0)
+ %tmp = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 16, i32 0)
+ %tmp1 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 32, i32 0)
+ %tmp2 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 80, i32 0)
+ %tmp3 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 84, i32 0)
+ %tmp4 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 88, i32 0)
+ %tmp5 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 96, i32 0)
+ %tmp6 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 100, i32 0)
+ %tmp7 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 104, i32 0)
+ %tmp8 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 112, i32 0)
+ %tmp9 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 116, i32 0)
+ %tmp10 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 120, i32 0)
+ %tmp11 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 128, i32 0)
+ %tmp12 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 132, i32 0)
+ %tmp13 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 136, i32 0)
+ %tmp14 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 144, i32 0)
+ %tmp15 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 148, i32 0)
+ %tmp16 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 152, i32 0)
+ %tmp17 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 160, i32 0)
+ %tmp18 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 164, i32 0)
+ %tmp19 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 168, i32 0)
+ %tmp20 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 176, i32 0)
+ %tmp21 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 180, i32 0)
+ %tmp22 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 184, i32 0)
+ %tmp23 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 192, i32 0)
+ %tmp24 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 196, i32 0)
+ %tmp25 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 200, i32 0)
+ %tmp26 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 208, i32 0)
+ %tmp27 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 212, i32 0)
+ %tmp28 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 216, i32 0)
+ %tmp29 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 224, i32 0)
+ %tmp30 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 228, i32 0)
+ %tmp31 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 232, i32 0)
+ %tmp32 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 240, i32 0)
+ %tmp33 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 244, i32 0)
+ %tmp34 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 248, i32 0)
+ %tmp35 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 256, i32 0)
+ %tmp36 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 260, i32 0)
+ %tmp37 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 264, i32 0)
+ %tmp38 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 272, i32 0)
+ %tmp39 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 276, i32 0)
+ %tmp40 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 280, i32 0)
+ %tmp41 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 288, i32 0)
+ %tmp42 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 292, i32 0)
+ %tmp43 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 296, i32 0)
+ %tmp44 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 304, i32 0)
+ %tmp45 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 308, i32 0)
+ %tmp46 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 312, i32 0)
+ %tmp47 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 320, i32 0)
+ %tmp48 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 324, i32 0)
+ %tmp49 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 328, i32 0)
+ %tmp50 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 336, i32 0)
+ %tmp51 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 340, i32 0)
+ %tmp52 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 344, i32 0)
+ %tmp53 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 352, i32 0)
+ %tmp54 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 356, i32 0)
+ %tmp55 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 360, i32 0)
+ %tmp56 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 368, i32 0)
+ %tmp57 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 372, i32 0)
+ %tmp58 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 376, i32 0)
+ %tmp59 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 384, i32 0)
+ %tmp60 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 388, i32 0)
+ %tmp61 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 392, i32 0)
+ %tmp62 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 400, i32 0)
+ %tmp63 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 404, i32 0)
+ %tmp64 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 408, i32 0)
+ %tmp65 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 416, i32 0)
+ %tmp66 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 420, i32 0)
br label %LOOP
LOOP: ; preds = %ENDIF2795, %main_body
@@ -90,7 +90,7 @@ ENDLOOP: ; preds = %ELSE2566, %LOOP
%one.sub.ac.i = fmul float %one.sub.a.i, 0x7FF8000000000000
%fmul = fmul float 0x7FF8000000000000, 0x7FF8000000000000
%result.i = fadd float %fmul, %one.sub.ac.i
- call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float poison, float %result.i, float 0x7FF8000000000000, float 1.000000e+00, i1 true, i1 true) #0
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float poison, float %result.i, float poison, float 1.000000e+00, i1 true, i1 true) #0
ret void
ENDIF: ; preds = %LOOP
@@ -107,9 +107,9 @@ ENDIF: ; preds = %LOOP
%tmp78 = call float @llvm.minnum.f32(float %tmp73, float %tmp77)
%tmp79 = call float @llvm.maxnum.f32(float %tmp71, float 0.000000e+00)
%tmp80 = call float @llvm.maxnum.f32(float %tmp72, float %tmp76)
- %tmp81 = call float @llvm.maxnum.f32(float poison, float %tmp78)
+ %tmp81 = call float @llvm.maxnum.f32(float 0x7FF8000000000000, float %tmp78)
%tmp82 = call float @llvm.minnum.f32(float %tmp79, float %tmp80)
- %tmp83 = call float @llvm.minnum.f32(float %tmp82, float poison)
+ %tmp83 = call float @llvm.minnum.f32(float %tmp82, float 0x7FF8000000000000)
%tmp84 = fsub float %tmp14, 0x7FF8000000000000
%tmp85 = fsub float %tmp15, 0x7FF8000000000000
%tmp86 = fsub float %tmp16, 0x7FF8000000000000
@@ -125,19 +125,19 @@ ENDIF: ; preds = %LOOP
%tmp96 = call float @llvm.minnum.f32(float %tmp88, float %tmp94)
%tmp97 = call float @llvm.maxnum.f32(float %tmp87, float %tmp93)
%tmp98 = call float @llvm.maxnum.f32(float %tmp89, float %tmp95)
- %tmp99 = call float @llvm.maxnum.f32(float poison, float %tmp96)
- %tmp100 = call float @llvm.maxnum.f32(float %tmp99, float poison)
- %tmp101 = call float @llvm.minnum.f32(float %tmp97, float poison)
+ %tmp99 = call float @llvm.maxnum.f32(float 0x7FF8000000000000, float %tmp96)
+ %tmp100 = call float @llvm.maxnum.f32(float %tmp99, float 0x7FF8000000000000)
+ %tmp101 = call float @llvm.minnum.f32(float %tmp97, float 0x7FF8000000000000)
%tmp102 = call float @llvm.minnum.f32(float %tmp101, float %tmp98)
%tmp103 = fsub float %tmp30, 0x7FF8000000000000
%tmp104 = fsub float %tmp31, 0x7FF8000000000000
%tmp105 = fmul float %tmp103, 0.000000e+00
%tmp106 = fmul float %tmp104, 0.000000e+00
- %tmp107 = call float @llvm.minnum.f32(float poison, float %tmp105)
- %tmp108 = call float @llvm.maxnum.f32(float poison, float %tmp106)
- %tmp109 = call float @llvm.maxnum.f32(float poison, float %tmp107)
- %tmp110 = call float @llvm.maxnum.f32(float %tmp109, float poison)
- %tmp111 = call float @llvm.minnum.f32(float poison, float %tmp108)
+ %tmp107 = call float @llvm.minnum.f32(float 0x7FF8000000000000, float %tmp105)
+ %tmp108 = call float @llvm.maxnum.f32(float 0x7FF8000000000000, float %tmp106)
+ %tmp109 = call float @llvm.maxnum.f32(float 0x7FF8000000000000, float %tmp107)
+ %tmp110 = call float @llvm.maxnum.f32(float %tmp109, float 0x7FF8000000000000)
+ %tmp111 = call float @llvm.minnum.f32(float 0x7FF8000000000000, float %tmp108)
%tmp112 = fsub float %tmp32, 0x7FF8000000000000
%tmp113 = fsub float %tmp33, 0x7FF8000000000000
%tmp114 = fsub float %tmp34, 0x7FF8000000000000
@@ -219,18 +219,20 @@ ENDIF: ; preds = %LOOP
%tmp190 = fmul float %tmp188, 0x7FF8000000000000
%tmp191 = call float @llvm.maxnum.f32(float %tmp184, float %tmp189)
%tmp192 = call float @llvm.maxnum.f32(float %tmp185, float %tmp190)
- %tmp193 = call float @llvm.maxnum.f32(float %tmp186, float poison)
+ %tmp193 = call float @llvm.maxnum.f32(float %tmp186, float 0x7FF8000000000000)
%tmp194 = call float @llvm.minnum.f32(float %tmp191, float %tmp192)
%tmp195 = call float @llvm.minnum.f32(float %tmp194, float %tmp193)
- %.temp292.7 = select i1 undef, float %tmp162, float poison
+ %undef0 = freeze i1 poison
+ %.temp292.7 = select i1 %undef0, float %tmp162, float 0x7FF8000000000000
%temp292.9 = select i1 false, float %tmp180, float %.temp292.7
- %.temp292.9 = select i1 undef, float poison, float %temp292.9
+ %undef1 = freeze i1 poison
+ %.temp292.9 = select i1 %undef1, float 0x7FF8000000000000, float %temp292.9
%tmp196 = fcmp ogt float 0x7FF8000000000000, 0.000000e+00
%tmp197 = fcmp olt float 0x7FF8000000000000, %tmp195
%tmp198 = and i1 %tmp196, %tmp197
%tmp199 = fcmp olt float 0x7FF8000000000000, %.temp292.9
%tmp200 = and i1 %tmp198, %tmp199
- %temp292.11 = select i1 %tmp200, float poison, float %.temp292.9
+ %temp292.11 = select i1 %tmp200, float 0x7FF8000000000000, float %.temp292.9
%tid0 = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
%cmp0 = icmp eq i32 %tid0, 0
br i1 %cmp0, label %IF2565, label %ELSE2566
@@ -238,7 +240,17 @@ ENDIF: ; preds = %LOOP
IF2565: ; preds = %ENDIF
%tid1 = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
%cmp1 = icmp eq i32 %tid1, 0
- br i1 %cmp1, label %ENDIF2582, label %ELSE2584
+ %tmp212 = fadd float %tmp1, 0x7FF8000000000000
+ %tmp213 = fadd float 0.000000e+00, %tmp212
+ %floor = call float @llvm.floor.f32(float %tmp213)
+ %tmp214 = fsub float %tmp213, %floor
+ %tid4 = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
+ %cmp4 = icmp eq i32 %tid4, 0
+ %tmp215 = fsub float 1.000000e+00, %tmp214
+ %tmp216 = call float @llvm.sqrt.f32(float %tmp215)
+ %tmp217 = fmul float %tmp216, 0x7FF8000000000000
+ %tmp218 = fadd float %tmp217, 0x7FF8000000000000
+ br label %ENDIF2564
ELSE2566: ; preds = %ENDIF
%tid2 = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
@@ -246,14 +258,14 @@ ELSE2566: ; preds = %ENDIF
%tmp201 = fcmp oeq float %temp292.11, %tidf
br i1 %tmp201, label %ENDLOOP, label %ELSE2593
-ENDIF2564: ; preds = %ENDIF2594, %ENDIF2588
- %temp894.1 = phi float [ poison, %ENDIF2588 ], [ %temp894.2, %ENDIF2594 ]
- %temp18.1 = phi float [ %tmp218, %ENDIF2588 ], [ poison, %ENDIF2594 ]
+ENDIF2564: ; preds = %ENDIF2594, %IF2565
+ %temp894.1 = phi float [ poison, %IF2565 ], [ %temp894.2, %ENDIF2594 ]
+ %temp18.1 = phi float [ %tmp218, %IF2565 ], [ poison, %ENDIF2594 ]
%tmp202 = fsub float %tmp5, 0x7FF8000000000000
%tmp203 = fmul float %tmp202, 0x7FF8000000000000
- %tmp204 = call float @llvm.maxnum.f32(float poison, float %tmp203)
- %tmp205 = call float @llvm.minnum.f32(float %tmp204, float poison)
- %tmp206 = call float @llvm.minnum.f32(float %tmp205, float poison)
+ %tmp204 = call float @llvm.maxnum.f32(float 0x7FF8000000000000, float %tmp203)
+ %tmp205 = call float @llvm.minnum.f32(float %tmp204, float 0x7FF8000000000000)
+ %tmp206 = call float @llvm.minnum.f32(float %tmp205, float 0x7FF8000000000000)
%tmp207 = fcmp ogt float 0x7FF8000000000000, 0.000000e+00
%tmp208 = fcmp olt float 0x7FF8000000000000, 1.000000e+00
%tmp209 = and i1 %tmp207, %tmp208
@@ -263,31 +275,6 @@ ENDIF2564: ; preds = %ENDIF2594, %ENDIF25
%tmp211 = and i1 %tmp209, %tmp210
br i1 %tmp211, label %ENDIF2795, label %ELSE2797
-ELSE2584: ; preds = %IF2565
- br label %ENDIF2582
-
-ENDIF2582: ; preds = %ELSE2584, %IF2565
- %tmp212 = fadd float %tmp1, 0x7FF8000000000000
- %tmp213 = fadd float 0.000000e+00, %tmp212
- %floor = call float @llvm.floor.f32(float %tmp213)
- %tmp214 = fsub float %tmp213, %floor
- %tid4 = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
- %cmp4 = icmp eq i32 %tid4, 0
- br i1 %cmp4, label %IF2589, label %ELSE2590
-
-IF2589: ; preds = %ENDIF2582
- br label %ENDIF2588
-
-ELSE2590: ; preds = %ENDIF2582
- br label %ENDIF2588
-
-ENDIF2588: ; preds = %ELSE2590, %IF2589
- %tmp215 = fsub float 1.000000e+00, %tmp214
- %tmp216 = call float @llvm.sqrt.f32(float %tmp215)
- %tmp217 = fmul float %tmp216, 0x7FF8000000000000
- %tmp218 = fadd float %tmp217, 0x7FF8000000000000
- br label %ENDIF2564
-
ELSE2593: ; preds = %ELSE2566
%tmp219 = fcmp oeq float %temp292.11, %tmp81
%tmp220 = fcmp olt float %tmp81, %tmp83
@@ -298,24 +285,20 @@ ELSE2596: ; preds = %ELSE2593
%tmp222 = fcmp oeq float %temp292.11, %tmp100
%tmp223 = fcmp olt float %tmp100, %tmp102
%tmp224 = and i1 %tmp222, %tmp223
- br i1 %tmp224, label %ENDIF2594, label %ELSE2632
+ %undef_ELSE2596 = freeze i1 poison
+ %brmerge = or i1 %tmp224, %undef_ELSE2596
+ br i1 %brmerge, label %ENDIF2594, label %ELSE2650
-ENDIF2594: ; preds = %ELSE2788, %ELSE2785, %ELSE2782, %ELSE2779, %IF2775, %ELSE2761, %ELSE2758, %IF2757, %ELSE2704, %ELSE2686, %ELSE2671, %ELSE2668, %IF2667, %ELSE2632, %ELSE2596, %ELSE2593
- %temp894.2 = phi float [ 0.000000e+00, %IF2667 ], [ 0.000000e+00, %ELSE2671 ], [ 0.000000e+00, %IF2757 ], [ 0.000000e+00, %ELSE2761 ], [ %temp894.0, %ELSE2758 ], [ 0.000000e+00, %IF2775 ], [ 0.000000e+00, %ELSE2779 ], [ 0.000000e+00, %ELSE2782 ], [ %.2848, %ELSE2788 ], [ 0.000000e+00, %ELSE2785 ], [ 0.000000e+00, %ELSE2593 ], [ 0.000000e+00, %ELSE2632 ], [ 0.000000e+00, %ELSE2704 ], [ 0.000000e+00, %ELSE2686 ], [ 0.000000e+00, %ELSE2668 ], [ 0.000000e+00, %ELSE2596 ]
+ENDIF2594: ; preds = %ELSE2704, %ELSE2650, %ELSE2596, %ELSE2686, %ELSE2668, %ELSE2593
+ %temp894.2 = phi float [ 0.000000e+00, %ELSE2593 ], [ 0.000000e+00, %ELSE2686 ], [ 0.000000e+00, %ELSE2668 ], [ 0.000000e+00, %ELSE2596 ], [ 0.000000e+00, %ELSE2650 ], [ %spec.select6, %ELSE2704 ]
%tmp225 = fmul float %temp894.2, 0x7FF8000000000000
br label %ENDIF2564
-ELSE2632: ; preds = %ELSE2596
- br i1 undef, label %ENDIF2594, label %ELSE2650
-
-ELSE2650: ; preds = %ELSE2632
+ELSE2650: ; preds = %ELSE2596
%tmp226 = fcmp oeq float %temp292.11, %tmp110
%tmp227 = fcmp olt float %tmp110, %tmp111
%tmp228 = and i1 %tmp226, %tmp227
- br i1 %tmp228, label %IF2667, label %ELSE2668
-
-IF2667: ; preds = %ELSE2650
- br i1 undef, label %ENDIF2594, label %ELSE2671
+ br i1 %tmp228, label %ENDIF2594, label %ELSE2668
ELSE2668: ; preds = %ELSE2650
%tmp229 = fcmp oeq float %temp292.11, %tmp128
@@ -323,9 +306,6 @@ ELSE2668: ; preds = %ELSE2650
%tmp231 = and i1 %tmp229, %tmp230
br i1 %tmp231, label %ENDIF2594, label %ELSE2686
-ELSE2671: ; preds = %IF2667
- br label %ENDIF2594
-
ELSE2686: ; preds = %ELSE2668
%tmp232 = fcmp oeq float %temp292.11, %tmp145
%tmp233 = fcmp olt float %tmp145, 0x7FF8000000000000
@@ -336,37 +316,9 @@ ELSE2704: ; preds = %ELSE2686
%tmp235 = fcmp oeq float %temp292.11, %tmp180
%tmp236 = fcmp olt float %tmp180, 0x7FF8000000000000
%tmp237 = and i1 %tmp235, %tmp236
- br i1 %tmp237, label %ENDIF2594, label %ELSE2740
-
-ELSE2740: ; preds = %ELSE2704
- br i1 undef, label %IF2757, label %ELSE2758
-
-IF2757: ; preds = %ELSE2740
- br i1 undef, label %ENDIF2594, label %ELSE2761
-
-ELSE2758: ; preds = %ELSE2740
- br i1 undef, label %IF2775, label %ENDIF2594
-
-ELSE2761: ; preds = %IF2757
- br label %ENDIF2594
-
-IF2775: ; preds = %ELSE2758
- %tmp238 = fcmp olt float 0x7FF8000000000000, 0x7FF8000000000000
- br i1 %tmp238, label %ENDIF2594, label %ELSE2779
-
-ELSE2779: ; preds = %IF2775
- br i1 undef, label %ENDIF2594, label %ELSE2782
-
-ELSE2782: ; preds = %ELSE2779
- br i1 undef, label %ENDIF2594, label %ELSE2785
-
-ELSE2785: ; preds = %ELSE2782
- %tmp239 = fcmp olt float 0x7FF8000000000000, 0.000000e+00
- br i1 %tmp239, label %ENDIF2594, label %ELSE2788
-
-ELSE2788: ; preds = %ELSE2785
- %tmp240 = fcmp olt float 0.000000e+00, 0x7FF8000000000000
- %.2848 = select i1 %tmp240, float -1.000000e+00, float 1.000000e+00
+ %undef.ELSE2704 = freeze i1 poison
+ %spec.select = select i1 %undef.ELSE2704, float 0.000000e+00, float %temp894.0
+ %spec.select6 = select i1 %tmp237, float 0.000000e+00, float %spec.select
br label %ENDIF2594
ELSE2797: ; preds = %ENDIF2564
@@ -386,22 +338,19 @@ ELSE2797: ; preds = %ENDIF2564
%tmp254 = call float @llvm.minnum.f32(float %tmp245, float %tmp251)
%tmp255 = call float @llvm.maxnum.f32(float %tmp246, float %tmp252)
%tmp256 = call float @llvm.maxnum.f32(float %tmp253, float %tmp254)
- %tmp257 = call float @llvm.maxnum.f32(float %tmp256, float poison)
- %tmp258 = call float @llvm.minnum.f32(float poison, float %tmp255)
+ %tmp257 = call float @llvm.maxnum.f32(float %tmp256, float 0x7FF8000000000000)
+ %tmp258 = call float @llvm.minnum.f32(float 0x7FF8000000000000, float %tmp255)
%tmp259 = fcmp ogt float %tmp257, 0.000000e+00
%tmp260 = fcmp olt float %tmp257, 1.000000e+00
%tmp261 = and i1 %tmp259, %tmp260
%tmp262 = fcmp olt float %tmp257, %tmp258
%tmp263 = and i1 %tmp261, %tmp262
- br i1 %tmp263, label %ENDIF2795, label %ELSE2800
+ br i1 %tmp263, label %ENDIF2795, label %ELSE2803
-ENDIF2795: ; preds = %ELSE2824, %ELSE2821, %ELSE2818, %ELSE2815, %ELSE2812, %ELSE2809, %ELSE2806, %ELSE2803, %ELSE2800, %ELSE2797, %ENDIF2564
+ENDIF2795: ; preds = %ELSE2806, %ELSE2797, %ELSE2824, %ELSE2821, %ELSE2803, %ENDIF2564
br label %LOOP
-ELSE2800: ; preds = %ELSE2797
- br i1 undef, label %ENDIF2795, label %ELSE2803
-
-ELSE2803: ; preds = %ELSE2800
+ELSE2803: ; preds = %ELSE2797
%tmp264 = fsub float %tmp20, 0x7FF8000000000000
%tmp265 = fsub float %tmp21, 0x7FF8000000000000
%tmp266 = fsub float %tmp22, 0x7FF8000000000000
@@ -417,9 +366,9 @@ ELSE2803: ; preds = %ELSE2800
%tmp276 = call float @llvm.minnum.f32(float %tmp267, float %tmp273)
%tmp277 = call float @llvm.maxnum.f32(float %tmp268, float %tmp274)
%tmp278 = call float @llvm.maxnum.f32(float %tmp269, float %tmp275)
- %tmp279 = call float @llvm.maxnum.f32(float %tmp276, float poison)
- %tmp280 = call float @llvm.maxnum.f32(float %tmp279, float poison)
- %tmp281 = call float @llvm.minnum.f32(float poison, float %tmp277)
+ %tmp279 = call float @llvm.maxnum.f32(float %tmp276, float 0x7FF8000000000000)
+ %tmp280 = call float @llvm.maxnum.f32(float %tmp279, float 0x7FF8000000000000)
+ %tmp281 = call float @llvm.minnum.f32(float 0x7FF8000000000000, float %tmp277)
%tmp282 = call float @llvm.minnum.f32(float %tmp281, float %tmp278)
%tmp283 = fcmp ogt float %tmp280, 0.000000e+00
%tmp284 = fcmp olt float %tmp280, 1.000000e+00
@@ -438,31 +387,19 @@ ELSE2806: ; preds = %ELSE2803
%tmp294 = fsub float %tmp29, 0x7FF8000000000000
%tmp295 = fmul float %tmp294, 0x7FF8000000000000
%tmp296 = call float @llvm.minnum.f32(float %tmp291, float %tmp295)
- %tmp297 = call float @llvm.minnum.f32(float %tmp292, float poison)
- %tmp298 = call float @llvm.maxnum.f32(float %tmp293, float poison)
+ %tmp297 = call float @llvm.minnum.f32(float %tmp292, float 0x7FF8000000000000)
+ %tmp298 = call float @llvm.maxnum.f32(float %tmp293, float 0x7FF8000000000000)
%tmp299 = call float @llvm.maxnum.f32(float %tmp296, float %tmp297)
- %tmp300 = call float @llvm.maxnum.f32(float %tmp299, float poison)
- %tmp301 = call float @llvm.minnum.f32(float poison, float %tmp298)
+ %tmp300 = call float @llvm.maxnum.f32(float %tmp299, float 0x7FF8000000000000)
+ %tmp301 = call float @llvm.minnum.f32(float 0x7FF8000000000000, float %tmp298)
%tmp302 = fcmp ogt float %tmp300, 0.000000e+00
%tmp303 = fcmp olt float %tmp300, 1.000000e+00
%tmp304 = and i1 %tmp302, %tmp303
%tmp305 = fcmp olt float %tmp300, %tmp301
%tmp306 = and i1 %tmp304, %tmp305
- br i1 %tmp306, label %ENDIF2795, label %ELSE2809
-
-ELSE2809: ; preds = %ELSE2806
- br i1 undef, label %ENDIF2795, label %ELSE2812
-
-ELSE2812: ; preds = %ELSE2809
- br i1 undef, label %ENDIF2795, label %ELSE2815
-
-ELSE2815: ; preds = %ELSE2812
- br i1 undef, label %ENDIF2795, label %ELSE2818
-
-ELSE2818: ; preds = %ELSE2815
- br i1 undef, label %ENDIF2795, label %ELSE2821
+ br i1 %tmp306, label %ENDIF2795, label %ELSE2821
-ELSE2821: ; preds = %ELSE2818
+ELSE2821: ; preds = %ELSE2806
%tmp307 = fsub float %tmp56, 0x7FF8000000000000
%tmp308 = fsub float %tmp57, 0x7FF8000000000000
%tmp309 = fsub float %tmp58, 0x7FF8000000000000
@@ -488,7 +425,8 @@ ELSE2821: ; preds = %ELSE2818
br i1 %tmp328, label %ENDIF2795, label %ELSE2824
ELSE2824: ; preds = %ELSE2821
- %.2849 = select i1 undef, float 0.000000e+00, float 1.000000e+00
+ %undef = freeze i1 poison
+ %.2849 = select i1 %undef, float 0.000000e+00, float 1.000000e+00
br label %ENDIF2795
}
diff --git a/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll b/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll
index 244a90fa0c4c4..7e0341efad6f8 100644
--- a/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll
+++ b/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll
@@ -1134,17 +1134,19 @@ exit:
}
; bug 28550
-define amdgpu_ps void @phi_use_def_before_kill(float inreg %x) #0 {
+define amdgpu_ps void @phi_use_def_before_kill(float inreg %x, i32 inreg %y) #0 {
; SI-LABEL: phi_use_def_before_kill:
; SI: ; %bb.0: ; %bb
; SI-NEXT: v_add_f32_e64 v1, s0, 1.0
; SI-NEXT: v_cmp_lt_f32_e32 vcc, 0, v1
+; SI-NEXT: s_mov_b64 s[2:3], exec
; SI-NEXT: v_cndmask_b32_e64 v0, 0, -1.0, vcc
; SI-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v1
-; SI-NEXT: s_andn2_b64 exec, exec, vcc
+; SI-NEXT: s_andn2_b64 s[2:3], s[2:3], vcc
; SI-NEXT: s_cbranch_scc0 .LBB11_6
; SI-NEXT: ; %bb.1: ; %bb
; SI-NEXT: s_andn2_b64 exec, exec, vcc
+; SI-NEXT: s_cmp_lg_u32 s1, 0
; SI-NEXT: s_cbranch_scc0 .LBB11_3
; SI-NEXT: ; %bb.2: ; %bb8
; SI-NEXT: s_mov_b32 s3, 0xf000
@@ -1172,13 +1174,15 @@ define amdgpu_ps void @phi_use_def_before_kill(float inreg %x) #0 {
; GFX10-WAVE64-LABEL: phi_use_def_before_kill:
; GFX10-WAVE64: ; %bb.0: ; %bb
; GFX10-WAVE64-NEXT: v_add_f32_e64 v1, s0, 1.0
+; GFX10-WAVE64-NEXT: s_mov_b64 s[2:3], exec
; GFX10-WAVE64-NEXT: v_cmp_lt_f32_e32 vcc, 0, v1
; GFX10-WAVE64-NEXT: v_cndmask_b32_e64 v0, 0, -1.0, vcc
; GFX10-WAVE64-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v1
-; GFX10-WAVE64-NEXT: s_andn2_b64 exec, exec, vcc
+; GFX10-WAVE64-NEXT: s_andn2_b64 s[2:3], s[2:3], vcc
; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB11_6
; GFX10-WAVE64-NEXT: ; %bb.1: ; %bb
; GFX10-WAVE64-NEXT: s_andn2_b64 exec, exec, vcc
+; GFX10-WAVE64-NEXT: s_cmp_lg_u32 s1, 0
; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB11_3
; GFX10-WAVE64-NEXT: ; %bb.2: ; %bb8
; GFX10-WAVE64-NEXT: v_mov_b32_e32 v1, 8
@@ -1202,13 +1206,15 @@ define amdgpu_ps void @phi_use_def_before_kill(float inreg %x) #0 {
; GFX10-WAVE32-LABEL: phi_use_def_before_kill:
; GFX10-WAVE32: ; %bb.0: ; %bb
; GFX10-WAVE32-NEXT: v_add_f32_e64 v1, s0, 1.0
+; GFX10-WAVE32-NEXT: s_mov_b32 s2, exec_lo
; GFX10-WAVE32-NEXT: v_cmp_lt_f32_e32 vcc_lo, 0, v1
; GFX10-WAVE32-NEXT: v_cndmask_b32_e64 v0, 0, -1.0, vcc_lo
; GFX10-WAVE32-NEXT: v_cmp_nlt_f32_e32 vcc_lo, 0, v1
-; GFX10-WAVE32-NEXT: s_andn2_b32 exec_lo, exec_lo, vcc_lo
+; GFX10-WAVE32-NEXT: s_andn2_b32 s2, s2, vcc_lo
; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB11_6
; GFX10-WAVE32-NEXT: ; %bb.1: ; %bb
; GFX10-WAVE32-NEXT: s_andn2_b32 exec_lo, exec_lo, vcc_lo
+; GFX10-WAVE32-NEXT: s_cmp_lg_u32 s1, 0
; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB11_3
; GFX10-WAVE32-NEXT: ; %bb.2: ; %bb8
; GFX10-WAVE32-NEXT: v_mov_b32_e32 v1, 8
@@ -1232,14 +1238,16 @@ define amdgpu_ps void @phi_use_def_before_kill(float inreg %x) #0 {
; GFX11-LABEL: phi_use_def_before_kill:
; GFX11: ; %bb.0: ; %bb
; GFX11-NEXT: v_add_f32_e64 v1, s0, 1.0
+; GFX11-NEXT: s_mov_b64 s[2:3], exec
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_cmp_lt_f32_e32 vcc, 0, v1
; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, -1.0, vcc
; GFX11-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v1
-; GFX11-NEXT: s_and_not1_b64 exec, exec, vcc
+; GFX11-NEXT: s_and_not1_b64 s[2:3], s[2:3], vcc
; GFX11-NEXT: s_cbranch_scc0 .LBB11_6
; GFX11-NEXT: ; %bb.1: ; %bb
; GFX11-NEXT: s_and_not1_b64 exec, exec, vcc
+; GFX11-NEXT: s_cmp_lg_u32 s1, 0
; GFX11-NEXT: s_cbranch_scc0 .LBB11_3
; GFX11-NEXT: ; %bb.2: ; %bb8
; GFX11-NEXT: v_mov_b32_e32 v1, 8
@@ -1265,7 +1273,8 @@ bb:
%tmp2 = select i1 %tmp1, float -1.000000e+00, float 0.000000e+00
%cmp.tmp2 = fcmp olt float %tmp2, 0.0
call void @llvm.amdgcn.kill(i1 %cmp.tmp2)
- br i1 undef, label %phibb, label %bb8
+ %uniform.cond = icmp eq i32 %y, 0
+ br i1 %uniform.cond, label %phibb, label %bb8
phibb:
%tmp5 = phi float [ %tmp2, %bb ], [ 4.0, %bb8 ]
diff --git a/llvm/test/CodeGen/AMDGPU/soft-clause-exceeds-register-budget.ll b/llvm/test/CodeGen/AMDGPU/soft-clause-exceeds-register-budget.ll
index 3176257920a7a..71e4755b58bf2 100644
--- a/llvm/test/CodeGen/AMDGPU/soft-clause-exceeds-register-budget.ll
+++ b/llvm/test/CodeGen/AMDGPU/soft-clause-exceeds-register-budget.ll
@@ -34,7 +34,7 @@ entry:
%conv = add i32 %i6, %i7
%conv.frozen = freeze i32 %conv
%div = udiv i32 %conv.frozen, 49
- %add.ptr22 = getelementptr inbounds float, ptr addrspace(4) %wei_ptr, i64 undef
+ %add.ptr22 = getelementptr inbounds float, ptr addrspace(4) %wei_ptr, i64 0
%in.ptr1 = getelementptr inbounds float, ptr addrspace(1) %in, i32 %i5
br label %for.cond28.preheader
@@ -471,11 +471,11 @@ for.cond28.preheader: ; preds = %for.cond28.preheade
br i1 %exitcond.not, label %for.cond.cleanup26, label %for.cond28.preheader
for.cond.cleanup26: ; preds = %for.cond28.preheader
- %mul119 = shl nuw nsw i32 undef, 1
+ %mul119 = shl nuw nsw i32 0, 1
%mul120 = mul i32 %div, 200704
- %mul121 = mul i32 undef, 6272
+ %mul121 = mul i32 0, 6272
%add122 = add i32 %mul120, %mul121
- %mul123 = mul nuw nsw i32 undef, 28
+ %mul123 = mul nuw nsw i32 0, 28
%add124 = add i32 %add122, %mul123
%add126 = add i32 %add124, %mul119
%idx.ext127 = zext i32 %add126 to i64
diff --git a/llvm/test/CodeGen/AMDGPU/splitkit-getsubrangeformask.ll b/llvm/test/CodeGen/AMDGPU/splitkit-getsubrangeformask.ll
index fc5f6d9dab796..b7e6ebaa655b9 100644
--- a/llvm/test/CodeGen/AMDGPU/splitkit-getsubrangeformask.ll
+++ b/llvm/test/CodeGen/AMDGPU/splitkit-getsubrangeformask.ll
@@ -87,18 +87,18 @@ define amdgpu_gs void @_amdgpu_gs_main(i32 inreg %primShaderTableAddrLow, <31 x
; CHECK-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_LSHL_B32_]], 16, implicit-def dead $scc
; CHECK-NEXT: [[S_ADD_I32_1:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_LSHL_B32_2]], 16, implicit-def dead $scc
; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM [[S_MOV_B32_]], [[S_ADD_I32_]], 0, 0 :: (dereferenceable invariant load (s32))
- ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM1:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM [[S_MOV_B32_]], undef %301:sreg_32, 0, 0 :: (dereferenceable invariant load (s32))
+ ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM1:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM [[S_MOV_B32_]], undef %302:sreg_32, 0, 0 :: (dereferenceable invariant load (s32))
; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM2:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM [[S_MOV_B32_]], [[S_ADD_I32_1]], 0, 0 :: (dereferenceable invariant load (s32))
; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM2:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[S_MOV_B32_]], 16, 0 :: (dereferenceable invariant load (s32))
- ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM3:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM undef %356:sgpr_128, undef %357:sreg_32, 0, 0 :: (dereferenceable invariant load (s32))
- ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM3:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM undef %367:sgpr_128, 16, 0 :: (dereferenceable invariant load (s32))
+ ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM3:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM undef %357:sgpr_128, undef %358:sreg_32, 0, 0 :: (dereferenceable invariant load (s32))
+ ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM3:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM undef %368:sgpr_128, 16, 0 :: (dereferenceable invariant load (s32))
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM4:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_3]], 64, 0 :: (invariant load (s128) from %ir.99, addrspace 4)
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM5:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_4]], 64, 0 :: (invariant load (s128) from %ir.107, addrspace 4)
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM6:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_6]], 0, 0 :: (invariant load (s128) from %ir.117, addrspace 4)
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM7:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_7]], 0, 0 :: (invariant load (s128) from %ir.124, addrspace 4)
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN2:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM2]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
- ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM4:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM undef %351:sgpr_128, [[S_ADD_I32_]], 0, 0 :: (dereferenceable invariant load (s32))
- ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM5:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM undef %362:sgpr_128, [[S_ADD_I32_1]], 0, 0 :: (dereferenceable invariant load (s32))
+ ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM4:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM undef %352:sgpr_128, [[S_ADD_I32_]], 0, 0 :: (dereferenceable invariant load (s32))
+ ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM5:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM undef %363:sgpr_128, [[S_ADD_I32_1]], 0, 0 :: (dereferenceable invariant load (s32))
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN3:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM3]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
; CHECK-NEXT: [[S_ADD_I32_2:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM]], -98, implicit-def dead $scc
; CHECK-NEXT: [[S_ADD_I32_3:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM1]], -114, implicit-def dead $scc
@@ -116,7 +116,7 @@ define amdgpu_gs void @_amdgpu_gs_main(i32 inreg %primShaderTableAddrLow, <31 x
; CHECK-NEXT: [[S_LSHL_B32_3:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY12]], 4, implicit-def dead $scc
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN4:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
; CHECK-NEXT: [[S_ADD_I32_6:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_LSHL_B32_3]], 16, implicit-def dead $scc
- ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM6:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM undef %383:sgpr_128, [[S_ADD_I32_6]], 0, 0 :: (dereferenceable invariant load (s32))
+ ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM6:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM undef %384:sgpr_128, [[S_ADD_I32_6]], 0, 0 :: (dereferenceable invariant load (s32))
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN5:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM9:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_5]], 224, 0 :: (invariant load (s128) from %ir.129, addrspace 4)
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM10:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY7]], 224, 0 :: (invariant load (s128) from %ir.145, addrspace 4)
@@ -198,9 +198,9 @@ define amdgpu_gs void @_amdgpu_gs_main(i32 inreg %primShaderTableAddrLow, <31 x
; CHECK-NEXT: [[COPY17:%[0-9]+]].sub1:sgpr_128 = COPY [[S_AND_B32_1]]
; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM6:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[COPY17]], 0, 0 :: (dereferenceable invariant load (s32))
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM23:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_16]], 160, 0 :: (invariant load (s128) from %ir.256, addrspace 4)
- ; CHECK-NEXT: [[S_LOAD_DWORD_IMM1:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM undef %469:sreg_64, 0, 0 :: (invariant load (s32) from `ptr addrspace(4) poison`, addrspace 4)
+ ; CHECK-NEXT: [[S_LOAD_DWORD_IMM1:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM undef %470:sreg_64, 0, 0 :: (invariant load (s32) from `ptr addrspace(4) poison`, addrspace 4)
; CHECK-NEXT: KILL [[S_ADD_U32_16]].sub0, [[S_ADD_U32_16]].sub1
- ; CHECK-NEXT: KILL undef %469:sreg_64
+ ; CHECK-NEXT: KILL undef %470:sreg_64
; CHECK-NEXT: KILL [[COPY17]].sub0_sub1_sub2, [[COPY17]].sub3
; CHECK-NEXT: [[S_LSHL_B32_8:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY14]], 3, implicit-def dead $scc
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM24:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_17]], 160, 0 :: (invariant load (s128) from %ir.265, addrspace 4)
@@ -211,8 +211,8 @@ define amdgpu_gs void @_amdgpu_gs_main(i32 inreg %primShaderTableAddrLow, <31 x
; CHECK-NEXT: [[S_LOAD_DWORD_IMM2:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[S_ADD_U32_21]], 168, 0 :: (invariant load (s32) from %ir.305, align 8, addrspace 4)
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN21:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM23]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN22:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM24]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
- ; CHECK-NEXT: KILL [[S_LOAD_DWORDX4_IMM23]]
; CHECK-NEXT: KILL [[S_LOAD_DWORDX4_IMM24]]
+ ; CHECK-NEXT: KILL [[S_LOAD_DWORDX4_IMM23]]
; CHECK-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_LOAD_DWORD_IMM1]], 65535, implicit-def dead $scc
; CHECK-NEXT: [[COPY18:%[0-9]+]]:sgpr_128 = COPY [[S_LOAD_DWORDX2_IMM]]
; CHECK-NEXT: [[COPY18:%[0-9]+]].sub1:sgpr_128 = COPY [[S_AND_B32_2]]
@@ -236,10 +236,10 @@ define amdgpu_gs void @_amdgpu_gs_main(i32 inreg %primShaderTableAddrLow, <31 x
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN23:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM25]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN24:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM26]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN25:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM27]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
- ; CHECK-NEXT: KILL [[S_LOAD_DWORDX4_IMM26]]
- ; CHECK-NEXT: KILL [[V_MOV_B32_e32_]]
; CHECK-NEXT: KILL [[S_LOAD_DWORDX4_IMM27]]
; CHECK-NEXT: KILL [[S_LOAD_DWORDX4_IMM25]]
+ ; CHECK-NEXT: KILL [[V_MOV_B32_e32_]]
+ ; CHECK-NEXT: KILL [[S_LOAD_DWORDX4_IMM26]]
; CHECK-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -2, [[BUFFER_LOAD_FORMAT_X_IDXEN]], 0, implicit $exec
; CHECK-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -1, [[BUFFER_LOAD_FORMAT_X_IDXEN1]], 0, implicit $exec
; CHECK-NEXT: [[V_ADD_U32_e64_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -3, [[BUFFER_LOAD_FORMAT_X_IDXEN]], 0, implicit $exec
@@ -351,13 +351,13 @@ define amdgpu_gs void @_amdgpu_gs_main(i32 inreg %primShaderTableAddrLow, <31 x
; CHECK-NEXT: [[V_OR_B32_e64_64:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_63]], [[V_ADD_U32_e64_28]], implicit $exec
; CHECK-NEXT: [[V_ADD_U32_e64_30:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -593, [[BUFFER_LOAD_FORMAT_X_IDXEN]], 0, implicit $exec
; CHECK-NEXT: [[V_OR_B32_e64_65:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_64]], [[V_ADD_U32_e64_29]], implicit $exec
- ; CHECK-NEXT: [[S_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_LOAD_DWORDX8_IMM undef %542:sreg_64, 0, 0 :: (invariant load (s256) from `ptr addrspace(4) poison`, addrspace 4)
+ ; CHECK-NEXT: [[S_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_LOAD_DWORDX8_IMM undef %543:sreg_64, 0, 0 :: (invariant load (s256) from `ptr addrspace(4) poison`, addrspace 4)
; CHECK-NEXT: [[V_OR_B32_e64_66:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_65]], [[V_ADD_U32_e64_30]], implicit $exec
; CHECK-NEXT: [[S_ADD_I32_24:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM8]], -594, implicit-def dead $scc
; CHECK-NEXT: [[V_OR_B32_e64_67:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_ADD_I32_24]], [[V_OR_B32_e64_66]], implicit $exec
; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 0, [[V_OR_B32_e64_67]], implicit $exec
; CHECK-NEXT: undef [[V_CNDMASK_B32_e64_:%[0-9]+]].sub3:vreg_128 = V_CNDMASK_B32_e64 0, 0, 0, 1, [[V_CMP_EQ_U32_e64_]], implicit $exec
- ; CHECK-NEXT: IMAGE_STORE_V4_V2_nsa_gfx10 [[V_CNDMASK_B32_e64_]], undef %556:vgpr_32, undef %558:vgpr_32, [[S_LOAD_DWORDX8_IMM]], 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), addrspace 8)
+ ; CHECK-NEXT: IMAGE_STORE_V4_V2_nsa_gfx10 [[V_CNDMASK_B32_e64_]], undef %557:vgpr_32, undef %559:vgpr_32, [[S_LOAD_DWORDX8_IMM]], 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), addrspace 8)
; CHECK-NEXT: S_ENDPGM 0
.expVert:
%0 = extractelement <31 x i32> %userData, i64 2
@@ -406,7 +406,7 @@ define amdgpu_gs void @_amdgpu_gs_main(i32 inreg %primShaderTableAddrLow, <31 x
%40 = and i32 %rootDesc58.ii1.i, 65535
%41 = insertelement <4 x i32> <i32 poison, i32 poison, i32 -1, i32 553734060>, i32 %rootDesc58.ii0.i, i32 0
%42 = insertelement <4 x i32> %41, i32 %40, i32 1
- %43 = and i32 undef, 65535
+ %43 = and i32 0, 65535
%44 = insertelement <4 x i32> poison, i32 %43, i32 1
%45 = load <4 x i32>, ptr addrspace(4) poison, align 16
%46 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %45, i32 0, i32 0, i32 0, i32 0)
@@ -470,7 +470,8 @@ define amdgpu_gs void @_amdgpu_gs_main(i32 inreg %primShaderTableAddrLow, <31 x
%104 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %103, i32 0, i32 0, i32 0, i32 0)
%105 = add i32 %104, -34
%106 = or i32 %101, %105
- %107 = call i32 @llvm.amdgcn.readfirstlane(i32 undef)
+ %undef = freeze i32 poison
+ %107 = call i32 @llvm.amdgcn.readfirstlane(i32 %undef)
%108 = sext i32 %107 to i64
%109 = getelementptr i8, ptr addrspace(4) %91, i64 %108
%110 = load <4 x i32>, ptr addrspace(4) %109, align 16
@@ -490,7 +491,8 @@ define amdgpu_gs void @_amdgpu_gs_main(i32 inreg %primShaderTableAddrLow, <31 x
%124 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> poison, i32 0, i32 0, i32 0, i32 0)
%125 = add i32 %124, -39
%126 = or i32 %123, %125
- %127 = call i32 @llvm.amdgcn.readfirstlane(i32 undef)
+ %undef1 = freeze i32 poison
+ %127 = call i32 @llvm.amdgcn.readfirstlane(i32 %undef1)
%128 = sext i32 %127 to i64
%129 = getelementptr i8, ptr addrspace(4) %32, i64 %128
%130 = load <4 x i32>, ptr addrspace(4) %129, align 16
@@ -513,7 +515,7 @@ define amdgpu_gs void @_amdgpu_gs_main(i32 inreg %primShaderTableAddrLow, <31 x
%147 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %146, i32 0, i32 0, i32 0, i32 0)
%148 = add i32 %147, -53
%149 = or i32 %144, %148
- %150 = sext i32 undef to i64
+ %150 = sext i32 0 to i64
%151 = getelementptr i8, ptr addrspace(4) %134, i64 %150
%152 = load <4 x i32>, ptr addrspace(4) %151, align 16
%153 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %152, i32 0, i32 0, i32 0, i32 0)
@@ -574,7 +576,7 @@ define amdgpu_gs void @_amdgpu_gs_main(i32 inreg %primShaderTableAddrLow, <31 x
%208 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %44, i32 %207, i32 0)
%209 = add i32 %208, -130
%210 = or i32 %205, %209
- %211 = getelementptr <{ [4 x i32], [6 x %llpc.array.element] }>, ptr addrspace(6) null, i32 0, i32 1, i32 undef, i32 0
+ %211 = getelementptr <{ [4 x i32], [6 x %llpc.array.element] }>, ptr addrspace(6) null, i32 0, i32 1, i32 0, i32 0
%212 = ptrtoint ptr addrspace(6) %211 to i32
%213 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %44, i32 %212, i32 0)
%214 = add i32 %213, -178
@@ -617,7 +619,7 @@ define amdgpu_gs void @_amdgpu_gs_main(i32 inreg %primShaderTableAddrLow, <31 x
%251 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> poison, i32 %250, i32 0)
%252 = add i32 %251, -249
%253 = or i32 %248, %252
- %254 = getelementptr <{ [4 x i32], [6 x %llpc.array.element.2] }>, ptr addrspace(6) null, i32 0, i32 1, i32 undef, i32 0
+ %254 = getelementptr <{ [4 x i32], [6 x %llpc.array.element.2] }>, ptr addrspace(6) null, i32 0, i32 1, i32 0, i32 0
%255 = ptrtoint ptr addrspace(6) %254 to i32
%256 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> poison, i32 %255, i32 0)
%257 = add i32 %256, -297
@@ -661,7 +663,7 @@ define amdgpu_gs void @_amdgpu_gs_main(i32 inreg %primShaderTableAddrLow, <31 x
%295 = sext i32 %294 to i64
%296 = getelementptr i8, ptr addrspace(4) %293, i64 %295
%.ii0.i = load i32, ptr addrspace(4) %296, align 8
- %297 = and i32 undef, 65535
+ %297 = and i32 0, 65535
%298 = insertelement <4 x i32> <i32 poison, i32 poison, i32 -1, i32 553734060>, i32 %.ii0.i, i32 0
%299 = insertelement <4 x i32> %298, i32 %297, i32 1
%300 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %299, i32 0, i32 0)
diff --git a/llvm/test/CodeGen/AMDGPU/undefined-subreg-liverange.ll b/llvm/test/CodeGen/AMDGPU/undefined-subreg-liverange.ll
index 4f551d4c9de1a..78103d5e40425 100644
--- a/llvm/test/CodeGen/AMDGPU/undefined-subreg-liverange.ll
+++ b/llvm/test/CodeGen/AMDGPU/undefined-subreg-liverange.ll
@@ -8,6 +8,7 @@
define amdgpu_kernel void @func() #0 {
; CHECK-LABEL: func:
; CHECK: ; %bb.0: ; %B0
+; CHECK-NEXT: s_cmp_lg_u32 s8, 0
; CHECK-NEXT: s_mov_b32 s0, 0
; CHECK-NEXT: s_cbranch_scc1 .LBB0_2
; CHECK-NEXT: ; %bb.1: ; %B30.1
@@ -18,17 +19,19 @@ define amdgpu_kernel void @func() #0 {
; CHECK-NEXT: ds_write_b32 v0, v0
; CHECK-NEXT: s_endpgm
B0:
- br i1 undef, label %B1, label %B2
+ %id = call i32 @llvm.amdgcn.workgroup.id.x()
+ %cmp = icmp eq i32 %id, 0
+ br i1 %cmp, label %B1, label %B2
B1:
br label %B2
B2:
%v0 = phi <4 x float> [ zeroinitializer, %B1 ], [ <float 0.0, float 0.0, float 0.0, float poison>, %B0 ]
- br i1 undef, label %B30.1, label %B30.2
+ br i1 %cmp, label %B30.1, label %B30.2
B30.1:
- %sub = fsub <4 x float> %v0, undef
+ %sub = fsub <4 x float> %v0, splat (float 0x7FF8000000000000)
br label %B30.2
B30.2:
@@ -73,7 +76,7 @@ bb:
%tmp3 = bitcast i32 %tmp1 to float
%tmp4 = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %tmp3, float %tmp3, <8 x i32> poison, <4 x i32> poison, i1 0, i32 0, i32 0)
%tmp5 = extractelement <4 x float> %tmp4, i32 0
- %tmp6 = fmul float %tmp5, undef
+ %tmp6 = fmul float %tmp5, 0x7FF8000000000000
%tmp7 = fadd float %tmp6, %tmp6
%tmp8 = insertelement <4 x i32> %tmp2, i32 %tmp, i32 1
store <4 x i32> %tmp8, ptr addrspace(1) poison, align 16
diff --git a/llvm/test/CodeGen/AMDGPU/uniform-cfg.ll b/llvm/test/CodeGen/AMDGPU/uniform-cfg.ll
index 9a330a2683097..374c6701f1ba6 100644
--- a/llvm/test/CodeGen/AMDGPU/uniform-cfg.ll
+++ b/llvm/test/CodeGen/AMDGPU/uniform-cfg.ll
@@ -1150,6 +1150,10 @@ define void @move_to_valu_vgpr_operand_phi(ptr addrspace(3) %out) {
; SI-NEXT: v_add_i32_e64 v0, s[4:5], 8, v0
; SI-NEXT: .LBB20_2: ; %bb1
; SI-NEXT: ; =>This Inner Loop Header: Depth=1
+; SI-NEXT: ;;#ASMSTART
+; SI-NEXT: ; def s4
+; SI-NEXT: ;;#ASMEND
+; SI-NEXT: s_cmp_lg_u32 s4, 0
; SI-NEXT: s_cbranch_scc1 .LBB20_1
; SI-NEXT: ; %bb.3: ; %bb2
; SI-NEXT: ; in Loop: Header=BB20_2 Depth=1
@@ -1173,6 +1177,10 @@ define void @move_to_valu_vgpr_operand_phi(ptr addrspace(3) %out) {
; VI-NEXT: v_add_u32_e64 v0, s[4:5], 8, v0
; VI-NEXT: .LBB20_2: ; %bb1
; VI-NEXT: ; =>This Inner Loop Header: Depth=1
+; VI-NEXT: ;;#ASMSTART
+; VI-NEXT: ; def s4
+; VI-NEXT: ;;#ASMEND
+; VI-NEXT: s_cmp_lg_u32 s4, 0
; VI-NEXT: s_cbranch_scc1 .LBB20_1
; VI-NEXT: ; %bb.3: ; %bb2
; VI-NEXT: ; in Loop: Header=BB20_2 Depth=1
@@ -1189,7 +1197,9 @@ bb1: ; preds = %bb3, %bb0
%tmp0 = phi i32 [ 8, %bb0 ], [ %tmp4, %bb3 ]
%tmp1 = add nsw i32 %tmp0, -1
%tmp2 = getelementptr inbounds i32, ptr addrspace(3) %out, i32 %tmp1
- br i1 undef, label %bb2, label %bb3
+ %cond = call i32 asm "; def $0","=s"()
+ %cmp = icmp eq i32 %cond, 0
+ br i1 %cmp, label %bb2, label %bb3
bb2: ; preds = %bb1
store volatile i32 1, ptr addrspace(3) %tmp2, align 4
diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-liverange-ir.ll b/llvm/test/CodeGen/AMDGPU/vgpr-liverange-ir.ll
index 3e5b8b1b13db6..aea25b37e8f4e 100644
--- a/llvm/test/CodeGen/AMDGPU/vgpr-liverange-ir.ll
+++ b/llvm/test/CodeGen/AMDGPU/vgpr-liverange-ir.ll
@@ -538,7 +538,8 @@ if.then: ; preds = %entry
ret void
if.then9: ; preds = %entry
- br i1 undef, label %sw.bb18, label %sw.bb
+ %undef = freeze i1 poison
+ br i1 %undef, label %sw.bb18, label %sw.bb
sw.bb: ; preds = %if.then9
%i17 = load i8, ptr addrspace(1) null, align 1
diff --git a/llvm/test/CodeGen/AMDGPU/wave32.ll b/llvm/test/CodeGen/AMDGPU/wave32.ll
index baf9e9df91689..4212fd3b35cd8 100644
--- a/llvm/test/CodeGen/AMDGPU/wave32.ll
+++ b/llvm/test/CodeGen/AMDGPU/wave32.ll
@@ -1511,7 +1511,7 @@ define amdgpu_kernel void @test_preserve_condition_undef_flag(float %arg, i32 %a
; GFX1064-NEXT: s_endpgm
bb0:
%tmp = icmp sgt i32 %arg1, 4
- %undef = call i1 @llvm.amdgcn.class.f32(float poison, i32 undef)
+ %undef = call i1 @llvm.amdgcn.class.f32(float poison, i32 0)
%tmp4 = select i1 %undef, float %arg, float 1.000000e+00
%tmp5 = fcmp ogt float %arg2, 0.000000e+00
%tmp6 = fcmp olt float %arg2, 1.000000e+00
@@ -2329,7 +2329,7 @@ for.body.lr.ph: ; preds = %entry
br label %for.body
for.body: ; preds = %for.body, %for.body.lr.ph
- br i1 undef, label %for.end, label %for.body
+ br i1 poison, label %for.end, label %for.body
for.end: ; preds = %for.body, %entry
ret void
diff --git a/llvm/test/CodeGen/MIR/AMDGPU/custom-pseudo-source-values.ll b/llvm/test/CodeGen/MIR/AMDGPU/custom-pseudo-source-values.ll
index 519cc1478a434..ed57628fa721c 100644
--- a/llvm/test/CodeGen/MIR/AMDGPU/custom-pseudo-source-values.ll
+++ b/llvm/test/CodeGen/MIR/AMDGPU/custom-pseudo-source-values.ll
@@ -16,7 +16,7 @@ define amdgpu_cs void @shader(i32 %arg0, i32 %arg1, <8 x i32> inreg %arg2, ptr a
%bload1.f = bitcast i32 %bload1 to float
%bload2.f = bitcast i32 %bload2 to float
%bload3.f = bitcast i32 %bload3 to float
- %istore0 = insertelement <4 x float> undef, float %bload0.f, i32 0
+ %istore0 = insertelement <4 x float> poison, float %bload0.f, i32 0
%istore1 = insertelement <4 x float> %istore0, float %bload0.f, i32 1
%istore2 = insertelement <4 x float> %istore1, float %bload0.f, i32 2
%istore3 = insertelement <4 x float> %istore2, float %bload0.f, i32 3
diff --git a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-long-branch-reg-debug.ll b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-long-branch-reg-debug.ll
index 883657547519b..b2f299d531f5c 100644
--- a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-long-branch-reg-debug.ll
+++ b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-long-branch-reg-debug.ll
@@ -59,7 +59,7 @@
br i1 %0, label %bb2, label %bb4, !dbg !12, !amdgpu.uniform !7
bb2: ; preds = %Flow
- store volatile i32 17, ptr addrspace(1) undef, align 4, !dbg !13
+ store volatile i32 17, ptr addrspace(1) poison, align 4, !dbg !13
br label %bb4, !dbg !14, !amdgpu.uniform !7
bb3: ; preds = %bb0
diff --git a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-long-branch-reg.ll b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-long-branch-reg.ll
index 278bf086d6088..93f2c343cd051 100644
--- a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-long-branch-reg.ll
+++ b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-long-branch-reg.ll
@@ -51,7 +51,7 @@ bb0:
br i1 %tmp, label %bb2, label %bb3
bb2:
- store volatile i32 17, ptr addrspace(1) undef
+ store volatile i32 17, ptr addrspace(1) poison
br label %bb4
bb3:
diff --git a/llvm/test/CodeGen/MIR/AMDGPU/mircanon-memoperands.mir b/llvm/test/CodeGen/MIR/AMDGPU/mircanon-memoperands.mir
index af0f28f6b5d74..aa30e20dd7f06 100644
--- a/llvm/test/CodeGen/MIR/AMDGPU/mircanon-memoperands.mir
+++ b/llvm/test/CodeGen/MIR/AMDGPU/mircanon-memoperands.mir
@@ -33,11 +33,11 @@ body: |
; CHECK-NEXT: %bb0_{{[0-9a-f]+}}__1:sreg_64_xexec = S_LOAD_DWORDX2_IMM
%0 = COPY $sgpr4_sgpr5
- %1 = S_LOAD_DWORDX2_IMM %0, 0, 0 :: (non-temporal dereferenceable invariant load (s64) from `ptr addrspace(4) undef`)
- %2 = S_LOAD_DWORDX2_IMM %0, 0, 0 :: ( dereferenceable invariant load (s64) from `ptr addrspace(4) undef`)
- %3 = S_LOAD_DWORDX2_IMM %0, 0, 0 :: ( invariant load (s64) from `ptr addrspace(4) undef`)
- %4 = S_LOAD_DWORDX2_IMM %0, 0, 0 :: ( load (s64) from `ptr addrspace(4) undef`)
- %5 = S_LOAD_DWORDX2_IMM %0, 0, 0 :: ( load (s64) from `ptr addrspace(2) undef`)
- %6 = S_LOAD_DWORDX2_IMM %0, 0, 0 :: ( load (s64) from `ptr addrspace(1) undef`)
+ %1 = S_LOAD_DWORDX2_IMM %0, 0, 0 :: (non-temporal dereferenceable invariant load (s64) from `ptr addrspace(4) poison`)
+ %2 = S_LOAD_DWORDX2_IMM %0, 0, 0 :: ( dereferenceable invariant load (s64) from `ptr addrspace(4) poison`)
+ %3 = S_LOAD_DWORDX2_IMM %0, 0, 0 :: ( invariant load (s64) from `ptr addrspace(4) poison`)
+ %4 = S_LOAD_DWORDX2_IMM %0, 0, 0 :: ( load (s64) from `ptr addrspace(4) poison`)
+ %5 = S_LOAD_DWORDX2_IMM %0, 0, 0 :: ( load (s64) from `ptr addrspace(2) poison`)
+ %6 = S_LOAD_DWORDX2_IMM %0, 0, 0 :: ( load (s64) from `ptr addrspace(1) poison`)
...
diff --git a/llvm/test/CodeGen/MIR/AMDGPU/syncscopes.mir b/llvm/test/CodeGen/MIR/AMDGPU/syncscopes.mir
index c28a4405d488c..db18d5433da3b 100644
--- a/llvm/test/CodeGen/MIR/AMDGPU/syncscopes.mir
+++ b/llvm/test/CodeGen/MIR/AMDGPU/syncscopes.mir
@@ -74,14 +74,14 @@ body: |
liveins: $sgpr4_sgpr5
S_WAITCNT 0
- $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr4_sgpr5, 8, 0 :: (non-temporal dereferenceable invariant load (s64) from `ptr addrspace(4) undef`)
- $sgpr6 = S_LOAD_DWORD_IMM $sgpr4_sgpr5, 0, 0 :: (non-temporal dereferenceable invariant load (s32) from `ptr addrspace(4) undef`)
- $sgpr2_sgpr3 = S_LOAD_DWORDX2_IMM $sgpr4_sgpr5, 24, 0 :: (non-temporal dereferenceable invariant load (s64) from `ptr addrspace(4) undef`)
- $sgpr7 = S_LOAD_DWORD_IMM $sgpr4_sgpr5, 16, 0 :: (non-temporal dereferenceable invariant load (s32) from `ptr addrspace(4) undef`)
- $sgpr8 = S_LOAD_DWORD_IMM $sgpr4_sgpr5, 32, 0 :: (non-temporal dereferenceable invariant load (s32) from `ptr addrspace(4) undef`)
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr4_sgpr5, 8, 0 :: (non-temporal dereferenceable invariant load (s64) from `ptr addrspace(4) poison`)
+ $sgpr6 = S_LOAD_DWORD_IMM $sgpr4_sgpr5, 0, 0 :: (non-temporal dereferenceable invariant load (s32) from `ptr addrspace(4) poison`)
+ $sgpr2_sgpr3 = S_LOAD_DWORDX2_IMM $sgpr4_sgpr5, 24, 0 :: (non-temporal dereferenceable invariant load (s64) from `ptr addrspace(4) poison`)
+ $sgpr7 = S_LOAD_DWORD_IMM $sgpr4_sgpr5, 16, 0 :: (non-temporal dereferenceable invariant load (s32) from `ptr addrspace(4) poison`)
+ $sgpr8 = S_LOAD_DWORD_IMM $sgpr4_sgpr5, 32, 0 :: (non-temporal dereferenceable invariant load (s32) from `ptr addrspace(4) poison`)
S_WAITCNT 127
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
- $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed $sgpr4_sgpr5, 40, 0 :: (non-temporal dereferenceable invariant load (s64) from `ptr addrspace(4) undef`)
+ $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed $sgpr4_sgpr5, 40, 0 :: (non-temporal dereferenceable invariant load (s64) from `ptr addrspace(4) poison`)
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit killed $sgpr0_sgpr1, implicit $sgpr0_sgpr1, implicit $exec
$vgpr2 = V_MOV_B32_e32 killed $sgpr6, implicit $exec, implicit $exec
FLAT_STORE_DWORD killed $vgpr0_vgpr1, killed $vgpr2, 0, 19, implicit $exec, implicit $flat_scr :: (volatile non-temporal store syncscope("agent") seq_cst (s32) into %ir.agent_out)
More information about the llvm-commits
mailing list